blob: e771c20303107a836c55a98e617d9c6fedf67623 [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R David Murray28346b82011-03-31 11:40:20 -040039from test.support import run_unittest, unlink
R David Murraya256bac2011-03-31 12:20:23 -040040from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000041
# Small string constants shared by the tests in this file.  NL is used to
# re-join message lines; EMPTYSTRING and SPACE are presumably used by tests
# further down the file (not visible from here -- verify before removing).
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
45
46
Guido van Rossum8b3febe2007-08-30 01:15:14 +000047# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Tests for the public API of email.message.Message.

    Fixture messages (``msg_NN.txt``) are loaded through ``self._msgobj()``
    and ``openfile()``; neither is defined by this class.
    """

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the caller-supplied default unchanged.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # An explicit failobj replaces None for parts without a charset.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        # 33 == len('multipart/form-data; boundary="=='); the rest of the
        # boundary is generated and therefore not compared.
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', 'rb') as fp:
            msgdata = fp.read()
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning to a header appends a new field instead of replacing
            # the old one, and which duplicate a lookup returns is not
            # guaranteed.  Drop the previous value so that every alias is
            # really the one being exercised.  (Deleting a missing header
            # is a no-op, so the first iteration is fine.)
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        # With unixfrom=True the only difference must be a leading
        # "From " envelope line.
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header at all, so the default lookup finds nothing.
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        # Header-name membership is case-insensitive but not prefix-based.
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end, not back in place.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # Deleting a parameter of a header that is not present must not
        # raise ...
        msg.del_param('filename', 'content-disposition')
        # ... and must not create the header as a side effect.
        self.assertNotIn('content-disposition', msg)

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        self.assertEqual(msg['Content-Type'], 'text/plain; charset="utf-8"')
        # Removing a parameter that is not there leaves the header untouched.
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a subtype is rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        # A malformed Content-Type falls back to the text/plain default.
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With duplicates, only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        # Undecodable base64 is handed back as the raw payload bytes.
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        msg = Message()
        # A None value emits the bare parameter name; underscores in the
        # keyword become dashes.
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))
619
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000620# Test the email.encoders module
621class TestEncoders(unittest.TestCase):
R David Murray6d94bd42011-03-16 15:52:22 -0400622
623 def test_EncodersEncode_base64(self):
624 with openfile('PyBanner048.gif', 'rb') as fp:
625 bindata = fp.read()
626 mimed = email.mime.image.MIMEImage(bindata)
627 base64ed = mimed.get_payload()
628 # the transfer-encoded body lines should all be <=76 characters
629 lines = base64ed.split('\n')
630 self.assertLessEqual(max([ len(x) for x in lines ]), 76)
631
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000632 def test_encode_empty_payload(self):
633 eq = self.assertEqual
634 msg = Message()
635 msg.set_charset('us-ascii')
636 eq(msg['content-transfer-encoding'], '7bit')
637
638 def test_default_cte(self):
639 eq = self.assertEqual
Ezio Melottic303c122010-04-22 11:57:12 +0000640 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000641 msg = MIMEText('hello world')
642 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic303c122010-04-22 11:57:12 +0000643 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000644 msg = MIMEText('hello \xf8 world')
645 eq(msg['content-transfer-encoding'], '8bit')
646 # And now with a different charset
647 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
648 eq(msg['content-transfer-encoding'], 'quoted-printable')
649
R. David Murraye85200d2010-05-06 01:41:14 +0000650 def test_encode7or8bit(self):
651 # Make sure a charset whose input character set is 8bit but
652 # whose output character set is 7bit gets a transfer-encoding
653 # of 7bit.
654 eq = self.assertEqual
R. David Murray850fc852010-06-03 01:58:28 +0000655 msg = MIMEText('文', _charset='euc-jp')
R. David Murraye85200d2010-05-06 01:41:14 +0000656 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000657
Ezio Melottib3aedd42010-11-20 19:04:17 +0000658
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000659# Test long header wrapping
660class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400661
662 maxDiff = None
663
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000664 def test_split_long_continuation(self):
665 eq = self.ndiffAssertEqual
666 msg = email.message_from_string("""\
667Subject: bug demonstration
668\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
669\tmore text
670
671test
672""")
673 sfp = StringIO()
674 g = Generator(sfp)
675 g.flatten(msg)
676 eq(sfp.getvalue(), """\
677Subject: bug demonstration
678\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
679\tmore text
680
681test
682""")
683
684 def test_another_long_almost_unsplittable_header(self):
685 eq = self.ndiffAssertEqual
686 hstr = """\
687bug demonstration
688\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
689\tmore text"""
690 h = Header(hstr, continuation_ws='\t')
691 eq(h.encode(), """\
692bug demonstration
693\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
694\tmore text""")
695 h = Header(hstr.replace('\t', ' '))
696 eq(h.encode(), """\
697bug demonstration
698 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
699 more text""")
700
701 def test_long_nonstring(self):
702 eq = self.ndiffAssertEqual
703 g = Charset("iso-8859-1")
704 cz = Charset("iso-8859-2")
705 utf8 = Charset("utf-8")
706 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
707 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
708 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
709 b'bef\xf6rdert. ')
710 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
711 b'd\xf9vtipu.. ')
712 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
713 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
714 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
715 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
716 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
717 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
718 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
719 '\u3044\u307e\u3059\u3002')
720 h = Header(g_head, g, header_name='Subject')
721 h.append(cz_head, cz)
722 h.append(utf8_head, utf8)
723 msg = Message()
724 msg['Subject'] = h
725 sfp = StringIO()
726 g = Generator(sfp)
727 g.flatten(msg)
728 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000729Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
730 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
731 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
732 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
733 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
734 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
735 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
736 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
737 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
738 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
739 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000740
741""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000742 eq(h.encode(maxlinelen=76), """\
743=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
744 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
745 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
746 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
747 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
748 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
749 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
750 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
751 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
752 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
753 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000754
755 def test_long_header_encode(self):
756 eq = self.ndiffAssertEqual
757 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
758 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
759 header_name='X-Foobar-Spoink-Defrobnit')
760 eq(h.encode(), '''\
761wasnipoop; giraffes="very-long-necked-animals";
762 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
763
764 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
765 eq = self.ndiffAssertEqual
766 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
767 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
768 header_name='X-Foobar-Spoink-Defrobnit',
769 continuation_ws='\t')
770 eq(h.encode(), '''\
771wasnipoop; giraffes="very-long-necked-animals";
772 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
773
774 def test_long_header_encode_with_tab_continuation(self):
775 eq = self.ndiffAssertEqual
776 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
777 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
778 header_name='X-Foobar-Spoink-Defrobnit',
779 continuation_ws='\t')
780 eq(h.encode(), '''\
781wasnipoop; giraffes="very-long-necked-animals";
782\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
783
R David Murray3a6152f2011-03-14 21:13:03 -0400784 def test_header_encode_with_different_output_charset(self):
785 h = Header('文', 'euc-jp')
786 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
787
788 def test_long_header_encode_with_different_output_charset(self):
789 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
790 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
791 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
792 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
793 res = """\
794=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
795 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
796 self.assertEqual(h.encode(), res)
797
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000798 def test_header_splitter(self):
799 eq = self.ndiffAssertEqual
800 msg = MIMEText('')
801 # It'd be great if we could use add_header() here, but that doesn't
802 # guarantee an order of the parameters.
803 msg['X-Foobar-Spoink-Defrobnit'] = (
804 'wasnipoop; giraffes="very-long-necked-animals"; '
805 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
806 sfp = StringIO()
807 g = Generator(sfp)
808 g.flatten(msg)
809 eq(sfp.getvalue(), '''\
810Content-Type: text/plain; charset="us-ascii"
811MIME-Version: 1.0
812Content-Transfer-Encoding: 7bit
813X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
814 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
815
816''')
817
818 def test_no_semis_header_splitter(self):
819 eq = self.ndiffAssertEqual
820 msg = Message()
821 msg['From'] = 'test@dom.ain'
822 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
823 msg.set_payload('Test')
824 sfp = StringIO()
825 g = Generator(sfp)
826 g.flatten(msg)
827 eq(sfp.getvalue(), """\
828From: test@dom.ain
829References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
830 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
831
832Test""")
833
R David Murray7da4db12011-04-07 20:37:17 -0400834 def test_last_split_chunk_does_not_fit(self):
835 eq = self.ndiffAssertEqual
836 h = Header('Subject: the first part of this is short, but_the_second'
837 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
838 '_all_by_itself')
839 eq(h.encode(), """\
840Subject: the first part of this is short,
841 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
842
843 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
844 eq = self.ndiffAssertEqual
845 h = Header(', but_the_second'
846 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
847 '_all_by_itself')
848 eq(h.encode(), """\
849,
850 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
851
852 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
853 eq = self.ndiffAssertEqual
854 h = Header(', , but_the_second'
855 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
856 '_all_by_itself')
857 eq(h.encode(), """\
858, ,
859 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
860
861 def test_trailing_splitable_on_overlong_unsplitable(self):
862 eq = self.ndiffAssertEqual
863 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
864 'be_on_a_line_all_by_itself;')
865 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
866 "be_on_a_line_all_by_itself;")
867
868 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
869 eq = self.ndiffAssertEqual
870 h = Header('; '
871 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400872 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400873 eq(h.encode(), """\
874;
R David Murray01581ee2011-04-18 10:04:34 -0400875 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400876
R David Murraye1292a22011-04-07 20:54:03 -0400877 def test_long_header_with_multiple_sequential_split_chars(self):
R David Murraye1292a22011-04-07 20:54:03 -0400878 eq = self.ndiffAssertEqual
879 h = Header('This is a long line that has two whitespaces in a row. '
880 'This used to cause truncation of the header when folded')
881 eq(h.encode(), """\
882This is a long line that has two whitespaces in a row. This used to cause
883 truncation of the header when folded""")
884
R David Murray01581ee2011-04-18 10:04:34 -0400885 def test_splitter_split_on_punctuation_only_if_fws(self):
886 eq = self.ndiffAssertEqual
887 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
888 'they;arenotlegal;fold,points')
889 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
890 "arenotlegal;fold,points")
891
892 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
893 eq = self.ndiffAssertEqual
894 h = Header('this is a test where we need to have more than one line '
895 'before; our final line that is just too big to fit;; '
896 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
897 'be_on_a_line_all_by_itself;')
898 eq(h.encode(), """\
899this is a test where we need to have more than one line before;
900 our final line that is just too big to fit;;
901 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
902
903 def test_overlong_last_part_followed_by_split_point(self):
904 eq = self.ndiffAssertEqual
905 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
906 'be_on_a_line_all_by_itself ')
907 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
908 "should_be_on_a_line_all_by_itself ")
909
910 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
911 eq = self.ndiffAssertEqual
912 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
913 'before_our_final_line_; ; '
914 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
915 'be_on_a_line_all_by_itself; ')
916 eq(h.encode(), """\
917this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
918 ;
919 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
920
921 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
922 eq = self.ndiffAssertEqual
923 h = Header('this is a test where we need to have more than one line '
924 'before our final line; ; '
925 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
926 'be_on_a_line_all_by_itself; ')
927 eq(h.encode(), """\
928this is a test where we need to have more than one line before our final line;
929 ;
930 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
931
932 def test_long_header_with_whitespace_runs(self):
933 eq = self.ndiffAssertEqual
934 msg = Message()
935 msg['From'] = 'test@dom.ain'
936 msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
937 msg.set_payload('Test')
938 sfp = StringIO()
939 g = Generator(sfp)
940 g.flatten(msg)
941 eq(sfp.getvalue(), """\
942From: test@dom.ain
943References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
944 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
945 <foo@dom.ain> <foo@dom.ain>\x20\x20
946
947Test""")
948
949 def test_long_run_with_semi_header_splitter(self):
950 eq = self.ndiffAssertEqual
951 msg = Message()
952 msg['From'] = 'test@dom.ain'
953 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
954 msg.set_payload('Test')
955 sfp = StringIO()
956 g = Generator(sfp)
957 g.flatten(msg)
958 eq(sfp.getvalue(), """\
959From: test@dom.ain
960References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
961 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
962 <foo@dom.ain>; abc
963
964Test""")
965
966 def test_splitter_split_on_punctuation_only_if_fws(self):
967 eq = self.ndiffAssertEqual
968 msg = Message()
969 msg['From'] = 'test@dom.ain'
970 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
971 'they;arenotlegal;fold,points')
972 msg.set_payload('Test')
973 sfp = StringIO()
974 g = Generator(sfp)
975 g.flatten(msg)
976 # XXX the space after the header should not be there.
977 eq(sfp.getvalue(), """\
978From: test@dom.ain
979References:\x20
980 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
981
982Test""")
983
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000984 def test_no_split_long_header(self):
985 eq = self.ndiffAssertEqual
986 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000987 h = Header(hstr)
988 # These come on two lines because Headers are really field value
989 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000990 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000991References:
992 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
993 h = Header('x' * 80)
994 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000995
996 def test_splitting_multiple_long_lines(self):
997 eq = self.ndiffAssertEqual
998 hstr = """\
999from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1000\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1001\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1002"""
1003 h = Header(hstr, continuation_ws='\t')
1004 eq(h.encode(), """\
1005from babylon.socal-raves.org (localhost [127.0.0.1]);
1006 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1007 for <mailman-admin@babylon.socal-raves.org>;
1008 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1009\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1010 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1011 for <mailman-admin@babylon.socal-raves.org>;
1012 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1013\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1014 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1015 for <mailman-admin@babylon.socal-raves.org>;
1016 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1017
1018 def test_splitting_first_line_only_is_long(self):
1019 eq = self.ndiffAssertEqual
1020 hstr = """\
1021from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1022\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1023\tid 17k4h5-00034i-00
1024\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1025 h = Header(hstr, maxlinelen=78, header_name='Received',
1026 continuation_ws='\t')
1027 eq(h.encode(), """\
1028from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1029 helo=cthulhu.gerg.ca)
1030\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1031\tid 17k4h5-00034i-00
1032\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1033
1034 def test_long_8bit_header(self):
1035 eq = self.ndiffAssertEqual
1036 msg = Message()
1037 h = Header('Britische Regierung gibt', 'iso-8859-1',
1038 header_name='Subject')
1039 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001040 eq(h.encode(maxlinelen=76), """\
1041=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1042 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001043 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001044 eq(msg.as_string(maxheaderlen=76), """\
1045Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1046 =?iso-8859-1?q?hore-Windkraftprojekte?=
1047
1048""")
1049 eq(msg.as_string(maxheaderlen=0), """\
1050Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001051
1052""")
1053
1054 def test_long_8bit_header_no_charset(self):
1055 eq = self.ndiffAssertEqual
1056 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001057 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1058 'f\xfcr Offshore-Windkraftprojekte '
1059 '<a-very-long-address@example.com>')
1060 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001061 eq(msg.as_string(maxheaderlen=78), """\
1062Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1063 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1064
1065""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001066 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001067 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001068 header_name='Reply-To')
1069 eq(msg.as_string(maxheaderlen=78), """\
1070Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1071 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001072
1073""")
1074
1075 def test_long_to_header(self):
1076 eq = self.ndiffAssertEqual
1077 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001078 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001079 '"Someone Test #B" <someone@umich.edu>, '
1080 '"Someone Test #C" <someone@eecs.umich.edu>, '
1081 '"Someone Test #D" <someone@eecs.umich.edu>')
1082 msg = Message()
1083 msg['To'] = to
1084 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001085To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001086 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001087 "Someone Test #C" <someone@eecs.umich.edu>,
1088 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001089
1090''')
1091
1092 def test_long_line_after_append(self):
1093 eq = self.ndiffAssertEqual
1094 s = 'This is an example of string which has almost the limit of header length.'
1095 h = Header(s)
1096 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001097 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001098This is an example of string which has almost the limit of header length.
1099 Add another line.""")
1100
1101 def test_shorter_line_with_append(self):
1102 eq = self.ndiffAssertEqual
1103 s = 'This is a shorter line.'
1104 h = Header(s)
1105 h.append('Add another sentence. (Surprise?)')
1106 eq(h.encode(),
1107 'This is a shorter line. Add another sentence. (Surprise?)')
1108
1109 def test_long_field_name(self):
1110 eq = self.ndiffAssertEqual
1111 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001112 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1113 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1114 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1115 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001116 h = Header(gs, 'iso-8859-1', header_name=fn)
1117 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001118 eq(h.encode(maxlinelen=76), """\
1119=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1120 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1121 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1122 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001123
1124 def test_long_received_header(self):
1125 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1126 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1127 'Wed, 05 Mar 2003 18:10:18 -0700')
1128 msg = Message()
1129 msg['Received-1'] = Header(h, continuation_ws='\t')
1130 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001131 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001132 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001133Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1134 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001135 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001136Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1137 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001138 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001139
1140""")
1141
1142 def test_string_headerinst_eq(self):
1143 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1144 'tu-muenchen.de> (David Bremner\'s message of '
1145 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1146 msg = Message()
1147 msg['Received-1'] = Header(h, header_name='Received-1',
1148 continuation_ws='\t')
1149 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001150 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001151 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001152Received-1:\x20
1153 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1154 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1155Received-2:\x20
1156 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1157 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001158
1159""")
1160
1161 def test_long_unbreakable_lines_with_continuation(self):
1162 eq = self.ndiffAssertEqual
1163 msg = Message()
1164 t = """\
1165iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1166 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1167 msg['Face-1'] = t
1168 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001169 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001170 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001171 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001172 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001173Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001174 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001175 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001176Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001177 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001178 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001179Face-3:\x20
1180 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1181 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001182
1183""")
1184
1185 def test_another_long_multiline_header(self):
1186 eq = self.ndiffAssertEqual
1187 m = ('Received: from siimage.com '
1188 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001189 'Microsoft SMTPSVC(5.0.2195.4905); '
1190 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001191 msg = email.message_from_string(m)
1192 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001193Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1194 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001195
1196''')
1197
1198 def test_long_lines_with_different_header(self):
1199 eq = self.ndiffAssertEqual
1200 h = ('List-Unsubscribe: '
1201 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1202 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1203 '?subject=unsubscribe>')
1204 msg = Message()
1205 msg['List'] = h
1206 msg['List'] = Header(h, header_name='List')
1207 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001208List: List-Unsubscribe:
1209 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001210 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001211List: List-Unsubscribe:
1212 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001213 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001214
1215""")
1216
R. David Murray6f0022d2011-01-07 21:57:25 +00001217 def test_long_rfc2047_header_with_embedded_fws(self):
1218 h = Header(textwrap.dedent("""\
1219 We're going to pretend this header is in a non-ascii character set
1220 \tto see if line wrapping with encoded words and embedded
1221 folding white space works"""),
1222 charset='utf-8',
1223 header_name='Test')
1224 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1225 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1226 =?utf-8?q?cter_set?=
1227 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1228 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1229
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001230
Ezio Melottib3aedd42010-11-20 19:04:17 +00001231
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001232# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # The payload's first line starts with "From ", the case that the
        # generator's mangle_from_ option acts on.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def test_mangled_from(self):
        # With mangling on, the body line gains a leading '>'.
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(out.getvalue(), expected)

    def test_dont_mangle_from(self):
        # With mangling off, the body is written untouched.
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(out.getvalue(), expected)
1263
1264
Ezio Melottib3aedd42010-11-20 19:04:17 +00001265
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001266# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Read the sample audio file once and wrap it in a MIMEAudio part
        # with no explicit subtype.
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is base64 text that must decode back to the raw bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs reports both operands on failure, unlike assertTrue(a is b).
        same = self.assertIs
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list serves as a sentinel that can only come back by
        # identity.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        same(self._au.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._au.get_param('foobar', missing), missing)
        same(self._au.get_param('attachment', missing,
                                header='foobar'), missing)
1304
1305
Ezio Melottib3aedd42010-11-20 19:04:17 +00001306
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001307# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Read the sample GIF once and wrap it in a MIMEImage part with no
        # explicit subtype.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is base64 text that must decode back to the raw bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs reports both operands on failure, unlike assertTrue(a is b).
        same = self.assertIs
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list serves as a sentinel that can only come back by
        # identity.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        same(self._im.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._im.get_param('foobar', missing), missing)
        same(self._im.get_param('attachment', missing,
                                header='foobar'), missing)
1345
1346
Ezio Melottib3aedd42010-11-20 19:04:17 +00001347
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001348# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        # Arbitrary bytes with no explicit subtype.
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(raw)
        # whitespace in the cte encoded block is RFC-irrelevant.
        encoded = part.get_payload().strip()
        self.assertEqual(encoded, '+vv8/f7/')
        # Decoding the payload must give back the original bytes.
        self.assertEqual(part.get_payload(decode=True), raw)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001363
1364
Ezio Melottib3aedd42010-11-20 19:04:17 +00001365
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001366# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        # assertIs reports both operands on failure, unlike assertTrue(a is b).
        same = self.assertIs
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list serves as a sentinel that can only come back by
        # identity.
        missing = []
        same(self._msg.get_param('foobar', missing), missing)
        same(self._msg.get_param('charset', missing, header='foobar'),
             missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        # Compares the charset object returned by get_charset() against the
        # plain string.
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        # Cyrillic text given with an explicit utf-8 charset.
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1417
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001418
Ezio Melottib3aedd42010-11-20 19:04:17 +00001419
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001420# Test complicated multipart/* messages
1421class TestMultipart(TestEmailBase):
1422 def setUp(self):
1423 with openfile('PyBanner048.gif', 'rb') as fp:
1424 data = fp.read()
1425 container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
1426 image = MIMEImage(data, name='dingusfish.gif')
1427 image.add_header('content-disposition', 'attachment',
1428 filename='dingusfish.gif')
1429 intro = MIMEText('''\
1430Hi there,
1431
1432This is the dingus fish.
1433''')
1434 container.attach(intro)
1435 container.attach(image)
1436 container['From'] = 'Barry <barry@digicool.com>'
1437 container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
1438 container['Subject'] = 'Here is your dingus fish'
1439
1440 now = 987809702.54848599
1441 timetuple = time.localtime(now)
1442 if timetuple[-1] == 0:
1443 tzsecs = time.timezone
1444 else:
1445 tzsecs = time.altzone
1446 if tzsecs > 0:
1447 sign = '-'
1448 else:
1449 sign = '+'
1450 tzoffset = ' %s%04d' % (sign, tzsecs / 36)
1451 container['Date'] = time.strftime(
1452 '%a, %d %b %Y %H:%M:%S',
1453 time.localtime(now)) + tzoffset
1454 self._msg = container
1455 self._im = image
1456 self._txt = intro
1457
1458 def test_hierarchy(self):
1459 # convenience
1460 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001461 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001462 raises = self.assertRaises
1463 # tests
1464 m = self._msg
1465 unless(m.is_multipart())
1466 eq(m.get_content_type(), 'multipart/mixed')
1467 eq(len(m.get_payload()), 2)
1468 raises(IndexError, m.get_payload, 2)
1469 m0 = m.get_payload(0)
1470 m1 = m.get_payload(1)
1471 unless(m0 is self._txt)
1472 unless(m1 is self._im)
1473 eq(m.get_payload(), [m0, m1])
1474 unless(not m0.is_multipart())
1475 unless(not m1.is_multipart())
1476
1477 def test_empty_multipart_idempotent(self):
1478 text = """\
1479Content-Type: multipart/mixed; boundary="BOUNDARY"
1480MIME-Version: 1.0
1481Subject: A subject
1482To: aperson@dom.ain
1483From: bperson@dom.ain
1484
1485
1486--BOUNDARY
1487
1488
1489--BOUNDARY--
1490"""
1491 msg = Parser().parsestr(text)
1492 self.ndiffAssertEqual(text, msg.as_string())
1493
1494 def test_no_parts_in_a_multipart_with_none_epilogue(self):
1495 outer = MIMEBase('multipart', 'mixed')
1496 outer['Subject'] = 'A subject'
1497 outer['To'] = 'aperson@dom.ain'
1498 outer['From'] = 'bperson@dom.ain'
1499 outer.set_boundary('BOUNDARY')
1500 self.ndiffAssertEqual(outer.as_string(), '''\
1501Content-Type: multipart/mixed; boundary="BOUNDARY"
1502MIME-Version: 1.0
1503Subject: A subject
1504To: aperson@dom.ain
1505From: bperson@dom.ain
1506
1507--BOUNDARY
1508
1509--BOUNDARY--''')
1510
1511 def test_no_parts_in_a_multipart_with_empty_epilogue(self):
1512 outer = MIMEBase('multipart', 'mixed')
1513 outer['Subject'] = 'A subject'
1514 outer['To'] = 'aperson@dom.ain'
1515 outer['From'] = 'bperson@dom.ain'
1516 outer.preamble = ''
1517 outer.epilogue = ''
1518 outer.set_boundary('BOUNDARY')
1519 self.ndiffAssertEqual(outer.as_string(), '''\
1520Content-Type: multipart/mixed; boundary="BOUNDARY"
1521MIME-Version: 1.0
1522Subject: A subject
1523To: aperson@dom.ain
1524From: bperson@dom.ain
1525
1526
1527--BOUNDARY
1528
1529--BOUNDARY--
1530''')
1531
1532 def test_one_part_in_a_multipart(self):
1533 eq = self.ndiffAssertEqual
1534 outer = MIMEBase('multipart', 'mixed')
1535 outer['Subject'] = 'A subject'
1536 outer['To'] = 'aperson@dom.ain'
1537 outer['From'] = 'bperson@dom.ain'
1538 outer.set_boundary('BOUNDARY')
1539 msg = MIMEText('hello world')
1540 outer.attach(msg)
1541 eq(outer.as_string(), '''\
1542Content-Type: multipart/mixed; boundary="BOUNDARY"
1543MIME-Version: 1.0
1544Subject: A subject
1545To: aperson@dom.ain
1546From: bperson@dom.ain
1547
1548--BOUNDARY
1549Content-Type: text/plain; charset="us-ascii"
1550MIME-Version: 1.0
1551Content-Transfer-Encoding: 7bit
1552
1553hello world
1554--BOUNDARY--''')
1555
1556 def test_seq_parts_in_a_multipart_with_empty_preamble(self):
1557 eq = self.ndiffAssertEqual
1558 outer = MIMEBase('multipart', 'mixed')
1559 outer['Subject'] = 'A subject'
1560 outer['To'] = 'aperson@dom.ain'
1561 outer['From'] = 'bperson@dom.ain'
1562 outer.preamble = ''
1563 msg = MIMEText('hello world')
1564 outer.attach(msg)
1565 outer.set_boundary('BOUNDARY')
1566 eq(outer.as_string(), '''\
1567Content-Type: multipart/mixed; boundary="BOUNDARY"
1568MIME-Version: 1.0
1569Subject: A subject
1570To: aperson@dom.ain
1571From: bperson@dom.ain
1572
1573
1574--BOUNDARY
1575Content-Type: text/plain; charset="us-ascii"
1576MIME-Version: 1.0
1577Content-Transfer-Encoding: 7bit
1578
1579hello world
1580--BOUNDARY--''')
1581
1582
1583 def test_seq_parts_in_a_multipart_with_none_preamble(self):
1584 eq = self.ndiffAssertEqual
1585 outer = MIMEBase('multipart', 'mixed')
1586 outer['Subject'] = 'A subject'
1587 outer['To'] = 'aperson@dom.ain'
1588 outer['From'] = 'bperson@dom.ain'
1589 outer.preamble = None
1590 msg = MIMEText('hello world')
1591 outer.attach(msg)
1592 outer.set_boundary('BOUNDARY')
1593 eq(outer.as_string(), '''\
1594Content-Type: multipart/mixed; boundary="BOUNDARY"
1595MIME-Version: 1.0
1596Subject: A subject
1597To: aperson@dom.ain
1598From: bperson@dom.ain
1599
1600--BOUNDARY
1601Content-Type: text/plain; charset="us-ascii"
1602MIME-Version: 1.0
1603Content-Transfer-Encoding: 7bit
1604
1605hello world
1606--BOUNDARY--''')
1607
1608
1609 def test_seq_parts_in_a_multipart_with_none_epilogue(self):
1610 eq = self.ndiffAssertEqual
1611 outer = MIMEBase('multipart', 'mixed')
1612 outer['Subject'] = 'A subject'
1613 outer['To'] = 'aperson@dom.ain'
1614 outer['From'] = 'bperson@dom.ain'
1615 outer.epilogue = None
1616 msg = MIMEText('hello world')
1617 outer.attach(msg)
1618 outer.set_boundary('BOUNDARY')
1619 eq(outer.as_string(), '''\
1620Content-Type: multipart/mixed; boundary="BOUNDARY"
1621MIME-Version: 1.0
1622Subject: A subject
1623To: aperson@dom.ain
1624From: bperson@dom.ain
1625
1626--BOUNDARY
1627Content-Type: text/plain; charset="us-ascii"
1628MIME-Version: 1.0
1629Content-Transfer-Encoding: 7bit
1630
1631hello world
1632--BOUNDARY--''')
1633
1634
1635 def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
1636 eq = self.ndiffAssertEqual
1637 outer = MIMEBase('multipart', 'mixed')
1638 outer['Subject'] = 'A subject'
1639 outer['To'] = 'aperson@dom.ain'
1640 outer['From'] = 'bperson@dom.ain'
1641 outer.epilogue = ''
1642 msg = MIMEText('hello world')
1643 outer.attach(msg)
1644 outer.set_boundary('BOUNDARY')
1645 eq(outer.as_string(), '''\
1646Content-Type: multipart/mixed; boundary="BOUNDARY"
1647MIME-Version: 1.0
1648Subject: A subject
1649To: aperson@dom.ain
1650From: bperson@dom.ain
1651
1652--BOUNDARY
1653Content-Type: text/plain; charset="us-ascii"
1654MIME-Version: 1.0
1655Content-Transfer-Encoding: 7bit
1656
1657hello world
1658--BOUNDARY--
1659''')
1660
1661
1662 def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
1663 eq = self.ndiffAssertEqual
1664 outer = MIMEBase('multipart', 'mixed')
1665 outer['Subject'] = 'A subject'
1666 outer['To'] = 'aperson@dom.ain'
1667 outer['From'] = 'bperson@dom.ain'
1668 outer.epilogue = '\n'
1669 msg = MIMEText('hello world')
1670 outer.attach(msg)
1671 outer.set_boundary('BOUNDARY')
1672 eq(outer.as_string(), '''\
1673Content-Type: multipart/mixed; boundary="BOUNDARY"
1674MIME-Version: 1.0
1675Subject: A subject
1676To: aperson@dom.ain
1677From: bperson@dom.ain
1678
1679--BOUNDARY
1680Content-Type: text/plain; charset="us-ascii"
1681MIME-Version: 1.0
1682Content-Transfer-Encoding: 7bit
1683
1684hello world
1685--BOUNDARY--
1686
1687''')
1688
1689 def test_message_external_body(self):
1690 eq = self.assertEqual
1691 msg = self._msgobj('msg_36.txt')
1692 eq(len(msg.get_payload()), 2)
1693 msg1 = msg.get_payload(1)
1694 eq(msg1.get_content_type(), 'multipart/alternative')
1695 eq(len(msg1.get_payload()), 2)
1696 for subpart in msg1.get_payload():
1697 eq(subpart.get_content_type(), 'message/external-body')
1698 eq(len(subpart.get_payload()), 1)
1699 subsubpart = subpart.get_payload(0)
1700 eq(subsubpart.get_content_type(), 'text/plain')
1701
1702 def test_double_boundary(self):
1703 # msg_37.txt is a multipart that contains two dash-boundary's in a
1704 # row. Our interpretation of RFC 2046 calls for ignoring the second
1705 # and subsequent boundaries.
1706 msg = self._msgobj('msg_37.txt')
1707 self.assertEqual(len(msg.get_payload()), 3)
1708
1709 def test_nested_inner_contains_outer_boundary(self):
1710 eq = self.ndiffAssertEqual
1711 # msg_38.txt has an inner part that contains outer boundaries. My
1712 # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
1713 # these are illegal and should be interpreted as unterminated inner
1714 # parts.
1715 msg = self._msgobj('msg_38.txt')
1716 sfp = StringIO()
1717 iterators._structure(msg, sfp)
1718 eq(sfp.getvalue(), """\
1719multipart/mixed
1720 multipart/mixed
1721 multipart/alternative
1722 text/plain
1723 text/plain
1724 text/plain
1725 text/plain
1726""")
1727
1728 def test_nested_with_same_boundary(self):
1729 eq = self.ndiffAssertEqual
1730 # msg 39.txt is similarly evil in that it's got inner parts that use
1731 # the same boundary as outer parts. Again, I believe the way this is
1732 # parsed is closest to the spirit of RFC 2046
1733 msg = self._msgobj('msg_39.txt')
1734 sfp = StringIO()
1735 iterators._structure(msg, sfp)
1736 eq(sfp.getvalue(), """\
1737multipart/mixed
1738 multipart/mixed
1739 multipart/alternative
1740 application/octet-stream
1741 application/octet-stream
1742 text/plain
1743""")
1744
1745 def test_boundary_in_non_multipart(self):
1746 msg = self._msgobj('msg_40.txt')
1747 self.assertEqual(msg.as_string(), '''\
1748MIME-Version: 1.0
1749Content-Type: text/html; boundary="--961284236552522269"
1750
1751----961284236552522269
1752Content-Type: text/html;
1753Content-Transfer-Encoding: 7Bit
1754
1755<html></html>
1756
1757----961284236552522269--
1758''')
1759
1760 def test_boundary_with_leading_space(self):
1761 eq = self.assertEqual
1762 msg = email.message_from_string('''\
1763MIME-Version: 1.0
1764Content-Type: multipart/mixed; boundary=" XXXX"
1765
1766-- XXXX
1767Content-Type: text/plain
1768
1769
1770-- XXXX
1771Content-Type: text/plain
1772
1773-- XXXX--
1774''')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001775 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001776 eq(msg.get_boundary(), ' XXXX')
1777 eq(len(msg.get_payload()), 2)
1778
1779 def test_boundary_without_trailing_newline(self):
1780 m = Parser().parsestr("""\
1781Content-Type: multipart/mixed; boundary="===============0012394164=="
1782MIME-Version: 1.0
1783
1784--===============0012394164==
1785Content-Type: image/file1.jpg
1786MIME-Version: 1.0
1787Content-Transfer-Encoding: base64
1788
1789YXNkZg==
1790--===============0012394164==--""")
Ezio Melottib3aedd42010-11-20 19:04:17 +00001791 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001792
1793
Ezio Melottib3aedd42010-11-20 19:04:17 +00001794
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001795# Test some badly formatted messages
R David Murray3edd22a2011-04-18 13:59:37 -04001796class TestNonConformantBase:
1797
1798 def _msgobj(self, filename):
1799 with openfile(filename) as fp:
1800 return email.message_from_file(fp, policy=self.policy)
1801
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001802 def test_parse_missing_minor_type(self):
1803 eq = self.assertEqual
1804 msg = self._msgobj('msg_14.txt')
1805 eq(msg.get_content_type(), 'text/plain')
1806 eq(msg.get_content_maintype(), 'text')
1807 eq(msg.get_content_subtype(), 'plain')
1808
1809 def test_same_boundary_inner_outer(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001810 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001811 msg = self._msgobj('msg_15.txt')
1812 # XXX We can probably eventually do better
1813 inner = msg.get_payload(0)
1814 unless(hasattr(inner, 'defects'))
R David Murray3edd22a2011-04-18 13:59:37 -04001815 self.assertEqual(len(self.get_defects(inner)), 1)
1816 unless(isinstance(self.get_defects(inner)[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001817 errors.StartBoundaryNotFoundDefect))
1818
1819 def test_multipart_no_boundary(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001820 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001821 msg = self._msgobj('msg_25.txt')
1822 unless(isinstance(msg.get_payload(), str))
R David Murray3edd22a2011-04-18 13:59:37 -04001823 self.assertEqual(len(self.get_defects(msg)), 2)
1824 unless(isinstance(self.get_defects(msg)[0],
1825 errors.NoBoundaryInMultipartDefect))
1826 unless(isinstance(self.get_defects(msg)[1],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001827 errors.MultipartInvariantViolationDefect))
1828
R David Murray749073a2011-06-22 13:47:53 -04001829 multipart_msg = textwrap.dedent("""\
1830 Date: Wed, 14 Nov 2007 12:56:23 GMT
1831 From: foo@bar.invalid
1832 To: foo@bar.invalid
1833 Subject: Content-Transfer-Encoding: base64 and multipart
1834 MIME-Version: 1.0
1835 Content-Type: multipart/mixed;
1836 boundary="===============3344438784458119861=="{}
1837
1838 --===============3344438784458119861==
1839 Content-Type: text/plain
1840
1841 Test message
1842
1843 --===============3344438784458119861==
1844 Content-Type: application/octet-stream
1845 Content-Transfer-Encoding: base64
1846
1847 YWJj
1848
1849 --===============3344438784458119861==--
1850 """)
1851
1852 def test_multipart_invalid_cte(self):
1853 msg = email.message_from_string(
1854 self.multipart_msg.format("\nContent-Transfer-Encoding: base64"),
1855 policy = self.policy)
1856 self.assertEqual(len(self.get_defects(msg)), 1)
1857 self.assertIsInstance(self.get_defects(msg)[0],
1858 errors.InvalidMultipartContentTransferEncodingDefect)
1859
1860 def test_multipart_no_cte_no_defect(self):
1861 msg = email.message_from_string(
1862 self.multipart_msg.format(''),
1863 policy = self.policy)
1864 self.assertEqual(len(self.get_defects(msg)), 0)
1865
1866 def test_multipart_valid_cte_no_defect(self):
1867 for cte in ('7bit', '8bit', 'BINary'):
1868 msg = email.message_from_string(
1869 self.multipart_msg.format(
1870 "\nContent-Transfer-Encoding: {}".format(cte)),
1871 policy = self.policy)
1872 self.assertEqual(len(self.get_defects(msg)), 0)
1873
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001874 def test_invalid_content_type(self):
1875 eq = self.assertEqual
1876 neq = self.ndiffAssertEqual
1877 msg = Message()
1878 # RFC 2045, $5.2 says invalid yields text/plain
1879 msg['Content-Type'] = 'text'
1880 eq(msg.get_content_maintype(), 'text')
1881 eq(msg.get_content_subtype(), 'plain')
1882 eq(msg.get_content_type(), 'text/plain')
1883 # Clear the old value and try something /really/ invalid
1884 del msg['content-type']
1885 msg['Content-Type'] = 'foo'
1886 eq(msg.get_content_maintype(), 'text')
1887 eq(msg.get_content_subtype(), 'plain')
1888 eq(msg.get_content_type(), 'text/plain')
1889 # Still, make sure that the message is idempotently generated
1890 s = StringIO()
1891 g = Generator(s)
1892 g.flatten(msg)
1893 neq(s.getvalue(), 'Content-Type: foo\n\n')
1894
1895 def test_no_start_boundary(self):
1896 eq = self.ndiffAssertEqual
1897 msg = self._msgobj('msg_31.txt')
1898 eq(msg.get_payload(), """\
1899--BOUNDARY
1900Content-Type: text/plain
1901
1902message 1
1903
1904--BOUNDARY
1905Content-Type: text/plain
1906
1907message 2
1908
1909--BOUNDARY--
1910""")
1911
1912 def test_no_separating_blank_line(self):
1913 eq = self.ndiffAssertEqual
1914 msg = self._msgobj('msg_35.txt')
1915 eq(msg.as_string(), """\
1916From: aperson@dom.ain
1917To: bperson@dom.ain
1918Subject: here's something interesting
1919
1920counter to RFC 2822, there's no separating newline here
1921""")
1922
1923 def test_lying_multipart(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001924 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001925 msg = self._msgobj('msg_41.txt')
1926 unless(hasattr(msg, 'defects'))
R David Murray3edd22a2011-04-18 13:59:37 -04001927 self.assertEqual(len(self.get_defects(msg)), 2)
1928 unless(isinstance(self.get_defects(msg)[0],
1929 errors.NoBoundaryInMultipartDefect))
1930 unless(isinstance(self.get_defects(msg)[1],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001931 errors.MultipartInvariantViolationDefect))
1932
1933 def test_missing_start_boundary(self):
1934 outer = self._msgobj('msg_42.txt')
1935 # The message structure is:
1936 #
1937 # multipart/mixed
1938 # text/plain
1939 # message/rfc822
1940 # multipart/mixed [*]
1941 #
1942 # [*] This message is missing its start boundary
1943 bad = outer.get_payload(1).get_payload(0)
R David Murray3edd22a2011-04-18 13:59:37 -04001944 self.assertEqual(len(self.get_defects(bad)), 1)
1945 self.assertTrue(isinstance(self.get_defects(bad)[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001946 errors.StartBoundaryNotFoundDefect))
1947
1948 def test_first_line_is_continuation_header(self):
1949 eq = self.assertEqual
1950 m = ' Line 1\nLine 2\nLine 3'
R David Murray3edd22a2011-04-18 13:59:37 -04001951 msg = email.message_from_string(m, policy=self.policy)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001952 eq(msg.keys(), [])
1953 eq(msg.get_payload(), 'Line 2\nLine 3')
R David Murray3edd22a2011-04-18 13:59:37 -04001954 eq(len(self.get_defects(msg)), 1)
1955 self.assertTrue(isinstance(self.get_defects(msg)[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001956 errors.FirstHeaderLineIsContinuationDefect))
R David Murray3edd22a2011-04-18 13:59:37 -04001957 eq(self.get_defects(msg)[0].line, ' Line 1\n')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001958
1959
class TestNonConformant(TestNonConformantBase, TestEmailBase):
    # Runs the shared tests under email.policy.default and reads defects
    # from the message object's own ``defects`` attribute.

    policy = email.policy.default

    def get_defects(self, obj):
        return obj.defects
1966
1967
class TestNonConformantCapture(TestNonConformantBase, TestEmailBase):
    # Runs the shared tests under a policy that appends every registered
    # defect to one list instead of storing it on the object.

    class CapturePolicy(email.policy.Policy):
        # Replaced per instance by the list passed to the constructor.
        captured = None

        def register_defect(self, obj, defect):
            self.captured.append(defect)

    def setUp(self):
        # A fresh policy (and so a fresh capture list) for every test.
        self.policy = self.CapturePolicy(captured=[])

    def get_defects(self, obj):
        # ``obj`` is ignored: all defects live in the policy's single list.
        return self.policy.captured
1980
1981
class TestRaisingDefects(TestEmailBase):
    # Under email.policy.strict the parser is expected to raise the defect
    # instead of recording it.

    def _msgobj(self, filename):
        # Parse the named sample file with the strict policy.
        with openfile(filename) as fp:
            return email.message_from_file(fp, policy=email.policy.strict)

    def test_same_boundary_inner_outer(self):
        with self.assertRaises(errors.StartBoundaryNotFoundDefect):
            self._msgobj('msg_15.txt')

    def test_multipart_no_boundary(self):
        with self.assertRaises(errors.NoBoundaryInMultipartDefect):
            self._msgobj('msg_25.txt')

    def test_lying_multipart(self):
        with self.assertRaises(errors.NoBoundaryInMultipartDefect):
            self._msgobj('msg_41.txt')

    def test_missing_start_boundary(self):
        with self.assertRaises(errors.StartBoundaryNotFoundDefect):
            self._msgobj('msg_42.txt')

    def test_first_line_is_continuation_header(self):
        m = ' Line 1\nLine 2\nLine 3'
        with self.assertRaises(errors.FirstHeaderLineIsContinuationDefect):
            # The result was previously bound to an unused local; only the
            # exception matters here.
            email.message_from_string(m, policy=email.policy.strict)
2009
Ezio Melottib3aedd42010-11-20 19:04:17 +00002010
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002011# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    def test_rfc2047_multiline(self):
        # A folded header with two encoded words: decode, rebuild, re-encode.
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        rebuilt = make_header(chunks)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(rebuilt.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Andr\xe9', 'iso-8859-1'),
                                  (b'Pirard <pirard@dom.ain>', None)])
        as_text = str(make_header(chunks))
        self.assertEqual(as_text, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        raw = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The', None), (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
        as_text = str(make_header(chunks))
        self.assertEqual(as_text,
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued to surrounding text are expected to come back
        # as the original string, undecoded.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm', None), (b'\xf6', 'iso-8859-1'),
            (b'rg', None), (b'\xe5', 'iso-8859-1'),
            (b'sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # only test complete bytes
        cases = (
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        )
        for encoded, expected in cases:
            chunks = decode_header(template % encoded)
            self.assertEqual(chunks, [(expected, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        expected = [(b'andr\xe9=zz', 'iso-8659-1')]
        self.assertEqual(decode_header(raw), expected)
2075
Ezio Melottib3aedd42010-11-20 19:04:17 +00002076
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002077# Test the MIMEMessage class
2078class TestMIMEMessage(TestEmailBase):
2079 def setUp(self):
2080 with openfile('msg_11.txt') as fp:
2081 self._text = fp.read()
2082
2083 def test_type_error(self):
2084 self.assertRaises(TypeError, MIMEMessage, 'a plain string')
2085
2086 def test_valid_argument(self):
2087 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002088 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002089 subject = 'A sub-message'
2090 m = Message()
2091 m['Subject'] = subject
2092 r = MIMEMessage(m)
2093 eq(r.get_content_type(), 'message/rfc822')
2094 payload = r.get_payload()
2095 unless(isinstance(payload, list))
2096 eq(len(payload), 1)
2097 subpart = payload[0]
2098 unless(subpart is m)
2099 eq(subpart['subject'], subject)
2100
2101 def test_bad_multipart(self):
2102 eq = self.assertEqual
2103 msg1 = Message()
2104 msg1['Subject'] = 'subpart 1'
2105 msg2 = Message()
2106 msg2['Subject'] = 'subpart 2'
2107 r = MIMEMessage(msg1)
2108 self.assertRaises(errors.MultipartConversionError, r.attach, msg2)
2109
2110 def test_generate(self):
2111 # First craft the message to be encapsulated
2112 m = Message()
2113 m['Subject'] = 'An enclosed message'
2114 m.set_payload('Here is the body of the message.\n')
2115 r = MIMEMessage(m)
2116 r['Subject'] = 'The enclosing message'
2117 s = StringIO()
2118 g = Generator(s)
2119 g.flatten(r)
2120 self.assertEqual(s.getvalue(), """\
2121Content-Type: message/rfc822
2122MIME-Version: 1.0
2123Subject: The enclosing message
2124
2125Subject: An enclosed message
2126
2127Here is the body of the message.
2128""")
2129
2130 def test_parse_message_rfc822(self):
2131 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002132 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002133 msg = self._msgobj('msg_11.txt')
2134 eq(msg.get_content_type(), 'message/rfc822')
2135 payload = msg.get_payload()
2136 unless(isinstance(payload, list))
2137 eq(len(payload), 1)
2138 submsg = payload[0]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002139 self.assertTrue(isinstance(submsg, Message))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002140 eq(submsg['subject'], 'An enclosed message')
2141 eq(submsg.get_payload(), 'Here is the body of the message.\n')
2142
2143 def test_dsn(self):
2144 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002145 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002146 # msg 16 is a Delivery Status Notification, see RFC 1894
2147 msg = self._msgobj('msg_16.txt')
2148 eq(msg.get_content_type(), 'multipart/report')
2149 unless(msg.is_multipart())
2150 eq(len(msg.get_payload()), 3)
2151 # Subpart 1 is a text/plain, human readable section
2152 subpart = msg.get_payload(0)
2153 eq(subpart.get_content_type(), 'text/plain')
2154 eq(subpart.get_payload(), """\
2155This report relates to a message you sent with the following header fields:
2156
2157 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
2158 Date: Sun, 23 Sep 2001 20:10:55 -0700
2159 From: "Ian T. Henry" <henryi@oxy.edu>
2160 To: SoCal Raves <scr@socal-raves.org>
2161 Subject: [scr] yeah for Ians!!
2162
2163Your message cannot be delivered to the following recipients:
2164
2165 Recipient address: jangel1@cougar.noc.ucla.edu
2166 Reason: recipient reached disk quota
2167
2168""")
2169 # Subpart 2 contains the machine parsable DSN information. It
2170 # consists of two blocks of headers, represented by two nested Message
2171 # objects.
2172 subpart = msg.get_payload(1)
2173 eq(subpart.get_content_type(), 'message/delivery-status')
2174 eq(len(subpart.get_payload()), 2)
2175 # message/delivery-status should treat each block as a bunch of
2176 # headers, i.e. a bunch of Message objects.
2177 dsn1 = subpart.get_payload(0)
2178 unless(isinstance(dsn1, Message))
2179 eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
2180 eq(dsn1.get_param('dns', header='reporting-mta'), '')
2181 # Try a missing one <wink>
2182 eq(dsn1.get_param('nsd', header='reporting-mta'), None)
2183 dsn2 = subpart.get_payload(1)
2184 unless(isinstance(dsn2, Message))
2185 eq(dsn2['action'], 'failed')
2186 eq(dsn2.get_params(header='original-recipient'),
2187 [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
2188 eq(dsn2.get_param('rfc822', header='final-recipient'), '')
2189 # Subpart 3 is the original message
2190 subpart = msg.get_payload(2)
2191 eq(subpart.get_content_type(), 'message/rfc822')
2192 payload = subpart.get_payload()
2193 unless(isinstance(payload, list))
2194 eq(len(payload), 1)
2195 subsubpart = payload[0]
2196 unless(isinstance(subsubpart, Message))
2197 eq(subsubpart.get_content_type(), 'text/plain')
2198 eq(subsubpart['message-id'],
2199 '<002001c144a6$8752e060$56104586@oxy.edu>')
2200
2201 def test_epilogue(self):
2202 eq = self.ndiffAssertEqual
2203 with openfile('msg_21.txt') as fp:
2204 text = fp.read()
2205 msg = Message()
2206 msg['From'] = 'aperson@dom.ain'
2207 msg['To'] = 'bperson@dom.ain'
2208 msg['Subject'] = 'Test'
2209 msg.preamble = 'MIME message'
2210 msg.epilogue = 'End of MIME message\n'
2211 msg1 = MIMEText('One')
2212 msg2 = MIMEText('Two')
2213 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
2214 msg.attach(msg1)
2215 msg.attach(msg2)
2216 sfp = StringIO()
2217 g = Generator(sfp)
2218 g.flatten(msg)
2219 eq(sfp.getvalue(), text)
2220
2221 def test_no_nl_preamble(self):
2222 eq = self.ndiffAssertEqual
2223 msg = Message()
2224 msg['From'] = 'aperson@dom.ain'
2225 msg['To'] = 'bperson@dom.ain'
2226 msg['Subject'] = 'Test'
2227 msg.preamble = 'MIME message'
2228 msg.epilogue = ''
2229 msg1 = MIMEText('One')
2230 msg2 = MIMEText('Two')
2231 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
2232 msg.attach(msg1)
2233 msg.attach(msg2)
2234 eq(msg.as_string(), """\
2235From: aperson@dom.ain
2236To: bperson@dom.ain
2237Subject: Test
2238Content-Type: multipart/mixed; boundary="BOUNDARY"
2239
2240MIME message
2241--BOUNDARY
2242Content-Type: text/plain; charset="us-ascii"
2243MIME-Version: 1.0
2244Content-Transfer-Encoding: 7bit
2245
2246One
2247--BOUNDARY
2248Content-Type: text/plain; charset="us-ascii"
2249MIME-Version: 1.0
2250Content-Transfer-Encoding: 7bit
2251
2252Two
2253--BOUNDARY--
2254""")
2255
2256 def test_default_type(self):
2257 eq = self.assertEqual
2258 with openfile('msg_30.txt') as fp:
2259 msg = email.message_from_file(fp)
2260 container1 = msg.get_payload(0)
2261 eq(container1.get_default_type(), 'message/rfc822')
2262 eq(container1.get_content_type(), 'message/rfc822')
2263 container2 = msg.get_payload(1)
2264 eq(container2.get_default_type(), 'message/rfc822')
2265 eq(container2.get_content_type(), 'message/rfc822')
2266 container1a = container1.get_payload(0)
2267 eq(container1a.get_default_type(), 'text/plain')
2268 eq(container1a.get_content_type(), 'text/plain')
2269 container2a = container2.get_payload(0)
2270 eq(container2a.get_default_type(), 'text/plain')
2271 eq(container2a.get_content_type(), 'text/plain')
2272
2273 def test_default_type_with_explicit_container_type(self):
2274 eq = self.assertEqual
2275 with openfile('msg_28.txt') as fp:
2276 msg = email.message_from_file(fp)
2277 container1 = msg.get_payload(0)
2278 eq(container1.get_default_type(), 'message/rfc822')
2279 eq(container1.get_content_type(), 'message/rfc822')
2280 container2 = msg.get_payload(1)
2281 eq(container2.get_default_type(), 'message/rfc822')
2282 eq(container2.get_content_type(), 'message/rfc822')
2283 container1a = container1.get_payload(0)
2284 eq(container1a.get_default_type(), 'text/plain')
2285 eq(container1a.get_content_type(), 'text/plain')
2286 container2a = container2.get_payload(0)
2287 eq(container2a.get_default_type(), 'text/plain')
2288 eq(container2a.get_content_type(), 'text/plain')
2289
2290 def test_default_type_non_parsed(self):
2291 eq = self.assertEqual
2292 neq = self.ndiffAssertEqual
2293 # Set up container
2294 container = MIMEMultipart('digest', 'BOUNDARY')
2295 container.epilogue = ''
2296 # Set up subparts
2297 subpart1a = MIMEText('message 1\n')
2298 subpart2a = MIMEText('message 2\n')
2299 subpart1 = MIMEMessage(subpart1a)
2300 subpart2 = MIMEMessage(subpart2a)
2301 container.attach(subpart1)
2302 container.attach(subpart2)
2303 eq(subpart1.get_content_type(), 'message/rfc822')
2304 eq(subpart1.get_default_type(), 'message/rfc822')
2305 eq(subpart2.get_content_type(), 'message/rfc822')
2306 eq(subpart2.get_default_type(), 'message/rfc822')
2307 neq(container.as_string(0), '''\
2308Content-Type: multipart/digest; boundary="BOUNDARY"
2309MIME-Version: 1.0
2310
2311--BOUNDARY
2312Content-Type: message/rfc822
2313MIME-Version: 1.0
2314
2315Content-Type: text/plain; charset="us-ascii"
2316MIME-Version: 1.0
2317Content-Transfer-Encoding: 7bit
2318
2319message 1
2320
2321--BOUNDARY
2322Content-Type: message/rfc822
2323MIME-Version: 1.0
2324
2325Content-Type: text/plain; charset="us-ascii"
2326MIME-Version: 1.0
2327Content-Transfer-Encoding: 7bit
2328
2329message 2
2330
2331--BOUNDARY--
2332''')
2333 del subpart1['content-type']
2334 del subpart1['mime-version']
2335 del subpart2['content-type']
2336 del subpart2['mime-version']
2337 eq(subpart1.get_content_type(), 'message/rfc822')
2338 eq(subpart1.get_default_type(), 'message/rfc822')
2339 eq(subpart2.get_content_type(), 'message/rfc822')
2340 eq(subpart2.get_default_type(), 'message/rfc822')
2341 neq(container.as_string(0), '''\
2342Content-Type: multipart/digest; boundary="BOUNDARY"
2343MIME-Version: 1.0
2344
2345--BOUNDARY
2346
2347Content-Type: text/plain; charset="us-ascii"
2348MIME-Version: 1.0
2349Content-Transfer-Encoding: 7bit
2350
2351message 1
2352
2353--BOUNDARY
2354
2355Content-Type: text/plain; charset="us-ascii"
2356MIME-Version: 1.0
2357Content-Transfer-Encoding: 7bit
2358
2359message 2
2360
2361--BOUNDARY--
2362''')
2363
2364 def test_mime_attachments_in_constructor(self):
2365 eq = self.assertEqual
2366 text1 = MIMEText('')
2367 text2 = MIMEText('')
2368 msg = MIMEMultipart(_subparts=(text1, text2))
2369 eq(len(msg.get_payload()), 2)
2370 eq(msg.get_payload(0), text1)
2371 eq(msg.get_payload(1), text2)
2372
Christian Heimes587c2bf2008-01-19 16:21:02 +00002373 def test_default_multipart_constructor(self):
2374 msg = MIMEMultipart()
2375 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002376
Ezio Melottib3aedd42010-11-20 19:04:17 +00002377
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
2383class TestIdempotent(TestEmailBase):
R. David Murray719a4492010-11-21 16:53:48 +00002384
2385 linesep = '\n'
2386
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002387 def _msgobj(self, filename):
2388 with openfile(filename) as fp:
2389 data = fp.read()
2390 msg = email.message_from_string(data)
2391 return msg, data
2392
R. David Murray719a4492010-11-21 16:53:48 +00002393 def _idempotent(self, msg, text, unixfrom=False):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002394 eq = self.ndiffAssertEqual
2395 s = StringIO()
2396 g = Generator(s, maxheaderlen=0)
R. David Murray719a4492010-11-21 16:53:48 +00002397 g.flatten(msg, unixfrom=unixfrom)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002398 eq(text, s.getvalue())
2399
2400 def test_parse_text_message(self):
Ezio Melottib3aedd42010-11-20 19:04:17 +00002401 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002402 msg, text = self._msgobj('msg_01.txt')
2403 eq(msg.get_content_type(), 'text/plain')
2404 eq(msg.get_content_maintype(), 'text')
2405 eq(msg.get_content_subtype(), 'plain')
2406 eq(msg.get_params()[1], ('charset', 'us-ascii'))
2407 eq(msg.get_param('charset'), 'us-ascii')
2408 eq(msg.preamble, None)
2409 eq(msg.epilogue, None)
2410 self._idempotent(msg, text)
2411
2412 def test_parse_untyped_message(self):
Ezio Melottib3aedd42010-11-20 19:04:17 +00002413 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002414 msg, text = self._msgobj('msg_03.txt')
2415 eq(msg.get_content_type(), 'text/plain')
2416 eq(msg.get_params(), None)
2417 eq(msg.get_param('charset'), None)
2418 self._idempotent(msg, text)
2419
2420 def test_simple_multipart(self):
2421 msg, text = self._msgobj('msg_04.txt')
2422 self._idempotent(msg, text)
2423
2424 def test_MIME_digest(self):
2425 msg, text = self._msgobj('msg_02.txt')
2426 self._idempotent(msg, text)
2427
2428 def test_long_header(self):
2429 msg, text = self._msgobj('msg_27.txt')
2430 self._idempotent(msg, text)
2431
2432 def test_MIME_digest_with_part_headers(self):
2433 msg, text = self._msgobj('msg_28.txt')
2434 self._idempotent(msg, text)
2435
2436 def test_mixed_with_image(self):
2437 msg, text = self._msgobj('msg_06.txt')
2438 self._idempotent(msg, text)
2439
2440 def test_multipart_report(self):
2441 msg, text = self._msgobj('msg_05.txt')
2442 self._idempotent(msg, text)
2443
2444 def test_dsn(self):
2445 msg, text = self._msgobj('msg_16.txt')
2446 self._idempotent(msg, text)
2447
2448 def test_preamble_epilogue(self):
2449 msg, text = self._msgobj('msg_21.txt')
2450 self._idempotent(msg, text)
2451
2452 def test_multipart_one_part(self):
2453 msg, text = self._msgobj('msg_23.txt')
2454 self._idempotent(msg, text)
2455
2456 def test_multipart_no_parts(self):
2457 msg, text = self._msgobj('msg_24.txt')
2458 self._idempotent(msg, text)
2459
2460 def test_no_start_boundary(self):
2461 msg, text = self._msgobj('msg_31.txt')
2462 self._idempotent(msg, text)
2463
2464 def test_rfc2231_charset(self):
2465 msg, text = self._msgobj('msg_32.txt')
2466 self._idempotent(msg, text)
2467
2468 def test_more_rfc2231_parameters(self):
2469 msg, text = self._msgobj('msg_33.txt')
2470 self._idempotent(msg, text)
2471
2472 def test_text_plain_in_a_multipart_digest(self):
2473 msg, text = self._msgobj('msg_34.txt')
2474 self._idempotent(msg, text)
2475
2476 def test_nested_multipart_mixeds(self):
2477 msg, text = self._msgobj('msg_12a.txt')
2478 self._idempotent(msg, text)
2479
2480 def test_message_external_body_idempotent(self):
2481 msg, text = self._msgobj('msg_36.txt')
2482 self._idempotent(msg, text)
2483
R. David Murray719a4492010-11-21 16:53:48 +00002484 def test_message_delivery_status(self):
2485 msg, text = self._msgobj('msg_43.txt')
2486 self._idempotent(msg, text, unixfrom=True)
2487
R. David Murray96fd54e2010-10-08 15:55:28 +00002488 def test_message_signed_idempotent(self):
2489 msg, text = self._msgobj('msg_45.txt')
2490 self._idempotent(msg, text)
2491
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002492 def test_content_type(self):
Ezio Melottib3aedd42010-11-20 19:04:17 +00002493 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002494 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002495 # Get a message object and reset the seek pointer for other tests
2496 msg, text = self._msgobj('msg_05.txt')
2497 eq(msg.get_content_type(), 'multipart/report')
2498 # Test the Content-Type: parameters
2499 params = {}
2500 for pk, pv in msg.get_params():
2501 params[pk] = pv
2502 eq(params['report-type'], 'delivery-status')
2503 eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
R. David Murray719a4492010-11-21 16:53:48 +00002504 eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
2505 eq(msg.epilogue, self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002506 eq(len(msg.get_payload()), 3)
2507 # Make sure the subparts are what we expect
2508 msg1 = msg.get_payload(0)
2509 eq(msg1.get_content_type(), 'text/plain')
R. David Murray719a4492010-11-21 16:53:48 +00002510 eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002511 msg2 = msg.get_payload(1)
2512 eq(msg2.get_content_type(), 'text/plain')
R. David Murray719a4492010-11-21 16:53:48 +00002513 eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002514 msg3 = msg.get_payload(2)
2515 eq(msg3.get_content_type(), 'message/rfc822')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002516 self.assertTrue(isinstance(msg3, Message))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002517 payload = msg3.get_payload()
2518 unless(isinstance(payload, list))
2519 eq(len(payload), 1)
2520 msg4 = payload[0]
2521 unless(isinstance(msg4, Message))
R. David Murray719a4492010-11-21 16:53:48 +00002522 eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002523
2524 def test_parser(self):
Ezio Melottib3aedd42010-11-20 19:04:17 +00002525 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002526 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002527 msg, text = self._msgobj('msg_06.txt')
2528 # Check some of the outer headers
2529 eq(msg.get_content_type(), 'message/rfc822')
2530 # Make sure the payload is a list of exactly one sub-Message, and that
2531 # that submessage has a type of text/plain
2532 payload = msg.get_payload()
2533 unless(isinstance(payload, list))
2534 eq(len(payload), 1)
2535 msg1 = payload[0]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002536 self.assertTrue(isinstance(msg1, Message))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002537 eq(msg1.get_content_type(), 'text/plain')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002538 self.assertTrue(isinstance(msg1.get_payload(), str))
R. David Murray719a4492010-11-21 16:53:48 +00002539 eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002540
2541
Ezio Melottib3aedd42010-11-20 19:04:17 +00002542
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002543# Test various other bits of the package's functionality
2544class TestMiscellaneous(TestEmailBase):
2545 def test_message_from_string(self):
2546 with openfile('msg_01.txt') as fp:
2547 text = fp.read()
2548 msg = email.message_from_string(text)
2549 s = StringIO()
2550 # Don't wrap/continue long headers since we're trying to test
2551 # idempotency.
2552 g = Generator(s, maxheaderlen=0)
2553 g.flatten(msg)
2554 self.assertEqual(text, s.getvalue())
2555
2556 def test_message_from_file(self):
2557 with openfile('msg_01.txt') as fp:
2558 text = fp.read()
2559 fp.seek(0)
2560 msg = email.message_from_file(fp)
2561 s = StringIO()
2562 # Don't wrap/continue long headers since we're trying to test
2563 # idempotency.
2564 g = Generator(s, maxheaderlen=0)
2565 g.flatten(msg)
2566 self.assertEqual(text, s.getvalue())
2567
2568 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002569 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002570 with openfile('msg_01.txt') as fp:
2571 text = fp.read()
2572
2573 # Create a subclass
2574 class MyMessage(Message):
2575 pass
2576
2577 msg = email.message_from_string(text, MyMessage)
2578 unless(isinstance(msg, MyMessage))
2579 # Try something more complicated
2580 with openfile('msg_02.txt') as fp:
2581 text = fp.read()
2582 msg = email.message_from_string(text, MyMessage)
2583 for subpart in msg.walk():
2584 unless(isinstance(subpart, MyMessage))
2585
2586 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002587 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002588 # Create a subclass
2589 class MyMessage(Message):
2590 pass
2591
2592 with openfile('msg_01.txt') as fp:
2593 msg = email.message_from_file(fp, MyMessage)
2594 unless(isinstance(msg, MyMessage))
2595 # Try something more complicated
2596 with openfile('msg_02.txt') as fp:
2597 msg = email.message_from_file(fp, MyMessage)
2598 for subpart in msg.walk():
2599 unless(isinstance(subpart, MyMessage))
2600
2601 def test__all__(self):
2602 module = __import__('email')
R David Murray1b6c7242012-03-16 22:43:05 -04002603 self.assertEqual(sorted(module.__all__), [
2604 'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2605 'generator', 'header', 'iterators', 'message',
2606 'message_from_binary_file', 'message_from_bytes',
2607 'message_from_file', 'message_from_string', 'mime', 'parser',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002608 'quoprimime', 'utils',
2609 ])
2610
2611 def test_formatdate(self):
2612 now = time.time()
2613 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2614 time.gmtime(now)[:6])
2615
2616 def test_formatdate_localtime(self):
2617 now = time.time()
2618 self.assertEqual(
2619 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2620 time.localtime(now)[:6])
2621
2622 def test_formatdate_usegmt(self):
2623 now = time.time()
2624 self.assertEqual(
2625 utils.formatdate(now, localtime=False),
2626 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2627 self.assertEqual(
2628 utils.formatdate(now, localtime=False, usegmt=True),
2629 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2630
2631 def test_parsedate_none(self):
2632 self.assertEqual(utils.parsedate(''), None)
2633
2634 def test_parsedate_compact(self):
2635 # The FWS after the comma is optional
2636 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2637 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2638
2639 def test_parsedate_no_dayofweek(self):
2640 eq = self.assertEqual
2641 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2642 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2643
2644 def test_parsedate_compact_no_dayofweek(self):
2645 eq = self.assertEqual
2646 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2647 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2648
R. David Murray4a62e892010-12-23 20:35:46 +00002649 def test_parsedate_no_space_before_positive_offset(self):
2650 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2651 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2652
2653 def test_parsedate_no_space_before_negative_offset(self):
2654 # Issue 1155362: we already handled '+' for this case.
2655 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2656 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2657
2658
R David Murrayaccd1c02011-03-13 20:06:23 -04002659 def test_parsedate_accepts_time_with_dots(self):
2660 eq = self.assertEqual
2661 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2662 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2663 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2664 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2665
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002666 def test_parsedate_acceptable_to_time_functions(self):
2667 eq = self.assertEqual
2668 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2669 t = int(time.mktime(timetup))
2670 eq(time.localtime(t)[:6], timetup[:6])
2671 eq(int(time.strftime('%Y', timetup)), 2003)
2672 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2673 t = int(time.mktime(timetup[:9]))
2674 eq(time.localtime(t)[:6], timetup[:6])
2675 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2676
R. David Murray219d1c82010-08-25 00:45:55 +00002677 def test_parsedate_y2k(self):
2678 """Test for parsing a date with a two-digit year.
2679
2680 Parsing a date with a two-digit year should return the correct
2681 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2682 obsoletes RFC822) requires four-digit years.
2683
2684 """
2685 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2686 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2687 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2688 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2689
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002690 def test_parseaddr_empty(self):
2691 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2692 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2693
2694 def test_noquote_dump(self):
2695 self.assertEqual(
2696 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2697 'A Silly Person <person@dom.ain>')
2698
2699 def test_escape_dump(self):
2700 self.assertEqual(
2701 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
R David Murrayb53319f2012-03-14 15:31:47 -04002702 r'"A (Very) Silly Person" <person@dom.ain>')
2703 self.assertEqual(
2704 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
2705 ('A (Very) Silly Person', 'person@dom.ain'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002706 a = r'A \(Special\) Person'
2707 b = 'person@dom.ain'
2708 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2709
2710 def test_escape_backslashes(self):
2711 self.assertEqual(
2712 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2713 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2714 a = r'Arthur \Backslash\ Foobar'
2715 b = 'person@dom.ain'
2716 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2717
R David Murray8debacb2011-04-06 09:35:57 -04002718 def test_quotes_unicode_names(self):
2719 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2720 name = "H\u00e4ns W\u00fcrst"
2721 addr = 'person@dom.ain'
2722 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2723 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2724 self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2725 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2726 latin1_quopri)
2727
2728 def test_accepts_any_charset_like_object(self):
2729 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2730 name = "H\u00e4ns W\u00fcrst"
2731 addr = 'person@dom.ain'
2732 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2733 foobar = "FOOBAR"
2734 class CharsetMock:
2735 def header_encode(self, string):
2736 return foobar
2737 mock = CharsetMock()
2738 mock_expected = "%s <%s>" % (foobar, addr)
2739 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2740 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2741 utf8_base64)
2742
2743 def test_invalid_charset_like_object_raises_error(self):
2744 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2745 name = "H\u00e4ns W\u00fcrst"
2746 addr = 'person@dom.ain'
2747 # A object without a header_encode method:
2748 bad_charset = object()
2749 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
2750 bad_charset)
2751
2752 def test_unicode_address_raises_error(self):
2753 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2754 addr = 'pers\u00f6n@dom.in'
2755 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
2756 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
2757
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002758 def test_name_with_dot(self):
2759 x = 'John X. Doe <jxd@example.com>'
2760 y = '"John X. Doe" <jxd@example.com>'
2761 a, b = ('John X. Doe', 'jxd@example.com')
2762 self.assertEqual(utils.parseaddr(x), (a, b))
2763 self.assertEqual(utils.parseaddr(y), (a, b))
2764 # formataddr() quotes the name if there's a dot in it
2765 self.assertEqual(utils.formataddr((a, b)), y)
2766
R. David Murray5397e862010-10-02 15:58:26 +00002767 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2768 # issue 10005. Note that in the third test the second pair of
2769 # backslashes is not actually a quoted pair because it is not inside a
2770 # comment or quoted string: the address being parsed has a quoted
2771 # string containing a quoted backslash, followed by 'example' and two
2772 # backslashes, followed by another quoted string containing a space and
2773 # the word 'example'. parseaddr copies those two backslashes
2774 # literally. Per rfc5322 this is not technically correct since a \ may
2775 # not appear in an address outside of a quoted string. It is probably
2776 # a sensible Postel interpretation, though.
2777 eq = self.assertEqual
2778 eq(utils.parseaddr('""example" example"@example.com'),
2779 ('', '""example" example"@example.com'))
2780 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2781 ('', '"\\"example\\" example"@example.com'))
2782 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2783 ('', '"\\\\"example\\\\" example"@example.com'))
2784
R. David Murray63563cd2010-12-18 18:25:38 +00002785 def test_parseaddr_preserves_spaces_in_local_part(self):
2786 # issue 9286. A normal RFC5322 local part should not contain any
2787 # folding white space, but legacy local parts can (they are a sequence
2788 # of atoms, not dotatoms). On the other hand we strip whitespace from
2789 # before the @ and around dots, on the assumption that the whitespace
2790 # around the punctuation is a mistake in what would otherwise be
2791 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2792 self.assertEqual(('', "merwok wok@xample.com"),
2793 utils.parseaddr("merwok wok@xample.com"))
2794 self.assertEqual(('', "merwok wok@xample.com"),
2795 utils.parseaddr("merwok wok@xample.com"))
2796 self.assertEqual(('', "merwok wok@xample.com"),
2797 utils.parseaddr(" merwok wok @xample.com"))
2798 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2799 utils.parseaddr('merwok"wok" wok@xample.com'))
2800 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2801 utils.parseaddr('merwok. wok . wok@xample.com'))
2802
R David Murrayb53319f2012-03-14 15:31:47 -04002803 def test_formataddr_does_not_quote_parens_in_quoted_string(self):
2804 addr = ("'foo@example.com' (foo@example.com)",
2805 'foo@example.com')
2806 addrstr = ('"\'foo@example.com\' '
2807 '(foo@example.com)" <foo@example.com>')
2808 self.assertEqual(utils.parseaddr(addrstr), addr)
2809 self.assertEqual(utils.formataddr(addr), addrstr)
2810
2811
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002812 def test_multiline_from_comment(self):
2813 x = """\
2814Foo
2815\tBar <foo@example.com>"""
2816 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2817
2818 def test_quote_dump(self):
2819 self.assertEqual(
2820 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2821 r'"A Silly; Person" <person@dom.ain>')
2822
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002823 def test_charset_richcomparisons(self):
2824 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002825 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002826 cset1 = Charset()
2827 cset2 = Charset()
2828 eq(cset1, 'us-ascii')
2829 eq(cset1, 'US-ASCII')
2830 eq(cset1, 'Us-AsCiI')
2831 eq('us-ascii', cset1)
2832 eq('US-ASCII', cset1)
2833 eq('Us-AsCiI', cset1)
2834 ne(cset1, 'usascii')
2835 ne(cset1, 'USASCII')
2836 ne(cset1, 'UsAsCiI')
2837 ne('usascii', cset1)
2838 ne('USASCII', cset1)
2839 ne('UsAsCiI', cset1)
2840 eq(cset1, cset2)
2841 eq(cset2, cset1)
2842
2843 def test_getaddresses(self):
2844 eq = self.assertEqual
2845 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2846 'Bud Person <bperson@dom.ain>']),
2847 [('Al Person', 'aperson@dom.ain'),
2848 ('Bud Person', 'bperson@dom.ain')])
2849
2850 def test_getaddresses_nasty(self):
2851 eq = self.assertEqual
2852 eq(utils.getaddresses(['foo: ;']), [('', '')])
2853 eq(utils.getaddresses(
2854 ['[]*-- =~$']),
2855 [('', ''), ('', ''), ('', '*--')])
2856 eq(utils.getaddresses(
2857 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2858 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2859
2860 def test_getaddresses_embedded_comment(self):
2861 """Test proper handling of a nested comment"""
2862 eq = self.assertEqual
2863 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2864 eq(addrs[0][1], 'foo@bar.com')
2865
2866 def test_utils_quote_unquote(self):
2867 eq = self.assertEqual
2868 msg = Message()
2869 msg.add_header('content-disposition', 'attachment',
2870 filename='foo\\wacky"name')
2871 eq(msg.get_filename(), 'foo\\wacky"name')
2872
2873 def test_get_body_encoding_with_bogus_charset(self):
2874 charset = Charset('not a charset')
2875 self.assertEqual(charset.get_body_encoding(), 'base64')
2876
2877 def test_get_body_encoding_with_uppercase_charset(self):
2878 eq = self.assertEqual
2879 msg = Message()
2880 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2881 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2882 charsets = msg.get_charsets()
2883 eq(len(charsets), 1)
2884 eq(charsets[0], 'utf-8')
2885 charset = Charset(charsets[0])
2886 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002887 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002888 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2889 eq(msg.get_payload(decode=True), b'hello world')
2890 eq(msg['content-transfer-encoding'], 'base64')
2891 # Try another one
2892 msg = Message()
2893 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2894 charsets = msg.get_charsets()
2895 eq(len(charsets), 1)
2896 eq(charsets[0], 'us-ascii')
2897 charset = Charset(charsets[0])
2898 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2899 msg.set_payload('hello world', charset=charset)
2900 eq(msg.get_payload(), 'hello world')
2901 eq(msg['content-transfer-encoding'], '7bit')
2902
2903 def test_charsets_case_insensitive(self):
2904 lc = Charset('us-ascii')
2905 uc = Charset('US-ASCII')
2906 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2907
2908 def test_partial_falls_inside_message_delivery_status(self):
2909 eq = self.ndiffAssertEqual
2910 # The Parser interface provides chunks of data to FeedParser in 8192
2911 # byte gulps. SF bug #1076485 found one of those chunks inside
2912 # message/delivery-status header block, which triggered an
2913 # unreadline() of NeedMoreData.
2914 msg = self._msgobj('msg_43.txt')
2915 sfp = StringIO()
2916 iterators._structure(msg, sfp)
2917 eq(sfp.getvalue(), """\
2918multipart/report
2919 text/plain
2920 message/delivery-status
2921 text/plain
2922 text/plain
2923 text/plain
2924 text/plain
2925 text/plain
2926 text/plain
2927 text/plain
2928 text/plain
2929 text/plain
2930 text/plain
2931 text/plain
2932 text/plain
2933 text/plain
2934 text/plain
2935 text/plain
2936 text/plain
2937 text/plain
2938 text/plain
2939 text/plain
2940 text/plain
2941 text/plain
2942 text/plain
2943 text/plain
2944 text/plain
2945 text/plain
2946 text/plain
2947 text/rfc822-headers
2948""")
2949
R. David Murraya0b44b52010-12-02 21:47:19 +00002950 def test_make_msgid_domain(self):
2951 self.assertEqual(
2952 email.utils.make_msgid(domain='testdomain-string')[-19:],
2953 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002954
Ezio Melottib3aedd42010-11-20 19:04:17 +00002955
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002956# Test the iterator/generators
2957class TestIterators(TestEmailBase):
2958 def test_body_line_iterator(self):
2959 eq = self.assertEqual
2960 neq = self.ndiffAssertEqual
2961 # First a simple non-multipart message
2962 msg = self._msgobj('msg_01.txt')
2963 it = iterators.body_line_iterator(msg)
2964 lines = list(it)
2965 eq(len(lines), 6)
2966 neq(EMPTYSTRING.join(lines), msg.get_payload())
2967 # Now a more complicated multipart
2968 msg = self._msgobj('msg_02.txt')
2969 it = iterators.body_line_iterator(msg)
2970 lines = list(it)
2971 eq(len(lines), 43)
2972 with openfile('msg_19.txt') as fp:
2973 neq(EMPTYSTRING.join(lines), fp.read())
2974
2975 def test_typed_subpart_iterator(self):
2976 eq = self.assertEqual
2977 msg = self._msgobj('msg_04.txt')
2978 it = iterators.typed_subpart_iterator(msg, 'text')
2979 lines = []
2980 subparts = 0
2981 for subpart in it:
2982 subparts += 1
2983 lines.append(subpart.get_payload())
2984 eq(subparts, 2)
2985 eq(EMPTYSTRING.join(lines), """\
2986a simple kind of mirror
2987to reflect upon our own
2988a simple kind of mirror
2989to reflect upon our own
2990""")
2991
2992 def test_typed_subpart_iterator_default_type(self):
2993 eq = self.assertEqual
2994 msg = self._msgobj('msg_03.txt')
2995 it = iterators.typed_subpart_iterator(msg, 'text', 'plain')
2996 lines = []
2997 subparts = 0
2998 for subpart in it:
2999 subparts += 1
3000 lines.append(subpart.get_payload())
3001 eq(subparts, 1)
3002 eq(EMPTYSTRING.join(lines), """\
3003
3004Hi,
3005
3006Do you like this message?
3007
3008-Me
3009""")
3010
R. David Murray45bf773f2010-07-17 01:19:57 +00003011 def test_pushCR_LF(self):
3012 '''FeedParser BufferedSubFile.push() assumed it received complete
3013 line endings. A CR ending one push() followed by a LF starting
3014 the next push() added an empty line.
3015 '''
3016 imt = [
3017 ("a\r \n", 2),
3018 ("b", 0),
3019 ("c\n", 1),
3020 ("", 0),
3021 ("d\r\n", 1),
3022 ("e\r", 0),
3023 ("\nf", 1),
3024 ("\r\n", 1),
3025 ]
3026 from email.feedparser import BufferedSubFile, NeedMoreData
3027 bsf = BufferedSubFile()
3028 om = []
3029 nt = 0
3030 for il, n in imt:
3031 bsf.push(il)
3032 nt += n
3033 n1 = 0
3034 while True:
3035 ol = bsf.readline()
3036 if ol == NeedMoreData:
3037 break
3038 om.append(ol)
3039 n1 += 1
3040 self.assertTrue(n == n1)
3041 self.assertTrue(len(om) == nt)
3042 self.assertTrue(''.join([il for il, n in imt]) == ''.join(om))
3043
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003044
Ezio Melottib3aedd42010-11-20 19:04:17 +00003045
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003046class TestParsers(TestEmailBase):
R David Murrayb35c8502011-04-13 16:46:05 -04003047
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003048 def test_header_parser(self):
3049 eq = self.assertEqual
3050 # Parse only the headers of a complex multipart MIME document
3051 with openfile('msg_02.txt') as fp:
3052 msg = HeaderParser().parse(fp)
3053 eq(msg['from'], 'ppp-request@zzz.org')
3054 eq(msg['to'], 'ppp@zzz.org')
3055 eq(msg.get_content_type(), 'multipart/mixed')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003056 self.assertFalse(msg.is_multipart())
3057 self.assertTrue(isinstance(msg.get_payload(), str))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003058
R David Murrayb35c8502011-04-13 16:46:05 -04003059 def test_bytes_header_parser(self):
3060 eq = self.assertEqual
3061 # Parse only the headers of a complex multipart MIME document
3062 with openfile('msg_02.txt', 'rb') as fp:
3063 msg = email.parser.BytesHeaderParser().parse(fp)
3064 eq(msg['from'], 'ppp-request@zzz.org')
3065 eq(msg['to'], 'ppp@zzz.org')
3066 eq(msg.get_content_type(), 'multipart/mixed')
3067 self.assertFalse(msg.is_multipart())
3068 self.assertTrue(isinstance(msg.get_payload(), str))
3069 self.assertTrue(isinstance(msg.get_payload(decode=True), bytes))
3070
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003071 def test_whitespace_continuation(self):
3072 eq = self.assertEqual
3073 # This message contains a line after the Subject: header that has only
3074 # whitespace, but it is not empty!
3075 msg = email.message_from_string("""\
3076From: aperson@dom.ain
3077To: bperson@dom.ain
3078Subject: the next line has a space on it
3079\x20
3080Date: Mon, 8 Apr 2002 15:09:19 -0400
3081Message-ID: spam
3082
3083Here's the message body
3084""")
3085 eq(msg['subject'], 'the next line has a space on it\n ')
3086 eq(msg['message-id'], 'spam')
3087 eq(msg.get_payload(), "Here's the message body\n")
3088
3089 def test_whitespace_continuation_last_header(self):
3090 eq = self.assertEqual
3091 # Like the previous test, but the subject line is the last
3092 # header.
3093 msg = email.message_from_string("""\
3094From: aperson@dom.ain
3095To: bperson@dom.ain
3096Date: Mon, 8 Apr 2002 15:09:19 -0400
3097Message-ID: spam
3098Subject: the next line has a space on it
3099\x20
3100
3101Here's the message body
3102""")
3103 eq(msg['subject'], 'the next line has a space on it\n ')
3104 eq(msg['message-id'], 'spam')
3105 eq(msg.get_payload(), "Here's the message body\n")
3106
3107 def test_crlf_separation(self):
3108 eq = self.assertEqual
Guido van Rossum98297ee2007-11-06 21:34:58 +00003109 with openfile('msg_26.txt', newline='\n') as fp:
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003110 msg = Parser().parse(fp)
3111 eq(len(msg.get_payload()), 2)
3112 part1 = msg.get_payload(0)
3113 eq(part1.get_content_type(), 'text/plain')
3114 eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
3115 part2 = msg.get_payload(1)
3116 eq(part2.get_content_type(), 'application/riscos')
3117
R. David Murray8451c4b2010-10-23 22:19:56 +00003118 def test_crlf_flatten(self):
3119 # Using newline='\n' preserves the crlfs in this input file.
3120 with openfile('msg_26.txt', newline='\n') as fp:
3121 text = fp.read()
3122 msg = email.message_from_string(text)
3123 s = StringIO()
3124 g = Generator(s)
3125 g.flatten(msg, linesep='\r\n')
3126 self.assertEqual(s.getvalue(), text)
3127
R David Murray3edd22a2011-04-18 13:59:37 -04003128 def test_crlf_control_via_policy(self):
3129 with openfile('msg_26.txt', newline='\n') as fp:
3130 text = fp.read()
3131 msg = email.message_from_string(text)
3132 s = StringIO()
3133 g = email.generator.Generator(s, policy=email.policy.SMTP)
3134 g.flatten(msg)
3135 self.assertEqual(s.getvalue(), text)
3136
3137 def test_flatten_linesep_overrides_policy(self):
3138 # msg_27 is lf separated
3139 with openfile('msg_27.txt', newline='\n') as fp:
3140 text = fp.read()
3141 msg = email.message_from_string(text)
3142 s = StringIO()
3143 g = email.generator.Generator(s, policy=email.policy.SMTP)
3144 g.flatten(msg, linesep='\n')
3145 self.assertEqual(s.getvalue(), text)
3146
R. David Murray8451c4b2010-10-23 22:19:56 +00003147 maxDiff = None
3148
def test_multipart_digest_with_extra_mime_headers(self):
    with openfile('msg_28.txt') as fp:
        msg = email.message_from_file(fp)
    check = self.assertEqual
    # Expected structure of msg_28.txt:
    #   multipart/digest
    #     message/rfc822
    #       text/plain
    #     message/rfc822
    #       text/plain
    check(msg.is_multipart(), 1)
    check(len(msg.get_payload()), 2)
    for index, body in enumerate(('message 1\n', 'message 2\n')):
        # Every digest entry is a message/rfc822 wrapper holding exactly
        # one non-multipart text/plain message.
        wrapper = msg.get_payload(index)
        check(wrapper.get_content_type(), 'message/rfc822')
        check(wrapper.is_multipart(), 1)
        check(len(wrapper.get_payload()), 1)
        inner = wrapper.get_payload(0)
        check(inner.is_multipart(), 0)
        check(inner.get_content_type(), 'text/plain')
        self.ndiffAssertEqual(inner.get_payload(), body)
3179
def test_three_lines(self):
    # A bug report by Andrew McNamara: three header lines joined by
    # newlines, with nothing after the last one.
    header_lines = (
        'From: Andrew Person <aperson@dom.ain',
        'Subject: Test',
        'Date: Tue, 20 Aug 2002 16:43:45 +1000',
    )
    parsed = email.message_from_string(NL.join(header_lines))
    self.assertEqual(parsed['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')
3187
3188 def test_strip_line_feed_and_carriage_return_in_headers(self):
3189 eq = self.assertEqual
3190 # For [ 1002475 ] email message parser doesn't handle \r\n correctly
3191 value1 = 'text'
3192 value2 = 'more text'
3193 m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
3194 value1, value2)
3195 msg = email.message_from_string(m)
3196 eq(msg.get('Header'), value1)
3197 eq(msg.get('Next-Header'), value2)
3198
3199 def test_rfc2822_header_syntax(self):
3200 eq = self.assertEqual
3201 m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3202 msg = email.message_from_string(m)
3203 eq(len(msg), 3)
3204 eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
3205 eq(msg.get_payload(), 'body')
3206
3207 def test_rfc2822_space_not_allowed_in_header(self):
3208 eq = self.assertEqual
3209 m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3210 msg = email.message_from_string(m)
3211 eq(len(msg.keys()), 0)
3212
3213 def test_rfc2822_one_character_header(self):
3214 eq = self.assertEqual
3215 m = 'A: first header\nB: second header\nCC: third header\n\nbody'
3216 msg = email.message_from_string(m)
3217 headers = msg.keys()
3218 headers.sort()
3219 eq(headers, ['A', 'B', 'CC'])
3220 eq(msg.get_payload(), 'body')
3221
R. David Murray45e0e142010-06-16 02:19:40 +00003222 def test_CRLFLF_at_end_of_part(self):
3223 # issue 5610: feedparser should not eat two chars from body part ending
3224 # with "\r\n\n".
3225 m = (
3226 "From: foo@bar.com\n"
3227 "To: baz\n"
3228 "Mime-Version: 1.0\n"
3229 "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
3230 "\n"
3231 "--BOUNDARY\n"
3232 "Content-Type: text/plain\n"
3233 "\n"
3234 "body ending with CRLF newline\r\n"
3235 "\n"
3236 "--BOUNDARY--\n"
3237 )
3238 msg = email.message_from_string(m)
3239 self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003240
Ezio Melottib3aedd42010-11-20 19:04:17 +00003241
class Test8BitBytesHandling(unittest.TestCase):
    # In Python 3 the parser works on strings, which cannot directly hold
    # input that uses an 8bit transfer encoding.  As a workaround, email 5.1
    # decodes byte streams with the surrogateescape error handler and turns
    # the data back into bytes at the appropriate places when surrogates are
    # detected.  Headers containing 8bit bytes cannot be transformed this
    # way (they get munged), but they can be parsed and preserved, and body
    # parts that use an 8bit CTE can be decoded.

    maxDiff = None

    # Single-part message template; charset, cte and bodyline are filled in
    # with str.format() by the body tests.
    bodytest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: baz
        Mime-Version: 1.0
        Content-Type: text/plain; charset={charset}
        Content-Transfer-Encoding: {cte}

        {bodyline}
        """)

    def _parse_body(self, charset, cte, bodyline):
        """Fill in bodytest_msg, encode it as utf-8 and parse the bytes."""
        text = self.bodytest_msg.format(charset=charset, cte=cte,
                                        bodyline=bodyline)
        return email.message_from_bytes(text.encode('utf-8'))

    def test_known_8bit_CTE(self):
        parsed = self._parse_body('utf-8', '8bit', 'pöstal')
        self.assertEqual(parsed.get_payload(), "pöstal\n")
        self.assertEqual(parsed.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_unknown_8bit_CTE(self):
        parsed = self._parse_body('notavalidcharset', '8bit', 'pöstal')
        # The two utf-8 bytes of the non-ASCII character each show up as
        # U+FFFD in the string payload; decode=True still returns the
        # original bytes.
        self.assertEqual(parsed.get_payload(), "p\uFFFD\uFFFDstal\n")
        self.assertEqual(parsed.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_8bit_in_quopri_body(self):
        # This input is not RFC compliant.  Without 'decode' the library
        # decodes the body using the charset from the headers, and because
        # the source bytes really are utf-8 that works.  Against real dirty
        # data it is likely to fail (ie: produce mojibake), but since the
        # data is invalid anyway it is as good a guess as any.  So this
        # test only confirms the current behavior, which is not necessarily
        # the best possible behavior.  With 'decode' the raw bytes are
        # returned, so that check should be of correct behavior, or at
        # least produce the same result that email4 did.
        parsed = self._parse_body('utf-8', 'quoted-printable', 'p=C3=B6stál')
        self.assertEqual(parsed.get_payload(), 'p=C3=B6stál\n')
        self.assertEqual(parsed.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
        # Like the previous test, but proves that when the 8bit byte is
        # undecodeable in the specified charset it gets replaced by the
        # unicode 'unknown' character.  Again, this may or may not be the
        # ideal behavior.  With decode=False none of the decoders get
        # involved, so this is the only test needed for this behavior.
        parsed = self._parse_body('ascii', 'quoted-printable', 'p=C3=B6stál')
        self.assertEqual(parsed.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
        self.assertEqual(parsed.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_8bit_in_base64_body(self):
        # Sticking an 8bit byte in a base64 block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        parsed = self._parse_body('utf-8', 'base64', 'cMO2c3RhbAá=')
        self.assertEqual(parsed.get_payload(decode=True),
                         'cMO2c3RhbAá=\n'.encode('utf-8'))

    def test_8bit_in_uuencode_body(self):
        # Sticking an 8bit byte in a uuencode block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        parsed = self._parse_body('utf-8', 'uuencode', '<,.V<W1A; á ')
        self.assertEqual(parsed.get_payload(decode=True),
                         '<,.V<W1A; á \n'.encode('utf-8'))

    # Each entry pairs a raw source header line with the (name, value) that
    # str(msg) is expected to print for it.
    headertest_headers = (
        ('From: foo@bar.com', ('From', 'foo@bar.com')),
        ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
        ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
            '\tJean de Baddie',
            ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
                'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
                ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
        ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
        )
    headertest_msg = ('\n'.join([raw for (raw, _) in headertest_headers]) +
        '\nYes, they are flying.\n').encode('utf-8')

    def test_get_8bit_header(self):
        parsed = email.message_from_bytes(self.headertest_msg)
        # get() and item access have to agree.
        for value in (parsed.get('to'), parsed['to']):
            self.assertEqual(str(value), 'b\uFFFD\uFFFDz')

    def test_print_8bit_headers(self):
        parsed = email.message_from_bytes(self.headertest_msg)
        template = textwrap.dedent("""\
            From: {}
            To: {}
            Subject: {}
            From: {}

            Yes, they are flying.
            """)
        encoded_values = [value for (_, (_, value)) in self.headertest_headers]
        self.assertEqual(str(parsed), template.format(*encoded_values))

    def test_values_with_8bit_headers(self):
        parsed = email.message_from_bytes(self.headertest_msg)
        subject = ('Maintenant je vous pr\uFFFD\uFFFDsente mon '
                   'coll\uFFFD\uFFFDgue, le pouf '
                   'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                   '\tJean de Baddie')
        self.assertListEqual(
            [str(value) for value in parsed.values()],
            ['foo@bar.com', 'b\uFFFD\uFFFDz', subject, "g\uFFFD\uFFFDst"])

    def test_items_with_8bit_headers(self):
        parsed = email.message_from_bytes(self.headertest_msg)
        subject = ('Maintenant je vous '
                   'pr\uFFFD\uFFFDsente '
                   'mon coll\uFFFD\uFFFDgue, le pouf '
                   'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                   '\tJean de Baddie')
        self.assertListEqual(
            [(str(name), str(value)) for (name, value) in parsed.items()],
            [('From', 'foo@bar.com'),
             ('To', 'b\uFFFD\uFFFDz'),
             ('Subject', subject),
             ('From', 'g\uFFFD\uFFFDst')])

    def test_get_all_with_8bit_headers(self):
        parsed = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual(
            [str(value) for value in parsed.get_all('from')],
            ['foo@bar.com', 'g\uFFFD\uFFFDst'])

    def test_get_content_type_with_8bit(self):
        source = 'Content-Type: text/pl\xA7in; charset=utf-8\n'
        parsed = email.message_from_bytes(source.encode('latin-1'))
        self.assertEqual(parsed.get_content_type(), "text/pl\uFFFDin")
        self.assertEqual(parsed.get_content_maintype(), "text")
        self.assertEqual(parsed.get_content_subtype(), "pl\uFFFDin")

    def test_get_params_with_8bit(self):
        source = 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'
        parsed = email.message_from_bytes(source.encode('latin-1'))
        self.assertEqual(
            parsed.get_params(header='x-header'),
            [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
        self.assertEqual(parsed.get_param('Foo', header='x-header'),
                         '\uFFFdne')
        # XXX: someday you might be able to get 'b\xa7r', for now you can't.
        self.assertEqual(parsed.get_param('b\xa7r', header='x-header'), None)

    # Content-Type header whose RFC 2231 encoded 'title' parameter contains
    # a raw 8bit byte; shared by the three rfc2231 tests below.
    _rfc2231_8bit_source = (
        "Content-Type: text/plain; charset=us-ascii;\n"
        " title*=us-ascii'en'This%20is%20not%20f\xa7n"
        ).encode('latin-1')

    def test_get_rfc2231_params_with_8bit(self):
        parsed = email.message_from_bytes(self._rfc2231_8bit_source)
        self.assertEqual(parsed.get_param('title'),
                         ('us-ascii', 'en', 'This is not f\uFFFDn'))

    def test_set_rfc2231_params_with_8bit(self):
        parsed = email.message_from_bytes(self._rfc2231_8bit_source)
        parsed.set_param('title', 'test')
        self.assertEqual(parsed.get_param('title'), 'test')

    def test_del_rfc2231_params_with_8bit(self):
        parsed = email.message_from_bytes(self._rfc2231_8bit_source)
        parsed.del_param('title')
        self.assertEqual(parsed.get_param('title'), None)
        self.assertEqual(parsed.get_content_maintype(), 'text')

    def test_get_payload_with_8bit_cte_header(self):
        source = textwrap.dedent("""\
            Content-Transfer-Encoding: b\xa7se64
            Content-Type: text/plain; charset=latin-1

            payload
            """)
        parsed = email.message_from_bytes(source.encode('latin-1'))
        # The CTE value contains an 8bit byte; the payload is expected to
        # come back unchanged both with and without decode.
        self.assertEqual(parsed.get_payload(), 'payload\n')
        self.assertEqual(parsed.get_payload(decode=True), b'payload\n')

    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')

    def test_bytes_generator(self):
        parsed = email.message_from_bytes(self.non_latin_bin_msg)
        sink = BytesIO()
        email.generator.BytesGenerator(sink).flatten(parsed)
        self.assertEqual(sink.getvalue(), self.non_latin_bin_msg)

    def test_bytes_generator_handles_None_body(self):
        # Issue 11019
        empty = email.message.Message()
        sink = BytesIO()
        email.generator.BytesGenerator(sink).flatten(empty)
        self.assertEqual(sink.getvalue(), b"\n")

    non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
        From: foo@bar.com
        To: =?unknown-8bit?q?b=C3=A1z?=
        Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
         =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
         =?unknown-8bit?q?_Jean_de_Baddie?=
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: base64

        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
        """)

    def test_generator_handles_8bit(self):
        parsed = email.message_from_bytes(self.non_latin_bin_msg)
        sink = StringIO()
        email.generator.Generator(sink).flatten(parsed)
        self.assertEqual(sink.getvalue(),
                         self.non_latin_bin_msg_as7bit_wrapped)

    def test_bytes_generator_with_unix_from(self):
        # The unixfrom contains a current date, so we can't check it
        # literally.  Just make sure the first word is 'From' and the
        # rest of the message matches the input.
        parsed = email.message_from_bytes(self.non_latin_bin_msg)
        sink = BytesIO()
        email.generator.BytesGenerator(sink).flatten(parsed, unixfrom=True)
        envelope, _, remainder = sink.getvalue().partition(b'\n')
        self.assertEqual(envelope.split()[0], b'From')
        self.assertEqual(remainder, self.non_latin_bin_msg)

    # Same as the wrapped variant, except that the first two physical lines
    # of the Subject header are merged into one line.
    non_latin_bin_msg_as7bit = '\n'.join(
        non_latin_bin_msg_as7bit_wrapped.split('\n')[:2]
        + ['Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
           'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
        + non_latin_bin_msg_as7bit_wrapped.split('\n')[4:])

    def test_message_from_binary_file(self):
        path = 'test.msg'
        self.addCleanup(unlink, path)
        with open(path, 'wb') as writer:
            writer.write(self.non_latin_bin_msg)
        with open(path, 'rb') as reader:
            parsed = email.parser.BytesParser().parse(reader)
        self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)

    latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)

    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
        parsed = email.message_from_bytes(self.latin_bin_msg)
        self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)

    def test_decoded_generator_emits_unicode_body(self):
        parsed = email.message_from_bytes(self.latin_bin_msg)
        sink = StringIO()
        email.generator.DecodedGenerator(sink).flatten(parsed)
        # The DecodedGenerator output contains an extra blank line compared
        # to the input message.  RDM: not sure if this is a bug or not,
        # but it is not specific to the 8bit->7bit conversion.
        expected = self.latin_bin_msg.decode('latin-1') + '\n'
        self.assertEqual(sink.getvalue(), expected)

    def test_bytes_feedparser(self):
        feeder = email.feedparser.BytesFeedParser()
        # Feed the message in 10-byte slices.
        for start in range(0, len(self.latin_bin_msg), 10):
            feeder.feed(self.latin_bin_msg[start:start + 10])
        parsed = feeder.close()
        self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)

    def test_crlf_flatten(self):
        with openfile('msg_26.txt', 'rb') as source:
            original = source.read()
        parsed = email.message_from_bytes(original)
        sink = BytesIO()
        email.generator.BytesGenerator(sink).flatten(parsed, linesep='\r\n')
        self.assertEqual(sink.getvalue(), original)

    def test_8bit_multipart(self):
        # Issue 11605
        source = textwrap.dedent("""\
            Date: Fri, 18 Mar 2011 17:15:43 +0100
            To: foo@example.com
            From: foodwatch-Newsletter <bar@example.com>
            Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
            Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
            MIME-Version: 1.0
            Content-Type: multipart/alternative;
             boundary="b1_76a486bee62b0d200f33dc2ca08220ad"

            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/plain; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            Guten Tag, ,

            mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
            Nachrichten aus Japan.


            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/html; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
            "http://www.w3.org/TR/html4/loose.dtd">
            <html lang="de">
            <head>
            <title>foodwatch - Newsletter</title>
            </head>
            <body>
            <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
            die Nachrichten aus Japan.</p>
            </body>
            </html>
            --b1_76a486bee62b0d200f33dc2ca08220ad--

            """).encode('utf-8')
        parsed = email.message_from_bytes(source)
        sink = BytesIO()
        email.generator.BytesGenerator(sink).flatten(parsed)
        self.assertEqual(sink.getvalue(), source)

    def test_bytes_generator_b_encoding_linesep(self):
        # Issue 14062: b encoding was tacking on an extra \n.
        message = Message()
        # This has enough non-ascii that it should always end up b encoded.
        message['Subject'] = Header('žluťoučký kůň')
        sink = BytesIO()
        email.generator.BytesGenerator(sink).flatten(message, linesep='\r\n')
        self.assertEqual(
            sink.getvalue(),
            b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

    def test_generator_b_encoding_linesep(self):
        # Since this broke in BytesGenerator, test Generator for
        # completeness.
        message = Message()
        # This has enough non-ascii that it should always end up b encoded.
        message['Subject'] = Header('žluťoučký kůň')
        sink = StringIO()
        email.generator.Generator(sink).flatten(message, linesep='\r\n')
        self.assertEqual(
            sink.getvalue(),
            'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

    def test_crlf_control_via_policy(self):
        # msg_26 is crlf terminated
        with openfile('msg_26.txt', 'rb') as source:
            original = source.read()
        parsed = email.message_from_bytes(original)
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink,
                                                   policy=email.policy.SMTP)
        generator.flatten(parsed)
        self.assertEqual(sink.getvalue(), original)

    def test_flatten_linesep_overrides_policy(self):
        # msg_27 is lf separated
        with openfile('msg_27.txt', 'rb') as source:
            original = source.read()
        parsed = email.message_from_bytes(original)
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink,
                                                   policy=email.policy.SMTP)
        generator.flatten(parsed, linesep='\n')
        self.assertEqual(sink.getvalue(), original)

    def test_must_be_7bit_handles_unknown_8bit(self):
        parsed = email.message_from_bytes(self.non_latin_bin_msg)
        sink = BytesIO()
        seven_bit = email.policy.default.clone(must_be_7bit=True)
        email.generator.BytesGenerator(sink, policy=seven_bit).flatten(parsed)
        self.assertEqual(
            sink.getvalue(),
            self.non_latin_bin_msg_as7bit_wrapped.encode('ascii'))

    def test_must_be_7bit_transforms_8bit_cte(self):
        parsed = email.message_from_bytes(self.latin_bin_msg)
        sink = BytesIO()
        seven_bit = email.policy.default.clone(must_be_7bit=True)
        email.generator.BytesGenerator(sink, policy=seven_bit).flatten(parsed)
        self.assertEqual(sink.getvalue(),
                         self.latin_bin_msg_as7bit.encode('ascii'))
3676
Ezio Melottib3aedd42010-11-20 19:04:17 +00003677
class BaseTestBytesGeneratorIdempotent:
    # Mixin with bytes-based _msgobj/_idempotent helpers.  It reads
    # self.linesep, self.blinesep and self.normalize_linesep_regex, none of
    # which are defined here, so the concrete class has to supply them.

    maxDiff = None

    def _msgobj(self, filename):
        # Load the file as bytes and rewrite its line endings to
        # self.blinesep before parsing; the normalised bytes are returned
        # together with the parsed message.
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) and
        # require the bytes produced to equal data.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003694
3695
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # LF flavour: the regex matches CRLF pairs, which the base class's
    # _msgobj replaces with blinesep, and messages are flattened with
    # linesep.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3701
3702
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # CRLF flavour: the regex matches every LF that is not already
    # preceded by CR, which the base class's _msgobj replaces with
    # blinesep, and messages are flattened with linesep.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3708
Ezio Melottib3aedd42010-11-20 19:04:17 +00003709
class TestBase64(unittest.TestCase):
    # Tests for the helper functions in email.base64mime.

    def test_len(self):
        eq = self.assertEqual
        # header_length() has to agree with the size of the real encoding
        # once the trailing line separator is suppressed.
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Base64 emits one 4-character group for every (possibly
            # partial) group of 3 input characters.
            bsize = 4 * ((size + 2) // 3)
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        encode = base64mime.body_encode
        # Empty input must encode to an empty result.  Only emptiness is
        # checked: every other result below is a str, so comparing against
        # b'' would merely pin the type of the echoed-back argument.
        eq(len(encode(b'')), 0)
        eq(encode(b'hello'), 'aGVsbG8=\n')
        # A newline inside the input is encoded like any other byte.
        eq(encode(b'hello\n'), 'aGVsbG8K\n')
        wrapped = ('eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg',
                   'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg',
                   'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg',
                   'eHh4eCB4eHh4IA==')
        # Test the maxlinelen arg
        eq(encode(b'xxxx ' * 20, maxlinelen=40),
           ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        eq(encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        # Line breaks inside the header text are encoded, not emitted.
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003760
3761
Ezio Melottib3aedd42010-11-20 19:04:17 +00003762
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003763class TestQuopri(unittest.TestCase):
def setUp(self):
    every_octet = range(256)
    # Set of characters (as byte integers) that don't need to be encoded
    # in headers: ASCII letters, digits and the five characters !*+-/.
    lowercase = range(ord('a'), ord('z') + 1)
    uppercase = range(ord('A'), ord('Z') + 1)
    digits = range(ord('0'), ord('9') + 1)
    self.hlit = list(chain(lowercase, uppercase, digits, b'!*+-/'))
    # Set of characters (as byte integers) that do need to be encoded in
    # headers.
    self.hnon = [octet for octet in every_octet if octet not in self.hlit]
    assert len(self.hlit) + len(self.hnon) == 256
    # Set of characters (as byte integers) that don't need to be encoded
    # in bodies: space through '~' without '=', followed by tab.
    printable = range(ord(' '), ord('~') + 1)
    self.blit = [octet for octet in printable if octet != ord('=')]
    self.blit.append(ord('\t'))
    # Set of characters (as byte integers) that do need to be encoded in
    # bodies.
    self.bnon = [octet for octet in every_octet if octet not in self.blit]
    assert len(self.blit) + len(self.bnon) == 256
3785
Guido van Rossum9604e662007-08-30 03:46:43 +00003786 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003787 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003788 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003789 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003790 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003791 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003792 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003793
Guido van Rossum9604e662007-08-30 03:46:43 +00003794 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003795 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003796 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003797 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003798 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003799 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003800 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003801
3802 def test_header_quopri_len(self):
3803 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003804 eq(quoprimime.header_length(b'hello'), 5)
3805 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003806 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003807 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003808 # =?xxx?q?...?= means 10 extra characters
3809 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003810 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3811 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003812 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003813 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003814 # =?xxx?q?...?= means 10 extra characters
3815 10)
3816 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003817 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003818 'expected length 1 for %r' % chr(c))
3819 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003820 # Space is special; it's encoded to _
3821 if c == ord(' '):
3822 continue
3823 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003824 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003825 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003826
3827 def test_body_quopri_len(self):
3828 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003829 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003830 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003831 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003832 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003833
3834 def test_quote_unquote_idempotent(self):
3835 for x in range(256):
3836 c = chr(x)
3837 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3838
def _test_header_encode(self, header, expected_encoded_header, charset=None):
    """Assert that quoprimime.header_encode(header) gives the expected text.

    The charset is forwarded positionally only when one is given, so that
    the encoder's own default applies otherwise.
    """
    extra_args = () if charset is None else (charset,)
    self.assertEqual(quoprimime.header_encode(header, *extra_args),
                     expected_encoded_header)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003845
def test_header_encode_null(self):
    # Empty input is expected to encode to an empty string, without any
    # =?charset?q?...?= wrapper.
    self._test_header_encode(header=b'', expected_encoded_header='')
3848
def test_header_encode_one_word(self):
    # With no charset given, the expected wrapper names iso-8859-1.
    self._test_header_encode(
        header=b'hello',
        expected_encoded_header='=?iso-8859-1?q?hello?=')
3851
def test_header_encode_two_lines(self):
    # The embedded newline is expected to come out as =0A.
    self._test_header_encode(
        header=b'hello\nworld',
        expected_encoded_header='=?iso-8859-1?q?hello=0Aworld?=')
3855
def test_header_encode_non_ascii(self):
    # The 8bit byte 0xC7 is expected to come out as =C7.
    self._test_header_encode(
        header=b'hello\xc7there',
        expected_encoded_header='=?iso-8859-1?q?hello=C7there?=')
3859
3860 def test_header_encode_alt_charset(self):
3861 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
3862 charset='iso-8859-2')
3863
3864 def _test_header_decode(self, encoded_header, expected_decoded_header):
3865 decoded_header = quoprimime.header_decode(encoded_header)
3866 self.assertEqual(decoded_header, expected_decoded_header)
3867
3868 def test_header_decode_null(self):
3869 self._test_header_decode('', '')
3870
3871 def test_header_decode_one_word(self):
3872 self._test_header_decode('hello', 'hello')
3873
3874 def test_header_decode_two_lines(self):
3875 self._test_header_decode('hello=0Aworld', 'hello\nworld')
3876
3877 def test_header_decode_non_ascii(self):
3878 self._test_header_decode('hello=C7there', 'hello\xc7there')
3879
3880 def _test_decode(self, encoded, expected_decoded, eol=None):
3881 if eol is None:
3882 decoded = quoprimime.decode(encoded)
3883 else:
3884 decoded = quoprimime.decode(encoded, eol=eol)
3885 self.assertEqual(decoded, expected_decoded)
3886
3887 def test_decode_null_word(self):
3888 self._test_decode('', '')
3889
3890 def test_decode_null_line_null_word(self):
3891 self._test_decode('\r\n', '\n')
3892
3893 def test_decode_one_word(self):
3894 self._test_decode('hello', 'hello')
3895
3896 def test_decode_one_word_eol(self):
3897 self._test_decode('hello', 'hello', eol='X')
3898
3899 def test_decode_one_line(self):
3900 self._test_decode('hello\r\n', 'hello\n')
3901
3902 def test_decode_one_line_lf(self):
3903 self._test_decode('hello\n', 'hello\n')
3904
R David Murraycafd79d2011-03-23 15:25:55 -04003905 def test_decode_one_line_cr(self):
3906 self._test_decode('hello\r', 'hello\n')
3907
3908 def test_decode_one_line_nl(self):
3909 self._test_decode('hello\n', 'helloX', eol='X')
3910
3911 def test_decode_one_line_crnl(self):
3912 self._test_decode('hello\r\n', 'helloX', eol='X')
3913
R David Murrayec1b5b82011-03-23 14:19:05 -04003914 def test_decode_one_line_one_word(self):
3915 self._test_decode('hello\r\nworld', 'hello\nworld')
3916
3917 def test_decode_one_line_one_word_eol(self):
3918 self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
3919
3920 def test_decode_two_lines(self):
3921 self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
3922
R David Murraycafd79d2011-03-23 15:25:55 -04003923 def test_decode_two_lines_eol(self):
3924 self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
3925
R David Murrayec1b5b82011-03-23 14:19:05 -04003926 def test_decode_one_long_line(self):
3927 self._test_decode('Spam' * 250, 'Spam' * 250)
3928
3929 def test_decode_one_space(self):
3930 self._test_decode(' ', '')
3931
3932 def test_decode_multiple_spaces(self):
3933 self._test_decode(' ' * 5, '')
3934
3935 def test_decode_one_line_trailing_spaces(self):
3936 self._test_decode('hello \r\n', 'hello\n')
3937
3938 def test_decode_two_lines_trailing_spaces(self):
3939 self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')
3940
3941 def test_decode_quoted_word(self):
3942 self._test_decode('=22quoted=20words=22', '"quoted words"')
3943
3944 def test_decode_uppercase_quoting(self):
3945 self._test_decode('ab=CD=EF', 'ab\xcd\xef')
3946
3947 def test_decode_lowercase_quoting(self):
3948 self._test_decode('ab=cd=ef', 'ab\xcd\xef')
3949
3950 def test_decode_soft_line_break(self):
3951 self._test_decode('soft line=\r\nbreak', 'soft linebreak')
3952
3953 def test_decode_false_quoting(self):
3954 self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
3955
3956 def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
3957 kwargs = {}
3958 if maxlinelen is None:
3959 # Use body_encode's default.
3960 maxlinelen = 76
3961 else:
3962 kwargs['maxlinelen'] = maxlinelen
3963 if eol is None:
3964 # Use body_encode's default.
3965 eol = '\n'
3966 else:
3967 kwargs['eol'] = eol
3968 encoded_body = quoprimime.body_encode(body, **kwargs)
3969 self.assertEqual(encoded_body, expected_encoded_body)
3970 if eol == '\n' or eol == '\r\n':
3971 # We know how to split the result back into lines, so maxlinelen
3972 # can be checked.
3973 for line in encoded_body.splitlines():
3974 self.assertLessEqual(len(line), maxlinelen)
3975
3976 def test_encode_null(self):
3977 self._test_encode('', '')
3978
3979 def test_encode_null_lines(self):
3980 self._test_encode('\n\n', '\n\n')
3981
3982 def test_encode_one_line(self):
3983 self._test_encode('hello\n', 'hello\n')
3984
3985 def test_encode_one_line_crlf(self):
3986 self._test_encode('hello\r\n', 'hello\n')
3987
3988 def test_encode_one_line_eol(self):
3989 self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
3990
3991 def test_encode_one_space(self):
3992 self._test_encode(' ', '=20')
3993
3994 def test_encode_one_line_one_space(self):
3995 self._test_encode(' \n', '=20\n')
3996
# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
4001
4002 def test_encode_two_lines_one_space(self):
4003 self._test_encode(' \n \n', '=20\n=20\n')
4004
R David Murrayec1b5b82011-03-23 14:19:05 -04004005 def test_encode_one_word_trailing_spaces(self):
4006 self._test_encode('hello ', 'hello =20')
4007
4008 def test_encode_one_line_trailing_spaces(self):
4009 self._test_encode('hello \n', 'hello =20\n')
4010
4011 def test_encode_one_word_trailing_tab(self):
4012 self._test_encode('hello \t', 'hello =09')
4013
4014 def test_encode_one_line_trailing_tab(self):
4015 self._test_encode('hello \t\n', 'hello =09\n')
4016
4017 def test_encode_trailing_space_before_maxlinelen(self):
4018 self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
4019
R David Murrayb938c8c2011-03-24 12:19:26 -04004020 def test_encode_trailing_space_at_maxlinelen(self):
4021 self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
4022
R David Murrayec1b5b82011-03-23 14:19:05 -04004023 def test_encode_trailing_space_beyond_maxlinelen(self):
R David Murrayb938c8c2011-03-24 12:19:26 -04004024 self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
4025
4026 def test_encode_whitespace_lines(self):
4027 self._test_encode(' \n' * 5, '=20\n' * 5)
R David Murrayec1b5b82011-03-23 14:19:05 -04004028
4029 def test_encode_quoted_equals(self):
4030 self._test_encode('a = b', 'a =3D b')
4031
4032 def test_encode_one_long_string(self):
4033 self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
4034
4035 def test_encode_one_long_line(self):
4036 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
4037
4038 def test_encode_one_very_long_line(self):
4039 self._test_encode('x' * 200 + '\n',
4040 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
4041
4042 def test_encode_one_long_line(self):
4043 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
4044
4045 def test_encode_shortest_maxlinelen(self):
4046 self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004047
R David Murrayb938c8c2011-03-24 12:19:26 -04004048 def test_encode_maxlinelen_too_small(self):
4049 self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
4050
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004051 def test_encode(self):
4052 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00004053 eq(quoprimime.body_encode(''), '')
4054 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004055 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00004056 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004057 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00004058 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004059xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
4060 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
4061x xxxx xxxx xxxx xxxx=20""")
4062 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00004063 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
4064 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004065xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
4066 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
4067x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00004068 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004069one line
4070
4071two line"""), """\
4072one line
4073
4074two line""")
4075
4076
Ezio Melottib3aedd42010-11-20 19:04:17 +00004077
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004078# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode() registers a throwaway 'fake' charset; drop
        # it again, if present, once each test has finished.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        self.assertEqual(ascii_charset.header_encode('Hello World!'),
                         'Hello World!')
        # 8-bit text cannot be header-encoded with us-ascii ...
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        with self.assertRaises(UnicodeError):
            ascii_charset.header_encode(eight_bit)
        # ... but the same text encodes with utf-8.
        utf8_charset = Charset('utf-8')
        self.assertEqual(utf8_charset.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # Try a charset with QP body encoding
        latin1 = Charset('iso-8859-1')
        check('hello w=F6rld', latin1.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        utf8 = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', utf8.body_encode(b'hello world'))
        # Try a charset with None body encoding
        ascii_charset = Charset('us-ascii')
        check('hello world', ascii_charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        euc_jp = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake = Charset('fake')
        check('hello world', fake.body_encode('hello world'))

    def test_unicode_charset_name(self):
        # A plain ASCII name is accepted and is what str() reports ...
        self.assertEqual(str(Charset('us-ascii')), 'us-ascii')
        # ... while a name containing a non-ASCII character is rejected.
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
4133
4134
Ezio Melottib3aedd42010-11-20 19:04:17 +00004135
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004136# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        # The appended chunk starts with a space of its own, and chunks
        # are joined with a space (see test_simple_surprise), so two
        # spaces are expected here.  The extraction had collapsed them
        # into one.
        eq(h.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # No leading space on the appended chunk, yet a separating space
        # appears in the encoded header.
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        for l in h.encode(splitchars=' ').split('\n '):
            # assertLessEqual reports the offending length on failure.
            self.assertLessEqual(len(l), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        # One header built from three chunks, each in its own charset.
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding must give back the three original chunks.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        # No header name and no explicit maxlinelen.
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # With a header name the expected split comes earlier.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # A large explicit maxlinelen: no split at all.
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        eq = self.assertEqual
        h = Header()
        # The charset is given as a plain string, not a Charset instance.
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        eq = self.assertEqual
        # Expected to come out as a "q" encoded-word ...
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        # ... and this one as a "b" encoded-word.
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        # Bytes with an undecodable octet are rejected by default ...
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        # ... and accepted when errors='replace' is requested.
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_escaped_8bit_header(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        e = x.decode('ascii', 'surrogateescape')
        h = Header(e, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        h2 = email.header.make_header(email.header.decode_header(h))
        self.assertEqual(str(h2),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        h = Header('test')
        chunks = email.header.decode_header(h)
        # Mutating the returned list must leave the header itself alone.
        chunks.append(('ascii', 'test2'))
        self.assertEqual(str(h), 'test')

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        eq = self.assertEqual
        # NOTE(review): the extraction collapsed whitespace runs; the gap
        # between the two encoded words may originally have been wider
        # than one space -- confirm.
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        msg = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(msg), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        msg = Message()
        # Three leading spaces on the value, so that preservation is
        # observable: they follow the single space after the colon.  The
        # extraction had collapsed both literals to one space, which
        # cannot distinguish "preserved" from "stripped".
        msg['SomeHeader'] = '   value with leading ws'
        self.assertEqual(str(msg), "SomeHeader:    value with leading ws\n\n")
4488
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004489
Ezio Melottib3aedd42010-11-20 19:04:17 +00004490
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004491# Test RFC 2231 header parameters (en/de)coding
4492class TestRFC2231(TestEmailBase):
4493 def test_get_param(self):
4494 eq = self.assertEqual
4495 msg = self._msgobj('msg_29.txt')
4496 eq(msg.get_param('title'),
4497 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
4498 eq(msg.get_param('title', unquote=False),
4499 ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
4500
def test_set_param(self):
    check = self.ndiffAssertEqual
    title = 'This is even more ***fun*** isn\'t it!'
    msg = Message()
    # A charset alone gives an empty language in the stored triple.
    msg.set_param('title', title, charset='us-ascii')
    check(msg.get_param('title'), ('us-ascii', '', title))
    # Setting the parameter again with a language updates the triple.
    msg.set_param('title', title, charset='us-ascii', language='en')
    check(msg.get_param('title'), ('us-ascii', 'en', title))
    # On a parsed message, the parameter is expected to be flattened
    # in the title*=charset'language'value form.
    # NOTE(review): the extraction collapsed whitespace runs; confirm
    # the spacing of the expected text against msg_01.txt.
    msg = self._msgobj('msg_01.txt')
    msg.set_param('title', title, charset='us-ascii', language='en')
    check(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4537
R David Murraya2860e82011-04-16 09:20:30 -04004538 def test_set_param_requote(self):
4539 msg = Message()
4540 msg.set_param('title', 'foo')
4541 self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
4542 msg.set_param('title', 'bar', requote=False)
4543 self.assertEqual(msg['content-type'], 'text/plain; title=bar')
4544 # tspecial is still quoted.
4545 msg.set_param('title', "(bar)bell", requote=False)
4546 self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
4547
def test_del_param(self):
    check = self.ndiffAssertEqual
    rfc2231_options = {'charset': 'us-ascii', 'language': 'en'}
    msg = self._msgobj('msg_01.txt')
    # Add two parameters, then delete one of them again; only 'title'
    # may remain in the flattened Content-Type header.
    msg.set_param('foo', 'bar', **rfc2231_options)
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  **rfc2231_options)
    msg.del_param('foo', header='Content-Type')
    # NOTE(review): the extraction collapsed whitespace runs; confirm
    # the spacing of the expected text against msg_01.txt.
    check(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4577
4578 def test_rfc2231_get_content_charset(self):
4579 eq = self.assertEqual
4580 msg = self._msgobj('msg_32.txt')
4581 eq(msg.get_content_charset(), 'us-ascii')
4582
R. David Murraydfd7eb02010-12-24 22:36:49 +00004583 def test_rfc2231_parse_rfc_quoting(self):
4584 m = textwrap.dedent('''\
4585 Content-Disposition: inline;
4586 \tfilename*0*=''This%20is%20even%20more%20;
4587 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
4588 \tfilename*2="is it not.pdf"
4589
4590 ''')
4591 msg = email.message_from_string(m)
4592 self.assertEqual(msg.get_filename(),
4593 'This is even more ***fun*** is it not.pdf')
4594 self.assertEqual(m, msg.as_string())
4595
4596 def test_rfc2231_parse_extra_quoting(self):
4597 m = textwrap.dedent('''\
4598 Content-Disposition: inline;
4599 \tfilename*0*="''This%20is%20even%20more%20";
4600 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4601 \tfilename*2="is it not.pdf"
4602
4603 ''')
4604 msg = email.message_from_string(m)
4605 self.assertEqual(msg.get_filename(),
4606 'This is even more ***fun*** is it not.pdf')
4607 self.assertEqual(m, msg.as_string())
4608
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004609 def test_rfc2231_no_language_or_charset(self):
4610 m = '''\
4611Content-Transfer-Encoding: 8bit
4612Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
4613Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
4614
4615'''
4616 msg = email.message_from_string(m)
4617 param = msg.get_param('NAME')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004618 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004619 self.assertEqual(
4620 param,
4621 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
4622
4623 def test_rfc2231_no_language_or_charset_in_filename(self):
4624 m = '''\
4625Content-Disposition: inline;
4626\tfilename*0*="''This%20is%20even%20more%20";
4627\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4628\tfilename*2="is it not.pdf"
4629
4630'''
4631 msg = email.message_from_string(m)
4632 self.assertEqual(msg.get_filename(),
4633 'This is even more ***fun*** is it not.pdf')
4634
4635 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
4636 m = '''\
4637Content-Disposition: inline;
4638\tfilename*0*="''This%20is%20even%20more%20";
4639\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4640\tfilename*2="is it not.pdf"
4641
4642'''
4643 msg = email.message_from_string(m)
4644 self.assertEqual(msg.get_filename(),
4645 'This is even more ***fun*** is it not.pdf')
4646
4647 def test_rfc2231_partly_encoded(self):
4648 m = '''\
4649Content-Disposition: inline;
4650\tfilename*0="''This%20is%20even%20more%20";
4651\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4652\tfilename*2="is it not.pdf"
4653
4654'''
4655 msg = email.message_from_string(m)
4656 self.assertEqual(
4657 msg.get_filename(),
4658 'This%20is%20even%20more%20***fun*** is it not.pdf')
4659
4660 def test_rfc2231_partly_nonencoded(self):
4661 m = '''\
4662Content-Disposition: inline;
4663\tfilename*0="This%20is%20even%20more%20";
4664\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
4665\tfilename*2="is it not.pdf"
4666
4667'''
4668 msg = email.message_from_string(m)
4669 self.assertEqual(
4670 msg.get_filename(),
4671 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
4672
4673 def test_rfc2231_no_language_or_charset_in_boundary(self):
4674 m = '''\
4675Content-Type: multipart/alternative;
4676\tboundary*0*="''This%20is%20even%20more%20";
4677\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
4678\tboundary*2="is it not.pdf"
4679
4680'''
4681 msg = email.message_from_string(m)
4682 self.assertEqual(msg.get_boundary(),
4683 'This is even more ***fun*** is it not.pdf')
4684
4685 def test_rfc2231_no_language_or_charset_in_charset(self):
4686 # This is a nonsensical charset value, but tests the code anyway
4687 m = '''\
4688Content-Type: text/plain;
4689\tcharset*0*="This%20is%20even%20more%20";
4690\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
4691\tcharset*2="is it not.pdf"
4692
4693'''
4694 msg = email.message_from_string(m)
4695 self.assertEqual(msg.get_content_charset(),
4696 'this is even more ***fun*** is it not.pdf')
4697
4698 def test_rfc2231_bad_encoding_in_filename(self):
4699 m = '''\
4700Content-Disposition: inline;
4701\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
4702\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4703\tfilename*2="is it not.pdf"
4704
4705'''
4706 msg = email.message_from_string(m)
4707 self.assertEqual(msg.get_filename(),
4708 'This is even more ***fun*** is it not.pdf')
4709
4710 def test_rfc2231_bad_encoding_in_charset(self):
4711 m = """\
4712Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
4713
4714"""
4715 msg = email.message_from_string(m)
4716 # This should return None because non-ascii characters in the charset
4717 # are not allowed.
4718 self.assertEqual(msg.get_content_charset(), None)
4719
4720 def test_rfc2231_bad_character_in_charset(self):
4721 m = """\
4722Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
4723
4724"""
4725 msg = email.message_from_string(m)
4726 # This should return None because non-ascii characters in the charset
4727 # are not allowed.
4728 self.assertEqual(msg.get_content_charset(), None)
4729
4730 def test_rfc2231_bad_character_in_filename(self):
4731 m = '''\
4732Content-Disposition: inline;
4733\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
4734\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4735\tfilename*2*="is it not.pdf%E2"
4736
4737'''
4738 msg = email.message_from_string(m)
4739 self.assertEqual(msg.get_filename(),
4740 'This is even more ***fun*** is it not.pdf\ufffd')
4741
4742 def test_rfc2231_unknown_encoding(self):
4743 m = """\
4744Content-Transfer-Encoding: 8bit
4745Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
4746
4747"""
4748 msg = email.message_from_string(m)
4749 self.assertEqual(msg.get_filename(), 'myfile.txt')
4750
4751 def test_rfc2231_single_tick_in_filename_extended(self):
4752 eq = self.assertEqual
4753 m = """\
4754Content-Type: application/x-foo;
4755\tname*0*=\"Frank's\"; name*1*=\" Document\"
4756
4757"""
4758 msg = email.message_from_string(m)
4759 charset, language, s = msg.get_param('name')
4760 eq(charset, None)
4761 eq(language, None)
4762 eq(s, "Frank's Document")
4763
4764 def test_rfc2231_single_tick_in_filename(self):
4765 m = """\
4766Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
4767
4768"""
4769 msg = email.message_from_string(m)
4770 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004771 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004772 self.assertEqual(param, "Frank's Document")
4773
4774 def test_rfc2231_tick_attack_extended(self):
4775 eq = self.assertEqual
4776 m = """\
4777Content-Type: application/x-foo;
4778\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
4779
4780"""
4781 msg = email.message_from_string(m)
4782 charset, language, s = msg.get_param('name')
4783 eq(charset, 'us-ascii')
4784 eq(language, 'en-us')
4785 eq(s, "Frank's Document")
4786
4787 def test_rfc2231_tick_attack(self):
4788 m = """\
4789Content-Type: application/x-foo;
4790\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
4791
4792"""
4793 msg = email.message_from_string(m)
4794 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004795 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004796 self.assertEqual(param, "us-ascii'en-us'Frank's Document")
4797
4798 def test_rfc2231_no_extended_values(self):
4799 eq = self.assertEqual
4800 m = """\
4801Content-Type: application/x-foo; name=\"Frank's Document\"
4802
4803"""
4804 msg = email.message_from_string(m)
4805 eq(msg.get_param('name'), "Frank's Document")
4806
4807 def test_rfc2231_encoded_then_unencoded_segments(self):
4808 eq = self.assertEqual
4809 m = """\
4810Content-Type: application/x-foo;
4811\tname*0*=\"us-ascii'en-us'My\";
4812\tname*1=\" Document\";
4813\tname*2*=\" For You\"
4814
4815"""
4816 msg = email.message_from_string(m)
4817 charset, language, s = msg.get_param('name')
4818 eq(charset, 'us-ascii')
4819 eq(language, 'en-us')
4820 eq(s, 'My Document For You')
4821
4822 def test_rfc2231_unencoded_then_encoded_segments(self):
4823 eq = self.assertEqual
4824 m = """\
4825Content-Type: application/x-foo;
4826\tname*0=\"us-ascii'en-us'My\";
4827\tname*1*=\" Document\";
4828\tname*2*=\" For You\"
4829
4830"""
4831 msg = email.message_from_string(m)
4832 charset, language, s = msg.get_param('name')
4833 eq(charset, 'us-ascii')
4834 eq(language, 'en-us')
4835 eq(s, 'My Document For You')
4836
4837
Ezio Melottib3aedd42010-11-20 19:04:17 +00004838
# Tests to ensure that signed parts of an email are completely preserved, as
# required by RFC1847 section 2.1.  Note that these are incomplete, because the
# email package does not currently always preserve the body.  See issue 1670765.
class TestSigned(TestEmailBase):

    # Captures the first MIME part of a message: group 1 is the boundary
    # string found on a line starting with "--", group 2 is everything up to
    # (not including) the newline before the next line that consists of "--"
    # followed by the same boundary.
    _FIRST_PART_RE = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (raw text of the fixture, Message parsed from that text).
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_mime_part(self, text):
        # Return the body of the first MIME part in *text*.  Fail the test
        # with a readable message when no part is found, instead of raising
        # AttributeError from calling .group() on None.
        match = self._FIRST_PART_RE.search(text)
        self.assertIsNotNone(match, 'no MIME part found in message text')
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require the
        # regenerated one to be identical to the one in the source text.
        inpart = self._first_mime_part(original)
        outpart = self._first_mime_part(result)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        # Round trip through Message.as_string() with default settings.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Round trip through Message.as_string() with maxheaderlen=60.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Round trip through Generator.flatten() writing into a StringIO.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4874
4875
Ezio Melottib3aedd42010-11-20 19:04:17 +00004876
# When this file is executed as a script (rather than imported), hand control
# to unittest's command-line runner.
if __name__ == '__main__':
    unittest.main()