blob: b07f67593cd87d13c632b4ffd515965411f3907c [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R David Murray28346b82011-03-31 11:40:20 -040039from test.support import run_unittest, unlink
R David Murraya256bac2011-03-31 12:20:23 -040040from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000041
# String constants shared by the tests in this module.
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
45
46
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    # Exercises the public API of email.message.Message: header access,
    # header parameters, charsets, payload decoding and the content-type
    # helpers.  Several tests obtain a parsed message through
    # self._msgobj(<fixture file name>), which is not defined in this class.

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the supplied fallback object.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # A fallback value replaces the entries that would otherwise be None.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', 'rb') as fp:
            msgdata = fp.read()
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends instead of replacing, and header
            # lookup returns the first match.  Drop the previous value so
            # that every spelling is really exercised, not just the first.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        # With unixfrom=True an envelope line precedes the same text.
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header is present in this message.
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        # Passes as long as del_param() does not raise.
        msg = Message()
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        self.assertEqual(msg['Content-Type'], 'text/plain; charset="utf-8"')
        # Deleting a parameter that is not there must leave the header as
        # it was.
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With duplicate headers only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))
630
631
# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    # Checks which Content-Transfer-Encoding ends up on a message for
    # various payloads and charsets.

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as image_file:
            raw_image = image_file.read()
        encoded_body = MIMEImage(raw_image).get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        longest_line = max(len(line) for line in encoded_body.split('\n'))
        self.assertLessEqual(longest_line, 76)

    def test_encode_empty_payload(self):
        empty = Message()
        empty.set_charset('us-ascii')
        self.assertEqual(empty['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        # 7bit data and the default us-ascii _charset
        plain = MIMEText('hello world')
        self.assertEqual(plain['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        eight_bit = MIMEText('hello \xf8 world')
        self.assertEqual(eight_bit['content-transfer-encoding'], 'base64')
        # And now with a different charset
        latin1 = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        self.assertEqual(latin1['content-transfer-encoding'],
                         'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        japanese = MIMEText('文', _charset='euc-jp')
        self.assertEqual(japanese['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000669
Ezio Melottib3aedd42010-11-20 19:04:17 +0000670
# Test long header wrapping
672class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400673
674 maxDiff = None
675
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000676 def test_split_long_continuation(self):
677 eq = self.ndiffAssertEqual
678 msg = email.message_from_string("""\
679Subject: bug demonstration
680\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
681\tmore text
682
683test
684""")
685 sfp = StringIO()
686 g = Generator(sfp)
687 g.flatten(msg)
688 eq(sfp.getvalue(), """\
689Subject: bug demonstration
690\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
691\tmore text
692
693test
694""")
695
696 def test_another_long_almost_unsplittable_header(self):
697 eq = self.ndiffAssertEqual
698 hstr = """\
699bug demonstration
700\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
701\tmore text"""
702 h = Header(hstr, continuation_ws='\t')
703 eq(h.encode(), """\
704bug demonstration
705\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
706\tmore text""")
707 h = Header(hstr.replace('\t', ' '))
708 eq(h.encode(), """\
709bug demonstration
710 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
711 more text""")
712
713 def test_long_nonstring(self):
714 eq = self.ndiffAssertEqual
715 g = Charset("iso-8859-1")
716 cz = Charset("iso-8859-2")
717 utf8 = Charset("utf-8")
718 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
719 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
720 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
721 b'bef\xf6rdert. ')
722 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
723 b'd\xf9vtipu.. ')
724 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
725 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
726 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
727 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
728 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
729 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
730 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
731 '\u3044\u307e\u3059\u3002')
732 h = Header(g_head, g, header_name='Subject')
733 h.append(cz_head, cz)
734 h.append(utf8_head, utf8)
735 msg = Message()
736 msg['Subject'] = h
737 sfp = StringIO()
738 g = Generator(sfp)
739 g.flatten(msg)
740 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000741Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
742 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
743 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
744 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
745 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
746 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
747 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
748 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
749 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
750 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
751 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000752
753""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000754 eq(h.encode(maxlinelen=76), """\
755=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
756 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
757 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
758 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
759 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
760 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
761 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
762 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
763 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
764 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
765 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000766
767 def test_long_header_encode(self):
768 eq = self.ndiffAssertEqual
769 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
770 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
771 header_name='X-Foobar-Spoink-Defrobnit')
772 eq(h.encode(), '''\
773wasnipoop; giraffes="very-long-necked-animals";
774 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
775
776 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
777 eq = self.ndiffAssertEqual
778 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
779 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
780 header_name='X-Foobar-Spoink-Defrobnit',
781 continuation_ws='\t')
782 eq(h.encode(), '''\
783wasnipoop; giraffes="very-long-necked-animals";
784 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
785
786 def test_long_header_encode_with_tab_continuation(self):
787 eq = self.ndiffAssertEqual
788 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
789 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
790 header_name='X-Foobar-Spoink-Defrobnit',
791 continuation_ws='\t')
792 eq(h.encode(), '''\
793wasnipoop; giraffes="very-long-necked-animals";
794\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
795
R David Murray3a6152f2011-03-14 21:13:03 -0400796 def test_header_encode_with_different_output_charset(self):
797 h = Header('文', 'euc-jp')
798 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
799
800 def test_long_header_encode_with_different_output_charset(self):
801 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
802 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
803 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
804 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
805 res = """\
806=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
807 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
808 self.assertEqual(h.encode(), res)
809
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000810 def test_header_splitter(self):
811 eq = self.ndiffAssertEqual
812 msg = MIMEText('')
813 # It'd be great if we could use add_header() here, but that doesn't
814 # guarantee an order of the parameters.
815 msg['X-Foobar-Spoink-Defrobnit'] = (
816 'wasnipoop; giraffes="very-long-necked-animals"; '
817 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
818 sfp = StringIO()
819 g = Generator(sfp)
820 g.flatten(msg)
821 eq(sfp.getvalue(), '''\
822Content-Type: text/plain; charset="us-ascii"
823MIME-Version: 1.0
824Content-Transfer-Encoding: 7bit
825X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
826 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
827
828''')
829
830 def test_no_semis_header_splitter(self):
831 eq = self.ndiffAssertEqual
832 msg = Message()
833 msg['From'] = 'test@dom.ain'
834 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
835 msg.set_payload('Test')
836 sfp = StringIO()
837 g = Generator(sfp)
838 g.flatten(msg)
839 eq(sfp.getvalue(), """\
840From: test@dom.ain
841References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
842 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
843
844Test""")
845
R David Murray7da4db12011-04-07 20:37:17 -0400846 def test_last_split_chunk_does_not_fit(self):
847 eq = self.ndiffAssertEqual
848 h = Header('Subject: the first part of this is short, but_the_second'
849 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
850 '_all_by_itself')
851 eq(h.encode(), """\
852Subject: the first part of this is short,
853 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
854
855 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
856 eq = self.ndiffAssertEqual
857 h = Header(', but_the_second'
858 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
859 '_all_by_itself')
860 eq(h.encode(), """\
861,
862 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
863
864 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
865 eq = self.ndiffAssertEqual
866 h = Header(', , but_the_second'
867 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
868 '_all_by_itself')
869 eq(h.encode(), """\
870, ,
871 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
872
873 def test_trailing_splitable_on_overlong_unsplitable(self):
874 eq = self.ndiffAssertEqual
875 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
876 'be_on_a_line_all_by_itself;')
877 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
878 "be_on_a_line_all_by_itself;")
879
880 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
881 eq = self.ndiffAssertEqual
882 h = Header('; '
883 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400884 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400885 eq(h.encode(), """\
886;
R David Murray01581ee2011-04-18 10:04:34 -0400887 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400888
R David Murraye1292a22011-04-07 20:54:03 -0400889 def test_long_header_with_multiple_sequential_split_chars(self):
R David Murraye1292a22011-04-07 20:54:03 -0400890 eq = self.ndiffAssertEqual
891 h = Header('This is a long line that has two whitespaces in a row. '
892 'This used to cause truncation of the header when folded')
893 eq(h.encode(), """\
894This is a long line that has two whitespaces in a row. This used to cause
895 truncation of the header when folded""")
896
R David Murray01581ee2011-04-18 10:04:34 -0400897 def test_splitter_split_on_punctuation_only_if_fws(self):
898 eq = self.ndiffAssertEqual
899 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
900 'they;arenotlegal;fold,points')
901 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
902 "arenotlegal;fold,points")
903
904 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
905 eq = self.ndiffAssertEqual
906 h = Header('this is a test where we need to have more than one line '
907 'before; our final line that is just too big to fit;; '
908 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
909 'be_on_a_line_all_by_itself;')
910 eq(h.encode(), """\
911this is a test where we need to have more than one line before;
912 our final line that is just too big to fit;;
913 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
914
915 def test_overlong_last_part_followed_by_split_point(self):
916 eq = self.ndiffAssertEqual
917 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
918 'be_on_a_line_all_by_itself ')
919 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
920 "should_be_on_a_line_all_by_itself ")
921
922 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
923 eq = self.ndiffAssertEqual
924 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
925 'before_our_final_line_; ; '
926 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
927 'be_on_a_line_all_by_itself; ')
928 eq(h.encode(), """\
929this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
930 ;
931 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
932
933 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
934 eq = self.ndiffAssertEqual
935 h = Header('this is a test where we need to have more than one line '
936 'before our final line; ; '
937 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
938 'be_on_a_line_all_by_itself; ')
939 eq(h.encode(), """\
940this is a test where we need to have more than one line before our final line;
941 ;
942 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
943
944 def test_long_header_with_whitespace_runs(self):
945 eq = self.ndiffAssertEqual
946 msg = Message()
947 msg['From'] = 'test@dom.ain'
948 msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
949 msg.set_payload('Test')
950 sfp = StringIO()
951 g = Generator(sfp)
952 g.flatten(msg)
953 eq(sfp.getvalue(), """\
954From: test@dom.ain
955References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
956 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
957 <foo@dom.ain> <foo@dom.ain>\x20\x20
958
959Test""")
960
961 def test_long_run_with_semi_header_splitter(self):
962 eq = self.ndiffAssertEqual
963 msg = Message()
964 msg['From'] = 'test@dom.ain'
965 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
966 msg.set_payload('Test')
967 sfp = StringIO()
968 g = Generator(sfp)
969 g.flatten(msg)
970 eq(sfp.getvalue(), """\
971From: test@dom.ain
972References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
973 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
974 <foo@dom.ain>; abc
975
976Test""")
977
978 def test_splitter_split_on_punctuation_only_if_fws(self):
979 eq = self.ndiffAssertEqual
980 msg = Message()
981 msg['From'] = 'test@dom.ain'
982 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
983 'they;arenotlegal;fold,points')
984 msg.set_payload('Test')
985 sfp = StringIO()
986 g = Generator(sfp)
987 g.flatten(msg)
988 # XXX the space after the header should not be there.
989 eq(sfp.getvalue(), """\
990From: test@dom.ain
991References:\x20
992 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
993
994Test""")
995
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000996 def test_no_split_long_header(self):
997 eq = self.ndiffAssertEqual
998 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000999 h = Header(hstr)
1000 # These come on two lines because Headers are really field value
1001 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001002 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001003References:
1004 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1005 h = Header('x' * 80)
1006 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001007
1008 def test_splitting_multiple_long_lines(self):
1009 eq = self.ndiffAssertEqual
1010 hstr = """\
1011from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1012\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1013\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1014"""
1015 h = Header(hstr, continuation_ws='\t')
1016 eq(h.encode(), """\
1017from babylon.socal-raves.org (localhost [127.0.0.1]);
1018 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1019 for <mailman-admin@babylon.socal-raves.org>;
1020 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1021\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1022 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1023 for <mailman-admin@babylon.socal-raves.org>;
1024 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1025\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1026 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1027 for <mailman-admin@babylon.socal-raves.org>;
1028 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1029
1030 def test_splitting_first_line_only_is_long(self):
1031 eq = self.ndiffAssertEqual
1032 hstr = """\
1033from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1034\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1035\tid 17k4h5-00034i-00
1036\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1037 h = Header(hstr, maxlinelen=78, header_name='Received',
1038 continuation_ws='\t')
1039 eq(h.encode(), """\
1040from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1041 helo=cthulhu.gerg.ca)
1042\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1043\tid 17k4h5-00034i-00
1044\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1045
1046 def test_long_8bit_header(self):
1047 eq = self.ndiffAssertEqual
1048 msg = Message()
1049 h = Header('Britische Regierung gibt', 'iso-8859-1',
1050 header_name='Subject')
1051 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001052 eq(h.encode(maxlinelen=76), """\
1053=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1054 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001055 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001056 eq(msg.as_string(maxheaderlen=76), """\
1057Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1058 =?iso-8859-1?q?hore-Windkraftprojekte?=
1059
1060""")
1061 eq(msg.as_string(maxheaderlen=0), """\
1062Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001063
1064""")
1065
1066 def test_long_8bit_header_no_charset(self):
1067 eq = self.ndiffAssertEqual
1068 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001069 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1070 'f\xfcr Offshore-Windkraftprojekte '
1071 '<a-very-long-address@example.com>')
1072 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001073 eq(msg.as_string(maxheaderlen=78), """\
1074Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1075 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1076
1077""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001078 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001079 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001080 header_name='Reply-To')
1081 eq(msg.as_string(maxheaderlen=78), """\
1082Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1083 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001084
1085""")
1086
1087 def test_long_to_header(self):
1088 eq = self.ndiffAssertEqual
1089 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001090 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001091 '"Someone Test #B" <someone@umich.edu>, '
1092 '"Someone Test #C" <someone@eecs.umich.edu>, '
1093 '"Someone Test #D" <someone@eecs.umich.edu>')
1094 msg = Message()
1095 msg['To'] = to
1096 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001097To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001098 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001099 "Someone Test #C" <someone@eecs.umich.edu>,
1100 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001101
1102''')
1103
1104 def test_long_line_after_append(self):
1105 eq = self.ndiffAssertEqual
1106 s = 'This is an example of string which has almost the limit of header length.'
1107 h = Header(s)
1108 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001109 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001110This is an example of string which has almost the limit of header length.
1111 Add another line.""")
1112
1113 def test_shorter_line_with_append(self):
1114 eq = self.ndiffAssertEqual
1115 s = 'This is a shorter line.'
1116 h = Header(s)
1117 h.append('Add another sentence. (Surprise?)')
1118 eq(h.encode(),
1119 'This is a shorter line. Add another sentence. (Surprise?)')
1120
1121 def test_long_field_name(self):
1122 eq = self.ndiffAssertEqual
1123 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001124 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1125 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1126 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1127 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001128 h = Header(gs, 'iso-8859-1', header_name=fn)
1129 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001130 eq(h.encode(maxlinelen=76), """\
1131=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1132 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1133 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1134 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001135
1136 def test_long_received_header(self):
1137 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1138 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1139 'Wed, 05 Mar 2003 18:10:18 -0700')
1140 msg = Message()
1141 msg['Received-1'] = Header(h, continuation_ws='\t')
1142 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001143 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001144 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001145Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1146 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001147 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001148Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1149 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001150 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001151
1152""")
1153
1154 def test_string_headerinst_eq(self):
1155 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1156 'tu-muenchen.de> (David Bremner\'s message of '
1157 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1158 msg = Message()
1159 msg['Received-1'] = Header(h, header_name='Received-1',
1160 continuation_ws='\t')
1161 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001162 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001163 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001164Received-1:\x20
1165 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1166 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1167Received-2:\x20
1168 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1169 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001170
1171""")
1172
1173 def test_long_unbreakable_lines_with_continuation(self):
1174 eq = self.ndiffAssertEqual
1175 msg = Message()
1176 t = """\
1177iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1178 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1179 msg['Face-1'] = t
1180 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001181 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001182 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001183 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001184 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001185Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001186 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001187 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001188Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001189 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001190 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001191Face-3:\x20
1192 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1193 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001194
1195""")
1196
1197 def test_another_long_multiline_header(self):
1198 eq = self.ndiffAssertEqual
1199 m = ('Received: from siimage.com '
1200 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001201 'Microsoft SMTPSVC(5.0.2195.4905); '
1202 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001203 msg = email.message_from_string(m)
1204 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001205Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1206 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001207
1208''')
1209
1210 def test_long_lines_with_different_header(self):
1211 eq = self.ndiffAssertEqual
1212 h = ('List-Unsubscribe: '
1213 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1214 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1215 '?subject=unsubscribe>')
1216 msg = Message()
1217 msg['List'] = h
1218 msg['List'] = Header(h, header_name='List')
1219 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001220List: List-Unsubscribe:
1221 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001222 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001223List: List-Unsubscribe:
1224 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001225 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001226
1227""")
1228
R. David Murray6f0022d2011-01-07 21:57:25 +00001229 def test_long_rfc2047_header_with_embedded_fws(self):
1230 h = Header(textwrap.dedent("""\
1231 We're going to pretend this header is in a non-ascii character set
1232 \tto see if line wrapping with encoded words and embedded
1233 folding white space works"""),
1234 charset='utf-8',
1235 header_name='Test')
1236 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1237 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1238 =?utf-8?q?cter_set?=
1239 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1240 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1241
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001242
Ezio Melottib3aedd42010-11-20 19:04:17 +00001243
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001244# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # The fixture is a message whose body starts with a "From " line; the
    # two tests flatten it with mangle_from_ switched on and off.

    def setUp(self):
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def _flatten(self, mangle):
        # Serialize the fixture with the requested mangle_from_ setting.
        out = StringIO()
        Generator(out, mangle_from_=mangle).flatten(self.msg)
        return out.getvalue()

    def test_mangled_from(self):
        # With mangling on, the body's "From " line gains a '>' prefix.
        self.assertEqual(self._flatten(True), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        # With mangling off, the body is written unchanged.
        self.assertEqual(self._flatten(False), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")
1275
1276
Ezio Melottib3aedd42010-11-20 19:04:17 +00001277
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001278# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    # Every test works on a MIMEAudio built from the audiotest.au data file.

    def setUp(self):
        with openfile('audiotest.au', 'rb') as audio_file:
            self._audiodata = audio_file.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        # No subtype was given, so it is derived from the data.
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is base64 text that decodes back to the input bytes.
        encoded = self._au.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        audio = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(audio.get_content_type(), 'audio/fish')

    def test_add_header(self):
        audio = self._au
        audio.add_header('Content-Disposition', 'attachment',
                         filename='audiotest.au')
        self.assertEqual(audio['content-disposition'],
                         'attachment; filename="audiotest.au"')
        self.assertEqual(
            audio.get_params(header='content-disposition'),
            [('attachment', ''), ('filename', 'audiotest.au')])
        self.assertEqual(
            audio.get_param('filename', header='content-disposition'),
            'audiotest.au')
        # A unique object lets us tell "missing" apart from any real value.
        missing = []
        self.assertEqual(
            audio.get_param('attachment', header='content-disposition'), '')
        self.assertIs(
            audio.get_param('foo', failobj=missing,
                            header='content-disposition'),
            missing)
        # Try some missing stuff
        self.assertIs(audio.get_param('foobar', missing), missing)
        self.assertIs(
            audio.get_param('attachment', missing, header='foobar'),
            missing)
1316
1317
Ezio Melottib3aedd42010-11-20 19:04:17 +00001318
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001319# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    # Every test works on a MIMEImage built from the PyBanner048.gif file.

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as image_file:
            self._imgdata = image_file.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        # No subtype was given, so it is derived from the data.
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is base64 text that decodes back to the input bytes.
        encoded = self._im.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        image = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(image.get_content_type(), 'image/fish')

    def test_add_header(self):
        image = self._im
        image.add_header('Content-Disposition', 'attachment',
                         filename='dingusfish.gif')
        self.assertEqual(image['content-disposition'],
                         'attachment; filename="dingusfish.gif"')
        self.assertEqual(
            image.get_params(header='content-disposition'),
            [('attachment', ''), ('filename', 'dingusfish.gif')])
        self.assertEqual(
            image.get_param('filename', header='content-disposition'),
            'dingusfish.gif')
        # A unique object lets us tell "missing" apart from any real value.
        missing = []
        self.assertEqual(
            image.get_param('attachment', header='content-disposition'), '')
        self.assertIs(
            image.get_param('foo', failobj=missing,
                            header='content-disposition'),
            missing)
        # Try some missing stuff
        self.assertIs(image.get_param('foobar', missing), missing)
        self.assertIs(
            image.get_param('attachment', missing, header='foobar'),
            missing)
1357
1358
Ezio Melottib3aedd42010-11-20 19:04:17 +00001359
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001360# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        # Defaults: application/octet-stream content type, base64 transfer
        # encoding.
        app = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(app.get_content_type(), 'application/octet-stream')
        self.assertEqual(app['content-transfer-encoding'], 'base64')

    def test_body(self):
        # The stored payload is the base64 text; decode=True returns the
        # original bytes.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        app = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        stored = app.get_payload().strip()
        self.assertEqual(stored, '+vv8/f7/')
        self.assertEqual(app.get_payload(decode=True), bytesdata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001375
1376
Ezio Melottib3aedd42010-11-20 19:04:17 +00001377
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001378# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        text = self._msg
        self.assertEqual(text.get_content_type(), 'text/plain')
        self.assertEqual(text.get_param('charset'), 'us-ascii')
        # A unique object lets us tell "missing" apart from any real value.
        missing = []
        self.assertIs(text.get_param('foobar', missing), missing)
        self.assertIs(text.get_param('charset', missing, header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        text = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(text.get_charset().input_charset, 'us-ascii')
        self.assertEqual(text['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        text = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(text.get_charset().input_charset, 'us-ascii')
        self.assertEqual(text['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        text = MIMEText('hello there')
        # The charset object is compared directly against the string name.
        self.assertEqual(text.get_charset(), 'us-ascii')
        self.assertEqual(text['content-type'],
                         'text/plain; charset="us-ascii"')
        self.assertIn('hello there', text.as_string())

    def test_utf8_input(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        text = MIMEText(cyrillic, _charset='utf-8')
        self.assertEqual(text.get_charset().output_charset, 'utf-8')
        self.assertEqual(text['content-type'],
                         'text/plain; charset="utf-8"')
        self.assertEqual(text.get_payload(decode=True),
                         cyrillic.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, cyrillic)
1429
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001430
Ezio Melottib3aedd42010-11-20 19:04:17 +00001431
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001432# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    def setUp(self):
        # Build a multipart/mixed container holding a short text intro and
        # a GIF attachment; the pieces are kept on self for the tests.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last field of the time tuple is the DST flag.
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone/altzone count seconds *west* of UTC, so a positive
        # value means a negative UTC offset.
        sign = '-' if tzsecs > 0 else '+'
        # Format the magnitude as HHMM.  Dividing the signed value by 36
        # produced a doubled sign east of UTC (e.g. "+-100") and wrong
        # minutes for offsets that are not whole hours.
        hours, remainder = divmod(abs(tzsecs), 3600)
        tzoffset = ' %s%02d%02d' % (sign, hours, remainder // 60)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # The container exposes exactly the two attached parts, in order,
        # and hands back the very same objects that were attached.
        eq = self.assertEqual
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing and regenerating a multipart with one empty part must
        # reproduce the input text exactly.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        # No parts attached and epilogue left untouched: no trailing newline
        # after the closing boundary.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        # Empty-string preamble and epilogue each contribute a newline.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        # A single text part inside the container.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        # An empty-string preamble yields an extra blank line before the
        # first boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        # A preamble of None contributes nothing to the output.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        # An epilogue of None leaves no newline after the closing boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        # An empty-string epilogue adds one newline after the closing
        # boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        # A newline epilogue is emitted in addition to the newline that
        # terminates the closing boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        # Each message/external-body subpart wraps exactly one text/plain
        # message.
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundaries in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) says
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this
        # is parsed is closest to the spirit of RFC 2046.
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type must not trigger
        # multipart parsing; the body is regenerated verbatim.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        # Leading whitespace inside the quoted boundary is significant.
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the very last thing in the text, with no
        # newline after it; the part payload must still be recovered.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001804
1805
Ezio Melottib3aedd42010-11-20 19:04:17 +00001806
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001807# Test some badly formatted messages
R David Murray3edd22a2011-04-18 13:59:37 -04001808class TestNonConformantBase:
1809
1810 def _msgobj(self, filename):
1811 with openfile(filename) as fp:
1812 return email.message_from_file(fp, policy=self.policy)
1813
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001814 def test_parse_missing_minor_type(self):
1815 eq = self.assertEqual
1816 msg = self._msgobj('msg_14.txt')
1817 eq(msg.get_content_type(), 'text/plain')
1818 eq(msg.get_content_maintype(), 'text')
1819 eq(msg.get_content_subtype(), 'plain')
1820
1821 def test_same_boundary_inner_outer(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001822 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001823 msg = self._msgobj('msg_15.txt')
1824 # XXX We can probably eventually do better
1825 inner = msg.get_payload(0)
1826 unless(hasattr(inner, 'defects'))
R David Murray3edd22a2011-04-18 13:59:37 -04001827 self.assertEqual(len(self.get_defects(inner)), 1)
1828 unless(isinstance(self.get_defects(inner)[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001829 errors.StartBoundaryNotFoundDefect))
1830
1831 def test_multipart_no_boundary(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001832 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001833 msg = self._msgobj('msg_25.txt')
1834 unless(isinstance(msg.get_payload(), str))
R David Murray3edd22a2011-04-18 13:59:37 -04001835 self.assertEqual(len(self.get_defects(msg)), 2)
1836 unless(isinstance(self.get_defects(msg)[0],
1837 errors.NoBoundaryInMultipartDefect))
1838 unless(isinstance(self.get_defects(msg)[1],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001839 errors.MultipartInvariantViolationDefect))
1840
R David Murray749073a2011-06-22 13:47:53 -04001841 multipart_msg = textwrap.dedent("""\
1842 Date: Wed, 14 Nov 2007 12:56:23 GMT
1843 From: foo@bar.invalid
1844 To: foo@bar.invalid
1845 Subject: Content-Transfer-Encoding: base64 and multipart
1846 MIME-Version: 1.0
1847 Content-Type: multipart/mixed;
1848 boundary="===============3344438784458119861=="{}
1849
1850 --===============3344438784458119861==
1851 Content-Type: text/plain
1852
1853 Test message
1854
1855 --===============3344438784458119861==
1856 Content-Type: application/octet-stream
1857 Content-Transfer-Encoding: base64
1858
1859 YWJj
1860
1861 --===============3344438784458119861==--
1862 """)
1863
1864 def test_multipart_invalid_cte(self):
1865 msg = email.message_from_string(
1866 self.multipart_msg.format("\nContent-Transfer-Encoding: base64"),
1867 policy = self.policy)
1868 self.assertEqual(len(self.get_defects(msg)), 1)
1869 self.assertIsInstance(self.get_defects(msg)[0],
1870 errors.InvalidMultipartContentTransferEncodingDefect)
1871
1872 def test_multipart_no_cte_no_defect(self):
1873 msg = email.message_from_string(
1874 self.multipart_msg.format(''),
1875 policy = self.policy)
1876 self.assertEqual(len(self.get_defects(msg)), 0)
1877
1878 def test_multipart_valid_cte_no_defect(self):
1879 for cte in ('7bit', '8bit', 'BINary'):
1880 msg = email.message_from_string(
1881 self.multipart_msg.format(
1882 "\nContent-Transfer-Encoding: {}".format(cte)),
1883 policy = self.policy)
1884 self.assertEqual(len(self.get_defects(msg)), 0)
1885
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001886 def test_invalid_content_type(self):
1887 eq = self.assertEqual
1888 neq = self.ndiffAssertEqual
1889 msg = Message()
1890 # RFC 2045, $5.2 says invalid yields text/plain
1891 msg['Content-Type'] = 'text'
1892 eq(msg.get_content_maintype(), 'text')
1893 eq(msg.get_content_subtype(), 'plain')
1894 eq(msg.get_content_type(), 'text/plain')
1895 # Clear the old value and try something /really/ invalid
1896 del msg['content-type']
1897 msg['Content-Type'] = 'foo'
1898 eq(msg.get_content_maintype(), 'text')
1899 eq(msg.get_content_subtype(), 'plain')
1900 eq(msg.get_content_type(), 'text/plain')
1901 # Still, make sure that the message is idempotently generated
1902 s = StringIO()
1903 g = Generator(s)
1904 g.flatten(msg)
1905 neq(s.getvalue(), 'Content-Type: foo\n\n')
1906
1907 def test_no_start_boundary(self):
1908 eq = self.ndiffAssertEqual
1909 msg = self._msgobj('msg_31.txt')
1910 eq(msg.get_payload(), """\
1911--BOUNDARY
1912Content-Type: text/plain
1913
1914message 1
1915
1916--BOUNDARY
1917Content-Type: text/plain
1918
1919message 2
1920
1921--BOUNDARY--
1922""")
1923
1924 def test_no_separating_blank_line(self):
1925 eq = self.ndiffAssertEqual
1926 msg = self._msgobj('msg_35.txt')
1927 eq(msg.as_string(), """\
1928From: aperson@dom.ain
1929To: bperson@dom.ain
1930Subject: here's something interesting
1931
1932counter to RFC 2822, there's no separating newline here
1933""")
1934
1935 def test_lying_multipart(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001936 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001937 msg = self._msgobj('msg_41.txt')
1938 unless(hasattr(msg, 'defects'))
R David Murray3edd22a2011-04-18 13:59:37 -04001939 self.assertEqual(len(self.get_defects(msg)), 2)
1940 unless(isinstance(self.get_defects(msg)[0],
1941 errors.NoBoundaryInMultipartDefect))
1942 unless(isinstance(self.get_defects(msg)[1],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001943 errors.MultipartInvariantViolationDefect))
1944
1945 def test_missing_start_boundary(self):
1946 outer = self._msgobj('msg_42.txt')
1947 # The message structure is:
1948 #
1949 # multipart/mixed
1950 # text/plain
1951 # message/rfc822
1952 # multipart/mixed [*]
1953 #
1954 # [*] This message is missing its start boundary
1955 bad = outer.get_payload(1).get_payload(0)
R David Murray3edd22a2011-04-18 13:59:37 -04001956 self.assertEqual(len(self.get_defects(bad)), 1)
1957 self.assertTrue(isinstance(self.get_defects(bad)[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001958 errors.StartBoundaryNotFoundDefect))
1959
1960 def test_first_line_is_continuation_header(self):
1961 eq = self.assertEqual
1962 m = ' Line 1\nLine 2\nLine 3'
R David Murray3edd22a2011-04-18 13:59:37 -04001963 msg = email.message_from_string(m, policy=self.policy)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001964 eq(msg.keys(), [])
1965 eq(msg.get_payload(), 'Line 2\nLine 3')
R David Murray3edd22a2011-04-18 13:59:37 -04001966 eq(len(self.get_defects(msg)), 1)
1967 self.assertTrue(isinstance(self.get_defects(msg)[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001968 errors.FirstHeaderLineIsContinuationDefect))
R David Murray3edd22a2011-04-18 13:59:37 -04001969 eq(self.get_defects(msg)[0].line, ' Line 1\n')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001970
1971
class TestNonConformant(TestNonConformantBase, TestEmailBase):
    # Runs the non-conformance tests under the default policy, reading the
    # defects back from the ``defects`` attribute of the inspected object.

    policy = email.policy.default

    def get_defects(self, obj):
        found = obj.defects
        return found
1978
1979
class TestNonConformantCapture(TestNonConformantBase, TestEmailBase):
    # Runs the non-conformance tests under a policy whose register_defect
    # hook collects every defect in one list owned by the policy.

    class CapturePolicy(email.policy.Policy):
        # Replaced with a fresh list for each test by setUp below.
        captured = None

        def register_defect(self, obj, defect):
            # ``obj`` is ignored: all defects land in the same list.
            self.captured.append(defect)

    def setUp(self):
        fresh_log = []
        self.policy = self.CapturePolicy(captured=fresh_log)

    def get_defects(self, obj):
        # Everything captured so far, regardless of which object is asked
        # about.
        return self.policy.captured
1992
1993
class TestRaisingDefects(TestEmailBase):
    # Under the strict policy, parsing is expected to raise the defect
    # instead of recording it.

    def _msgobj(self, filename):
        # Parse the named test data file under the strict policy.
        with openfile(filename) as fp:
            return email.message_from_file(fp, policy=email.policy.strict)

    def test_same_boundary_inner_outer(self):
        with self.assertRaises(errors.StartBoundaryNotFoundDefect):
            self._msgobj('msg_15.txt')

    def test_multipart_no_boundary(self):
        with self.assertRaises(errors.NoBoundaryInMultipartDefect):
            self._msgobj('msg_25.txt')

    def test_lying_multipart(self):
        with self.assertRaises(errors.NoBoundaryInMultipartDefect):
            self._msgobj('msg_41.txt')

    def test_missing_start_boundary(self):
        with self.assertRaises(errors.StartBoundaryNotFoundDefect):
            self._msgobj('msg_42.txt')

    def test_first_line_is_continuation_header(self):
        m = ' Line 1\nLine 2\nLine 3'
        with self.assertRaises(errors.FirstHeaderLineIsContinuationDefect):
            # The result is never used: the call is expected to raise.
            email.message_from_string(m, policy=email.policy.strict)
2021
Ezio Melottib3aedd42010-11-20 19:04:17 +00002022
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002023# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    def test_rfc2047_multiline(self):
        # Encoded words spread over a folded header decode to chunks that
        # round-trip through make_header and re-encode at 76 columns.
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        rebuilt = make_header(chunks)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(rebuilt.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        # The space between an encoded word and following plain text is
        # preserved when the header is rebuilt.
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Andr\xe9', 'iso-8859-1'),
                                  (b'Pirard <pirard@dom.ain>', None)])
        rebuilt = str(make_header(chunks))
        self.assertEqual(rebuilt, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        # Alternating plain and base64-encoded words.
        raw = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
               '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        chunks = decode_header(raw)
        expected = [(b'The', None), (b'quick brown fox', 'iso-8859-1'),
                    (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')]
        self.assertEqual(chunks, expected)
        rebuilt = str(make_header(chunks))
        self.assertEqual(rebuilt,
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words not delimited by whitespace are left undecoded.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        # The same words, properly delimited, are decoded individually.
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        expected = [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                    (b'rg', None), (b'\xe5', 'iso-8859-1'),
                    (b'sbord', None)]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_B_bad_padding(self):
        # Base64 encoded words with missing or short padding still decode.
        template = '=?iso-8859-1?B?%s?='
        cases = [                       # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
            ]
        for encoded, decoded in cases:
            chunks = decode_header(template % encoded)
            self.assertEqual(chunks, [(decoded, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        expected = [(b'andr\xe9=zz', 'iso-8659-1')]
        self.assertEqual(decode_header(raw), expected)
2087
Ezio Melottib3aedd42010-11-20 19:04:17 +00002088
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002089# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    def setUp(self):
        # Keep the raw text of the message/rfc822 sample on the instance.
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        # MIMEMessage only wraps Message objects, never plain strings.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapper stores the very object it was given, not a copy.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        # A message/rfc822 wrapper holds one message; attaching a second
        # must fail.
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested
        # Message objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A hand-built multipart with preamble and epilogue must flatten
        # to exactly the text stored in msg_21.txt.
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # A preamble without a trailing newline is still separated from
        # the first boundary by one.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def _check_parsed_default_types(self, filename):
        # Shared body of the two parsed default-type tests: both top-level
        # parts must be message/rfc822 and each must wrap a text/plain
        # message, for default type and content type alike.
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type(self):
        self._check_parsed_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_parsed_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed, the parts must still report
        # message/rfc822 and the part headers disappear from the output.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        # Parts passed through _subparts are attached in the given order.
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002388
Ezio Melottib3aedd42010-11-20 19:04:17 +00002389
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002390# A general test of parser->model->generator idempotency. IOW, read a message
2391# in, parse it into a message object tree, then without touching the tree,
2392# regenerate the plain text. The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
2394# contain a changed date.
class TestIdempotent(TestEmailBase):
    # Round-trip tests: read a sample message, parse it, flatten the
    # untouched object tree and require the output to equal the input text.

    # Line separator expected in parsed preambles, epilogues and payloads.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return a (parsed message, raw text) pair for the named data file.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) and
        # compare the result with the original text.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        # No Content-Type header: the type falls back to text/plain and
        # there are no parameters to report.
        msg, text = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_message_delivery_status(self):
        # This sample is flattened with its Unix-From line included.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        self._idempotent(*self._msgobj('msg_45.txt'))

    def test_content_type(self):
        eq = self.assertEqual
        # Only the parsed message is needed here, not the raw text.
        msg, _ = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        # The message/rfc822 part wraps exactly one nested Message.
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002552
2553
Ezio Melottib3aedd42010-11-20 19:04:17 +00002554
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002555# Test various other bits of the package's functionality
class TestMiscellaneous(TestEmailBase):
    # Grab-bag of checks for package-level helpers: the message_from_*
    # factories, the email.utils date and address functions, Charset
    # behaviour, and a few parser regressions.

    def test_message_from_string(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        # Don't wrap/continue long headers since we're trying to test
        # idempotency.
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        self.assertEqual(text, s.getvalue())

    def test_message_from_file(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()
            # Rewind so the parser sees the same content just read.
            fp.seek(0)
            msg = email.message_from_file(fp)
        s = StringIO()
        # Don't wrap/continue long headers since we're trying to test
        # idempotency.
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        self.assertEqual(text, s.getvalue())

    def test_message_from_string_with_class(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()

        # Create a subclass
        class MyMessage(Message):
            pass

        msg = email.message_from_string(text, MyMessage)
        self.assertIsInstance(msg, MyMessage)
        # Try something more complicated
        with openfile('msg_02.txt') as fp:
            text = fp.read()
        msg = email.message_from_string(text, MyMessage)
        for subpart in msg.walk():
            self.assertIsInstance(subpart, MyMessage)

    def test_message_from_file_with_class(self):
        # Create a subclass
        class MyMessage(Message):
            pass

        with openfile('msg_01.txt') as fp:
            msg = email.message_from_file(fp, MyMessage)
        self.assertIsInstance(msg, MyMessage)
        # Try something more complicated
        with openfile('msg_02.txt') as fp:
            msg = email.message_from_file(fp, MyMessage)
        for subpart in msg.walk():
            self.assertIsInstance(subpart, MyMessage)

    def test__all__(self):
        module = __import__('email')
        self.assertEqual(sorted(module.__all__), [
            'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
            'generator', 'header', 'iterators', 'message',
            'message_from_binary_file', 'message_from_bytes',
            'message_from_file', 'message_from_string', 'mime', 'parser',
            'quoprimime', 'utils',
            ])

    def test_formatdate(self):
        now = time.time()
        self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
                         time.gmtime(now)[:6])

    def test_formatdate_localtime(self):
        now = time.time()
        self.assertEqual(
            utils.parsedate(utils.formatdate(now, localtime=True))[:6],
            time.localtime(now)[:6])

    def test_formatdate_usegmt(self):
        # Without usegmt the zone is written as -0000; with it, as GMT.
        now = time.time()
        self.assertEqual(
            utils.formatdate(now, localtime=False),
            time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
        self.assertEqual(
            utils.formatdate(now, localtime=False, usegmt=True),
            time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))

    def test_parsedate_none(self):
        self.assertIsNone(utils.parsedate(''))

    def test_parsedate_compact(self):
        # The FWS after the comma is optional
        self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
                         utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))

    def test_parsedate_no_dayofweek(self):
        eq = self.assertEqual
        eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
           (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))

    def test_parsedate_compact_no_dayofweek(self):
        eq = self.assertEqual
        eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))

    def test_parsedate_no_space_before_positive_offset(self):
        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))

    def test_parsedate_no_space_before_negative_offset(self):
        # Issue 1155362: we already handled '+' for this case.
        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))

    def test_parsedate_accepts_time_with_dots(self):
        # Dots are accepted as the time separator; missing seconds read as 0.
        eq = self.assertEqual
        eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
        eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
           (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))

    def test_parsedate_acceptable_to_time_functions(self):
        # The tuples returned must be usable with time.mktime/strftime;
        # parsedate_tz's tuple needs trimming to its first nine fields.
        eq = self.assertEqual
        timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
        t = int(time.mktime(timetup))
        eq(time.localtime(t)[:6], timetup[:6])
        eq(int(time.strftime('%Y', timetup)), 2003)
        timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
        t = int(time.mktime(timetup[:9]))
        eq(time.localtime(t)[:6], timetup[:6])
        eq(int(time.strftime('%Y', timetup[:9])), 2003)

    def test_parsedate_y2k(self):
        """Test for parsing a date with a two-digit year.

        Parsing a date with a two-digit year should return the correct
        four-digit year. RFC822 allows two-digit years, but RFC2822 (which
        obsoletes RFC822) requires four-digit years.

        """
        self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
                         utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
        self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
                         utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))

    def test_parseaddr_empty(self):
        self.assertEqual(utils.parseaddr('<>'), ('', ''))
        self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')

    def test_noquote_dump(self):
        self.assertEqual(
            utils.formataddr(('A Silly Person', 'person@dom.ain')),
            'A Silly Person <person@dom.ain>')

    def test_escape_dump(self):
        self.assertEqual(
            utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
            r'"A (Very) Silly Person" <person@dom.ain>')
        self.assertEqual(
            utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
            ('A (Very) Silly Person', 'person@dom.ain'))
        a = r'A \(Special\) Person'
        b = 'person@dom.ain'
        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))

    def test_escape_backslashes(self):
        # Raw strings throughout: '\B' and '\ ' are not valid escape
        # sequences, so a non-raw literal here triggers a warning on
        # current Python versions while producing the same value.
        a = r'Arthur \Backslash\ Foobar'
        b = 'person@dom.ain'
        self.assertEqual(
            utils.formataddr((a, b)),
            r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))

    def test_quotes_unicode_names(self):
        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
        name = "H\u00e4ns W\u00fcrst"
        addr = 'person@dom.ain'
        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
        latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
        self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
        self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
                         latin1_quopri)

    def test_accepts_any_charset_like_object(self):
        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
        name = "H\u00e4ns W\u00fcrst"
        addr = 'person@dom.ain'
        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
        foobar = "FOOBAR"

        # Anything with a header_encode() method is accepted as a charset.
        class CharsetMock:
            def header_encode(self, string):
                return foobar

        mock = CharsetMock()
        mock_expected = "%s <%s>" % (foobar, addr)
        self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
        self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
                         utf8_base64)

    def test_invalid_charset_like_object_raises_error(self):
        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
        name = "H\u00e4ns W\u00fcrst"
        addr = 'person@dom.ain'
        # An object without a header_encode method:
        bad_charset = object()
        self.assertRaises(AttributeError, utils.formataddr, (name, addr),
                          bad_charset)

    def test_unicode_address_raises_error(self):
        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
        addr = 'pers\u00f6n@dom.in'
        self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
        self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))

    def test_name_with_dot(self):
        x = 'John X. Doe <jxd@example.com>'
        y = '"John X. Doe" <jxd@example.com>'
        a, b = ('John X. Doe', 'jxd@example.com')
        self.assertEqual(utils.parseaddr(x), (a, b))
        self.assertEqual(utils.parseaddr(y), (a, b))
        # formataddr() quotes the name if there's a dot in it
        self.assertEqual(utils.formataddr((a, b)), y)

    def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
        # issue 10005.  Note that in the third test the second pair of
        # backslashes is not actually a quoted pair because it is not inside a
        # comment or quoted string: the address being parsed has a quoted
        # string containing a quoted backslash, followed by 'example' and two
        # backslashes, followed by another quoted string containing a space and
        # the word 'example'.  parseaddr copies those two backslashes
        # literally.  Per rfc5322 this is not technically correct since a \ may
        # not appear in an address outside of a quoted string.  It is probably
        # a sensible Postel interpretation, though.
        eq = self.assertEqual
        eq(utils.parseaddr('""example" example"@example.com'),
           ('', '""example" example"@example.com'))
        eq(utils.parseaddr('"\\"example\\" example"@example.com'),
           ('', '"\\"example\\" example"@example.com'))
        eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
           ('', '"\\\\"example\\\\" example"@example.com'))

    def test_parseaddr_preserves_spaces_in_local_part(self):
        # issue 9286.  A normal RFC5322 local part should not contain any
        # folding white space, but legacy local parts can (they are a sequence
        # of atoms, not dotatoms).  On the other hand we strip whitespace from
        # before the @ and around dots, on the assumption that the whitespace
        # around the punctuation is a mistake in what would otherwise be
        # an RFC5322 local part.  Leading whitespace is, as usual, stripped
        # as well.
        # NOTE(review): runs of spaces inside these literals may have been
        # collapsed in the copy under review -- confirm against the
        # repository version before applying.
        self.assertEqual(('', "merwok wok@xample.com"),
                         utils.parseaddr("merwok wok@xample.com"))
        self.assertEqual(('', "merwok wok@xample.com"),
                         utils.parseaddr("merwok wok@xample.com"))
        self.assertEqual(('', "merwok wok@xample.com"),
                         utils.parseaddr(" merwok wok @xample.com"))
        self.assertEqual(('', 'merwok"wok" wok@xample.com'),
                         utils.parseaddr('merwok"wok" wok@xample.com'))
        self.assertEqual(('', 'merwok.wok.wok@xample.com'),
                         utils.parseaddr('merwok. wok . wok@xample.com'))

    def test_formataddr_does_not_quote_parens_in_quoted_string(self):
        addr = ("'foo@example.com' (foo@example.com)",
                'foo@example.com')
        addrstr = ('"\'foo@example.com\' '
                   '(foo@example.com)" <foo@example.com>')
        self.assertEqual(utils.parseaddr(addrstr), addr)
        self.assertEqual(utils.formataddr(addr), addrstr)

    def test_multiline_from_comment(self):
        # A folded display name (newline + tab) is joined with one space.
        x = """\
Foo
\tBar <foo@example.com>"""
        self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))

    def test_quote_dump(self):
        self.assertEqual(
            utils.formataddr(('A Silly; Person', 'person@dom.ain')),
            r'"A Silly; Person" <person@dom.ain>')

    def test_charset_richcomparisons(self):
        # Charset compares equal to its name case-insensitively, from
        # either side of the operator, and to another default Charset.
        eq = self.assertEqual
        ne = self.assertNotEqual
        cset1 = Charset()
        cset2 = Charset()
        eq(cset1, 'us-ascii')
        eq(cset1, 'US-ASCII')
        eq(cset1, 'Us-AsCiI')
        eq('us-ascii', cset1)
        eq('US-ASCII', cset1)
        eq('Us-AsCiI', cset1)
        ne(cset1, 'usascii')
        ne(cset1, 'USASCII')
        ne(cset1, 'UsAsCiI')
        ne('usascii', cset1)
        ne('USASCII', cset1)
        ne('UsAsCiI', cset1)
        eq(cset1, cset2)
        eq(cset2, cset1)

    def test_getaddresses(self):
        eq = self.assertEqual
        eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
                               'Bud Person <bperson@dom.ain>']),
           [('Al Person', 'aperson@dom.ain'),
            ('Bud Person', 'bperson@dom.ain')])

    def test_getaddresses_nasty(self):
        eq = self.assertEqual
        eq(utils.getaddresses(['foo: ;']), [('', '')])
        eq(utils.getaddresses(
           ['[]*-- =~$']),
           [('', ''), ('', ''), ('', '*--')])
        eq(utils.getaddresses(
           ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
           [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])

    def test_getaddresses_embedded_comment(self):
        """Test proper handling of a nested comment"""
        eq = self.assertEqual
        addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
        eq(addrs[0][1], 'foo@bar.com')

    def test_utils_quote_unquote(self):
        # A filename holding a backslash and a double quote must survive
        # add_header() followed by get_filename().
        eq = self.assertEqual
        msg = Message()
        msg.add_header('content-disposition', 'attachment',
                       filename='foo\\wacky"name')
        eq(msg.get_filename(), 'foo\\wacky"name')

    def test_get_body_encoding_with_bogus_charset(self):
        charset = Charset('not a charset')
        self.assertEqual(charset.get_body_encoding(), 'base64')

    def test_get_body_encoding_with_uppercase_charset(self):
        eq = self.assertEqual
        msg = Message()
        msg['Content-Type'] = 'text/plain; charset=UTF-8'
        eq(msg['content-type'], 'text/plain; charset=UTF-8')
        # get_charsets() reports the name lower-cased.
        charsets = msg.get_charsets()
        eq(len(charsets), 1)
        eq(charsets[0], 'utf-8')
        charset = Charset(charsets[0])
        eq(charset.get_body_encoding(), 'base64')
        msg.set_payload(b'hello world', charset=charset)
        eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
        eq(msg.get_payload(decode=True), b'hello world')
        eq(msg['content-transfer-encoding'], 'base64')
        # Try another one
        msg = Message()
        msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
        charsets = msg.get_charsets()
        eq(len(charsets), 1)
        eq(charsets[0], 'us-ascii')
        charset = Charset(charsets[0])
        eq(charset.get_body_encoding(), encoders.encode_7or8bit)
        msg.set_payload('hello world', charset=charset)
        eq(msg.get_payload(), 'hello world')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_charsets_case_insensitive(self):
        lc = Charset('us-ascii')
        uc = Charset('US-ASCII')
        self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())

    def test_partial_falls_inside_message_delivery_status(self):
        eq = self.ndiffAssertEqual
        # The Parser interface provides chunks of data to FeedParser in 8192
        # byte gulps.  SF bug #1076485 found one of those chunks inside
        # message/delivery-status header block, which triggered an
        # unreadline() of NeedMoreData.
        msg = self._msgobj('msg_43.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # Expected outline: the report's text part, a delivery-status part
        # holding 26 text/plain blocks, then the returned headers.
        # NOTE(review): written with four spaces per nesting level, as
        # email.iterators._structure() is documented to emit; the copy
        # under review had its indentation collapsed, so confirm.
        expected = ('multipart/report\n'
                    '    text/plain\n'
                    '    message/delivery-status\n'
                    + '        text/plain\n' * 26 +
                    '    text/rfc822-headers\n')
        eq(sfp.getvalue(), expected)

    def test_make_msgid_domain(self):
        # The 19-character tail is exactly '@' + the domain + '>'.
        self.assertEqual(
            email.utils.make_msgid(domain='testdomain-string')[-19:],
            '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002966
Ezio Melottib3aedd42010-11-20 19:04:17 +00002967
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002968# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Tests for the helpers in email.iterators, plus a regression test for
    # the feed parser's line buffering.

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 43)
        # msg_19.txt holds the body lines expected from msg_02.txt.
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        # Select every subpart whose main type is text.
        subparts = list(iterators.typed_subpart_iterator(msg, 'text'))
        eq(len(subparts), 2)
        payloads = [subpart.get_payload() for subpart in subparts]
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        # Ask for text/plain explicitly; exactly one part must match.
        subparts = list(
            iterators.typed_subpart_iterator(msg, 'text', 'plain'))
        eq(len(subparts), 1)
        payloads = [subpart.get_payload() for subpart in subparts]
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry is (data pushed, number of complete lines that push is
        # expected to make readable).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            # Drain every line available now; readline() hands back the
            # NeedMoreData sentinel once the buffer has no complete line.
            ready = list(iter(bsf.readline, NeedMoreData))
            om.extend(ready)
            self.assertEqual(len(ready), n)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: the lines read back must
        # concatenate to exactly what was pushed.
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
3055
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003056
Ezio Melottib3aedd42010-11-20 19:04:17 +00003057
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003058class TestParsers(TestEmailBase):
R David Murrayb35c8502011-04-13 16:46:05 -04003059
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003060 def test_header_parser(self):
3061 eq = self.assertEqual
3062 # Parse only the headers of a complex multipart MIME document
3063 with openfile('msg_02.txt') as fp:
3064 msg = HeaderParser().parse(fp)
3065 eq(msg['from'], 'ppp-request@zzz.org')
3066 eq(msg['to'], 'ppp@zzz.org')
3067 eq(msg.get_content_type(), 'multipart/mixed')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003068 self.assertFalse(msg.is_multipart())
3069 self.assertTrue(isinstance(msg.get_payload(), str))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003070
R David Murrayb35c8502011-04-13 16:46:05 -04003071 def test_bytes_header_parser(self):
3072 eq = self.assertEqual
3073 # Parse only the headers of a complex multipart MIME document
3074 with openfile('msg_02.txt', 'rb') as fp:
3075 msg = email.parser.BytesHeaderParser().parse(fp)
3076 eq(msg['from'], 'ppp-request@zzz.org')
3077 eq(msg['to'], 'ppp@zzz.org')
3078 eq(msg.get_content_type(), 'multipart/mixed')
3079 self.assertFalse(msg.is_multipart())
3080 self.assertTrue(isinstance(msg.get_payload(), str))
3081 self.assertTrue(isinstance(msg.get_payload(decode=True), bytes))
3082
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003083 def test_whitespace_continuation(self):
3084 eq = self.assertEqual
3085 # This message contains a line after the Subject: header that has only
3086 # whitespace, but it is not empty!
3087 msg = email.message_from_string("""\
3088From: aperson@dom.ain
3089To: bperson@dom.ain
3090Subject: the next line has a space on it
3091\x20
3092Date: Mon, 8 Apr 2002 15:09:19 -0400
3093Message-ID: spam
3094
3095Here's the message body
3096""")
3097 eq(msg['subject'], 'the next line has a space on it\n ')
3098 eq(msg['message-id'], 'spam')
3099 eq(msg.get_payload(), "Here's the message body\n")
3100
3101 def test_whitespace_continuation_last_header(self):
3102 eq = self.assertEqual
3103 # Like the previous test, but the subject line is the last
3104 # header.
3105 msg = email.message_from_string("""\
3106From: aperson@dom.ain
3107To: bperson@dom.ain
3108Date: Mon, 8 Apr 2002 15:09:19 -0400
3109Message-ID: spam
3110Subject: the next line has a space on it
3111\x20
3112
3113Here's the message body
3114""")
3115 eq(msg['subject'], 'the next line has a space on it\n ')
3116 eq(msg['message-id'], 'spam')
3117 eq(msg.get_payload(), "Here's the message body\n")
3118
3119 def test_crlf_separation(self):
3120 eq = self.assertEqual
Guido van Rossum98297ee2007-11-06 21:34:58 +00003121 with openfile('msg_26.txt', newline='\n') as fp:
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003122 msg = Parser().parse(fp)
3123 eq(len(msg.get_payload()), 2)
3124 part1 = msg.get_payload(0)
3125 eq(part1.get_content_type(), 'text/plain')
3126 eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
3127 part2 = msg.get_payload(1)
3128 eq(part2.get_content_type(), 'application/riscos')
3129
R. David Murray8451c4b2010-10-23 22:19:56 +00003130 def test_crlf_flatten(self):
3131 # Using newline='\n' preserves the crlfs in this input file.
3132 with openfile('msg_26.txt', newline='\n') as fp:
3133 text = fp.read()
3134 msg = email.message_from_string(text)
3135 s = StringIO()
3136 g = Generator(s)
3137 g.flatten(msg, linesep='\r\n')
3138 self.assertEqual(s.getvalue(), text)
3139
R David Murray3edd22a2011-04-18 13:59:37 -04003140 def test_crlf_control_via_policy(self):
3141 with openfile('msg_26.txt', newline='\n') as fp:
3142 text = fp.read()
3143 msg = email.message_from_string(text)
3144 s = StringIO()
3145 g = email.generator.Generator(s, policy=email.policy.SMTP)
3146 g.flatten(msg)
3147 self.assertEqual(s.getvalue(), text)
3148
3149 def test_flatten_linesep_overrides_policy(self):
3150 # msg_27 is lf separated
3151 with openfile('msg_27.txt', newline='\n') as fp:
3152 text = fp.read()
3153 msg = email.message_from_string(text)
3154 s = StringIO()
3155 g = email.generator.Generator(s, policy=email.policy.SMTP)
3156 g.flatten(msg, linesep='\n')
3157 self.assertEqual(s.getvalue(), text)
3158
R. David Murray8451c4b2010-10-23 22:19:56 +00003159 maxDiff = None
3160
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003161 def test_multipart_digest_with_extra_mime_headers(self):
3162 eq = self.assertEqual
3163 neq = self.ndiffAssertEqual
3164 with openfile('msg_28.txt') as fp:
3165 msg = email.message_from_file(fp)
3166 # Structure is:
3167 # multipart/digest
3168 # message/rfc822
3169 # text/plain
3170 # message/rfc822
3171 # text/plain
3172 eq(msg.is_multipart(), 1)
3173 eq(len(msg.get_payload()), 2)
3174 part1 = msg.get_payload(0)
3175 eq(part1.get_content_type(), 'message/rfc822')
3176 eq(part1.is_multipart(), 1)
3177 eq(len(part1.get_payload()), 1)
3178 part1a = part1.get_payload(0)
3179 eq(part1a.is_multipart(), 0)
3180 eq(part1a.get_content_type(), 'text/plain')
3181 neq(part1a.get_payload(), 'message 1\n')
3182 # next message/rfc822
3183 part2 = msg.get_payload(1)
3184 eq(part2.get_content_type(), 'message/rfc822')
3185 eq(part2.is_multipart(), 1)
3186 eq(len(part2.get_payload()), 1)
3187 part2a = part2.get_payload(0)
3188 eq(part2a.is_multipart(), 0)
3189 eq(part2a.get_content_type(), 'text/plain')
3190 neq(part2a.get_payload(), 'message 2\n')
3191
3192 def test_three_lines(self):
3193 # A bug report by Andrew McNamara
3194 lines = ['From: Andrew Person <aperson@dom.ain',
3195 'Subject: Test',
3196 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
3197 msg = email.message_from_string(NL.join(lines))
3198 self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')
3199
3200 def test_strip_line_feed_and_carriage_return_in_headers(self):
3201 eq = self.assertEqual
3202 # For [ 1002475 ] email message parser doesn't handle \r\n correctly
3203 value1 = 'text'
3204 value2 = 'more text'
3205 m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
3206 value1, value2)
3207 msg = email.message_from_string(m)
3208 eq(msg.get('Header'), value1)
3209 eq(msg.get('Next-Header'), value2)
3210
3211 def test_rfc2822_header_syntax(self):
3212 eq = self.assertEqual
3213 m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3214 msg = email.message_from_string(m)
3215 eq(len(msg), 3)
3216 eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
3217 eq(msg.get_payload(), 'body')
3218
3219 def test_rfc2822_space_not_allowed_in_header(self):
3220 eq = self.assertEqual
3221 m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3222 msg = email.message_from_string(m)
3223 eq(len(msg.keys()), 0)
3224
3225 def test_rfc2822_one_character_header(self):
3226 eq = self.assertEqual
3227 m = 'A: first header\nB: second header\nCC: third header\n\nbody'
3228 msg = email.message_from_string(m)
3229 headers = msg.keys()
3230 headers.sort()
3231 eq(headers, ['A', 'B', 'CC'])
3232 eq(msg.get_payload(), 'body')
3233
R. David Murray45e0e142010-06-16 02:19:40 +00003234 def test_CRLFLF_at_end_of_part(self):
3235 # issue 5610: feedparser should not eat two chars from body part ending
3236 # with "\r\n\n".
3237 m = (
3238 "From: foo@bar.com\n"
3239 "To: baz\n"
3240 "Mime-Version: 1.0\n"
3241 "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
3242 "\n"
3243 "--BOUNDARY\n"
3244 "Content-Type: text/plain\n"
3245 "\n"
3246 "body ending with CRLF newline\r\n"
3247 "\n"
3248 "--BOUNDARY--\n"
3249 )
3250 msg = email.message_from_string(m)
3251 self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003252
Ezio Melottib3aedd42010-11-20 19:04:17 +00003253
class Test8BitBytesHandling(unittest.TestCase):
    # In Python3 all input is string, but that doesn't work if the actual input
    # uses an 8bit transfer encoding.  To hack around that, in email 5.1 we
    # decode byte streams using the surrogateescape error handler, and
    # reconvert to binary at appropriate places if we detect surrogates.  This
    # doesn't allow us to transform headers with 8bit bytes (they get munged),
    # but it does allow us to parse and preserve them, and to decode body
    # parts that use an 8bit CTE.
    #
    # NOTE(review): several literals below are whitespace sensitive (folded
    # header continuation lines, dedent blocks).  Verify their exact leading
    # whitespace against a pristine copy of the file.

    # Template for the body tests; callers fill in charset, cte and bodyline
    # with str.format() and then encode the result to bytes.
    bodytest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: baz
        Mime-Version: 1.0
        Content-Type: text/plain; charset={charset}
        Content-Transfer-Encoding: {cte}

        {bodyline}
        """)

    def test_known_8bit_CTE(self):
        # utf-8 body declared as charset utf-8 with an 8bit CTE: the str
        # payload is the decoded text, decode=True gives the utf-8 bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='8bit',
                                     bodyline='pöstal').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), "pöstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_unknown_8bit_CTE(self):
        # Same bytes, but the declared charset name is not a valid one: the
        # str payload shows U+FFFD for each non-ascii byte, while decode=True
        # still returns the original bytes.
        m = self.bodytest_msg.format(charset='notavalidcharset',
                                     cte='8bit',
                                     bodyline='pöstal').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_8bit_in_quopri_body(self):
        # This is non-RFC compliant data...without 'decode' the library code
        # decodes the body using the charset from the headers, and because the
        # source byte really is utf-8 this works.  This is likely to fail
        # against real dirty data (ie: produce mojibake), but the data is
        # invalid anyway so it is as good a guess as any.  But this means that
        # this test just confirms the current behavior; that behavior is not
        # necessarily the best possible behavior.  With 'decode' it is
        # returning the raw bytes, so that test should be of correct behavior,
        # or at least produce the same result that email4 did.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='quoted-printable',
                                     bodyline='p=C3=B6stál').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
        # This is similar to the previous test, but proves that if the 8bit
        # byte is undecodeable in the specified charset, it gets replaced
        # by the unicode 'unknown' character.  Again, this may or may not
        # be the ideal behavior.  Note that if decode=False none of the
        # decoders will get involved, so this is the only test we need
        # for this behavior.
        m = self.bodytest_msg.format(charset='ascii',
                                     cte='quoted-printable',
                                     bodyline='p=C3=B6stál').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_8bit_in_base64_body(self):
        # Sticking an 8bit byte in a base64 block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='base64',
                                     bodyline='cMO2c3RhbAá=').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(decode=True),
                         'cMO2c3RhbAá=\n'.encode('utf-8'))

    def test_8bit_in_uuencode_body(self):
        # Sticking an 8bit byte in a uuencode block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='uuencode',
                                     bodyline='<,.V<W1A; á ').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(decode=True),
                         '<,.V<W1A; á \n'.encode('utf-8'))


    # Each entry pairs a raw header line with (header name, expected value
    # when the message is printed); non-ascii values are expected as RFC 2047
    # encoded words using the 'unknown-8bit' charset.
    headertest_headers = (
        ('From: foo@bar.com', ('From', 'foo@bar.com')),
        ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
        ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
            '\tJean de Baddie',
            ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
                'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
                ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
        ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
        )
    # The raw header lines joined into one utf-8 encoded message with a
    # one-line body.
    headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
        '\nYes, they are flying.\n').encode('utf-8')

    def test_get_8bit_header(self):
        # Both get() and item access show U+FFFD in place of each 8bit byte
        # once the header value is converted with str().
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
        self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')

    def test_print_8bit_headers(self):
        # str(msg) must render every header as the expected value recorded in
        # headertest_headers.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertEqual(str(msg),
                         textwrap.dedent("""\
                            From: {}
                            To: {}
                            Subject: {}
                            From: {}

                            Yes, they are flying.
                            """).format(*[expected[1] for (_, expected) in
                                        self.headertest_headers]))

    def test_values_with_8bit_headers(self):
        # values() keeps header order; 8bit bytes appear as U+FFFD.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([str(x) for x in msg.values()],
                              ['foo@bar.com',
                               'b\uFFFD\uFFFDz',
                               'Maintenant je vous pr\uFFFD\uFFFDsente mon '
                                   'coll\uFFFD\uFFFDgue, le pouf '
                                   'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                   '\tJean de Baddie',
                               "g\uFFFD\uFFFDst"])

    def test_items_with_8bit_headers(self):
        # Same expectation as for values(), but on (name, value) pairs.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
                              [('From', 'foo@bar.com'),
                               ('To', 'b\uFFFD\uFFFDz'),
                               ('Subject', 'Maintenant je vous '
                                  'pr\uFFFD\uFFFDsente '
                                  'mon coll\uFFFD\uFFFDgue, le pouf '
                                  'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                  '\tJean de Baddie'),
                               ('From', 'g\uFFFD\uFFFDst')])

    def test_get_all_with_8bit_headers(self):
        # get_all() returns both From headers, the second with U+FFFD.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([str(x) for x in msg.get_all('from')],
                              ['foo@bar.com',
                               'g\uFFFD\uFFFDst'])

    def test_get_content_type_with_8bit(self):
        # An 8bit byte inside the subtype is reported as U+FFFD by the
        # content-type accessors; the maintype is unaffected.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/pl\xA7in; charset=utf-8
            """).encode('latin-1'))
        self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
        self.assertEqual(msg.get_content_maintype(), "text")
        self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")

    def test_get_params_with_8bit(self):
        # 8bit bytes in parameter names and values come back as U+FFFD.
        msg = email.message_from_bytes(
            'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
        self.assertEqual(msg.get_params(header='x-header'),
           [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
        self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
        # XXX: someday you might be able to get 'b\xa7r', for now you can't.
        self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)

    def test_get_rfc2231_params_with_8bit(self):
        # The title* line is a folded continuation of Content-Type, so it
        # has to start with whitespace.
        # NOTE(review): confirm the continuation's leading space.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
             ).encode('latin-1'))
        self.assertEqual(msg.get_param('title'),
            ('us-ascii', 'en', 'This is not f\uFFFDn'))

    def test_set_rfc2231_params_with_8bit(self):
        # Replacing a parameter whose old value held an 8bit byte must work.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
             ).encode('latin-1'))
        msg.set_param('title', 'test')
        self.assertEqual(msg.get_param('title'), 'test')

    def test_del_rfc2231_params_with_8bit(self):
        # Deleting such a parameter must work and leave the rest of the
        # Content-Type header usable.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
             ).encode('latin-1'))
        msg.del_param('title')
        self.assertEqual(msg.get_param('title'), None)
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_payload_with_8bit_cte_header(self):
        # The Content-Transfer-Encoding value itself contains an 8bit byte;
        # the payload is expected back unchanged with and without decode.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Transfer-Encoding: b\xa7se64
            Content-Type: text/plain; charset=latin-1

            payload
            """).encode('latin-1'))
        self.assertEqual(msg.get_payload(), 'payload\n')
        self.assertEqual(msg.get_payload(decode=True), b'payload\n')

    # utf-8 encoded message with raw 8bit bytes in its headers and in an 8bit
    # CTE body.
    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')

    def test_bytes_generator(self):
        # BytesGenerator must reproduce the 8bit input byte for byte.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), self.non_latin_bin_msg)

    def test_bytes_generator_handles_None_body(self):
        #Issue 11019
        # A freshly created Message flattens to just the blank separator line.
        msg = email.message.Message()
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), b"\n")

    # Expected 7bit rendering of non_latin_bin_msg: headers as 'unknown-8bit'
    # encoded words (Subject wrapped over three lines) and a base64 body.
    # NOTE(review): the two Subject continuation lines must start with a
    # space; confirm against a pristine copy.
    non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
        From: foo@bar.com
        To: =?unknown-8bit?q?b=C3=A1z?=
        Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
         =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
         =?unknown-8bit?q?_Jean_de_Baddie?=
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: base64

        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
        """)

    def test_generator_handles_8bit(self):
        # The str Generator cannot emit 8bit data, so it is expected to
        # produce the wrapped 7bit form.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = StringIO()
        email.generator.Generator(out).flatten(msg)
        self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)

    def test_bytes_generator_with_unix_from(self):
        # The unixfrom contains a current date, so we can't check it
        # literally.  Just make sure the first word is 'From' and the
        # rest of the message matches the input.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
        lines = out.getvalue().split(b'\n')
        self.assertEqual(lines[0].split()[0], b'From')
        self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)

    # Variant of the wrapped form in which the first two Subject lines
    # (list items 2 and 3) are merged into one; the last continuation line
    # is kept as is.
    non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
    non_latin_bin_msg_as7bit[2:4] = [
        'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
         'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)

    def test_message_from_binary_file(self):
        # Round trip through a real file opened in binary mode; the file is
        # removed again through addCleanup.
        fn = 'test.msg'
        self.addCleanup(unlink, fn)
        with open(fn, 'wb') as testfile:
            testfile.write(self.non_latin_bin_msg)
        with open(fn, 'rb') as testfile:
            m = email.parser.BytesParser().parse(testfile)
        self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)

    # latin-1 encoded message with an 8bit CTE body.
    latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    # Expected 7bit rendering of latin_bin_msg: charset spelled iso-8859-1
    # and the body quoted-printable encoded.
    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)

    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
        # str() of the parsed message must equal the quoted-printable form.
        m = email.message_from_bytes(self.latin_bin_msg)
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_decoded_generator_emits_unicode_body(self):
        m = email.message_from_bytes(self.latin_bin_msg)
        out = StringIO()
        email.generator.DecodedGenerator(out).flatten(m)
        #DecodedHeader output contains an extra blank line compared
        #to the input message.  RDM: not sure if this is a bug or not,
        #but it is not specific to the 8bit->7bit conversion.
        self.assertEqual(out.getvalue(),
                         self.latin_bin_msg.decode('latin-1')+'\n')

    def test_bytes_feedparser(self):
        # Feed the message in 10-byte slices so that chunk boundaries fall
        # at arbitrary places in the input.
        bfp = email.feedparser.BytesFeedParser()
        for i in range(0, len(self.latin_bin_msg), 10):
            bfp.feed(self.latin_bin_msg[i:i+10])
        m = bfp.close()
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_crlf_flatten(self):
        # Bytes round trip of msg_26.txt with an explicit '\r\n' linesep.
        with openfile('msg_26.txt', 'rb') as fp:
            text = fp.read()
        msg = email.message_from_bytes(text)
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_8bit_multipart(self):
        # Issue 11605
        # A multipart message with 8bit parts must survive a parse/flatten
        # round trip unchanged.
        # NOTE(review): the indentation of the folded boundary= line and of
        # the HTML lines inside this literal could not be recovered; the
        # boundary= line must keep leading whitespace.  Confirm the rest
        # against a pristine copy.
        source = textwrap.dedent("""\
            Date: Fri, 18 Mar 2011 17:15:43 +0100
            To: foo@example.com
            From: foodwatch-Newsletter <bar@example.com>
            Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
            Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
            MIME-Version: 1.0
            Content-Type: multipart/alternative;
                    boundary="b1_76a486bee62b0d200f33dc2ca08220ad"

            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/plain; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            Guten Tag, ,

            mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
            Nachrichten aus Japan.


            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/html; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
                "http://www.w3.org/TR/html4/loose.dtd">
            <html lang="de">
            <head>
                    <title>foodwatch - Newsletter</title>
            </head>
            <body>
              <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
                 die Nachrichten aus Japan.</p>
            </body>
            </html>
            --b1_76a486bee62b0d200f33dc2ca08220ad--

            """).encode('utf-8')
        msg = email.message_from_bytes(source)
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), source)

    def test_bytes_generator_b_encoding_linesep(self):
        # Issue 14062: b encoding was tacking on an extra \n.
        m = Message()
        # This has enough non-ascii that it should always end up b encoded.
        m['Subject'] = Header('žluťoučký kůň')
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(m, linesep='\r\n')
        self.assertEqual(
            s.getvalue(),
            b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

    def test_generator_b_encoding_linesep(self):
        # Since this broke in ByteGenerator, test Generator for completeness.
        m = Message()
        # This has enough non-ascii that it should always end up b encoded.
        m['Subject'] = Header('žluťoučký kůň')
        s = StringIO()
        g = email.generator.Generator(s)
        g.flatten(m, linesep='\r\n')
        self.assertEqual(
            s.getvalue(),
            'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

    def test_crlf_control_via_policy(self):
        # msg_26 is crlf terminated
        # No linesep is given; the SMTP policy has to supply it.
        with openfile('msg_26.txt', 'rb') as fp:
            text = fp.read()
        msg = email.message_from_bytes(text)
        s = BytesIO()
        g = email.generator.BytesGenerator(s, policy=email.policy.SMTP)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), text)

    def test_flatten_linesep_overrides_policy(self):
        # msg_27 is lf separated
        # The explicit linesep='\n' has to win over the SMTP policy.
        with openfile('msg_27.txt', 'rb') as fp:
            text = fp.read()
        msg = email.message_from_bytes(text)
        s = BytesIO()
        g = email.generator.BytesGenerator(s, policy=email.policy.SMTP)
        g.flatten(msg, linesep='\n')
        self.assertEqual(s.getvalue(), text)

    def test_must_be_7bit_handles_unknown_8bit(self):
        # With must_be_7bit=True even BytesGenerator has to emit the wrapped
        # 7bit form of the utf-8 message.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        g = email.generator.BytesGenerator(out,
            policy=email.policy.default.clone(must_be_7bit=True))
        g.flatten(msg)
        self.assertEqual(out.getvalue(),
            self.non_latin_bin_msg_as7bit_wrapped.encode('ascii'))

    def test_must_be_7bit_transforms_8bit_cte(self):
        # Same policy on the latin-1 message: the 8bit body has to come out
        # quoted-printable.
        msg = email.message_from_bytes(self.latin_bin_msg)
        out = BytesIO()
        g = email.generator.BytesGenerator(out,
            policy=email.policy.default.clone(must_be_7bit=True))
        g.flatten(msg)
        self.assertEqual(out.getvalue(),
            self.latin_bin_msg_as7bit.encode('ascii'))

    # Show complete diffs when an assertion in this class fails.
    maxDiff = None
3688
Ezio Melottib3aedd42010-11-20 19:04:17 +00003689
class BaseTestBytesGeneratorIdempotent:
    # Mixin with bytes-based helpers.  Concrete subclasses provide
    # `linesep` (str), `blinesep` (bytes) and `normalize_linesep_regex`
    # (a compiled bytes pattern), plus the TestCase assertion methods.

    maxDiff = None

    def _msgobj(self, filename):
        # Return (message, raw_bytes) for a test data file after rewriting
        # every match of normalize_linesep_regex to this class's blinesep.
        with openfile(filename, 'rb') as source:
            raw = self.normalize_linesep_regex.sub(self.blinesep,
                                                   source.read())
        return email.message_from_bytes(raw), raw

    def _idempotent(self, msg, data, unixfrom=False):
        # Flattening `msg` with unlimited header length and this class's
        # linesep must reproduce `data` exactly.
        sink = BytesIO()
        writer = email.generator.BytesGenerator(sink, maxheaderlen=0)
        writer.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003706
3707
class TestBytesGeneratorIdempotentNL(
        BaseTestBytesGeneratorIdempotent, TestIdempotent):
    # LF flavour: each "\r\n" in the test data is rewritten to "\n" before
    # parsing, and "\n" is the separator used when flattening.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3713
3714
class TestBytesGeneratorIdempotentCRLF(
        BaseTestBytesGeneratorIdempotent, TestIdempotent):
    # CRLF flavour: each "\n" not already preceded by "\r" is rewritten to
    # "\r\n" before parsing, and "\r\n" is used when flattening.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3720
Ezio Melottib3aedd42010-11-20 19:04:17 +00003721
class TestBase64(unittest.TestCase):
    # Tests for the helper functions in email.base64mime.

    def test_len(self):
        self.assertEqual(
            base64mime.header_length('hello'),
            len(base64mime.body_encode(b'hello', eol='')))
        # Every started group of three input characters costs four
        # characters of output: 0 -> 0, 1..3 -> 4, 4..6 -> 8, ...
        for size in range(15):
            expected = 4 * ((size + 2) // 3)
            self.assertEqual(base64mime.header_length('x' * size), expected)

    def test_decode(self):
        for encoded, decoded in (('', b''), ('aGVsbG8=', b'hello')):
            self.assertEqual(base64mime.decode(encoded), decoded)

    def test_encode(self):
        encode = base64mime.body_encode
        self.assertEqual(encode(b''), b'')
        self.assertEqual(encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        self.assertEqual(encode(b'hello\n'), 'aGVsbG8K\n')
        payload = b'xxxx ' * 20
        wrapped = (
            'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg',
            'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg',
            'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg',
            'eHh4eCB4eHh4IA==',
        )
        # Test the maxlinelen arg
        self.assertEqual(encode(payload, maxlinelen=40),
                         ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        self.assertEqual(encode(payload, maxlinelen=40, eol='\r\n'),
                         ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        he = base64mime.header_encode
        default_charset_cases = (
            ('hello', '=?iso-8859-1?b?aGVsbG8=?='),
            ('hello\r\nworld', '=?iso-8859-1?b?aGVsbG8NCndvcmxk?='),
            ('hello\nworld', '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?='),
        )
        for text, encoded in default_charset_cases:
            self.assertEqual(he(text), encoded)
        # Test the charset option
        self.assertEqual(he('hello', charset='iso-8859-2'),
                         '=?iso-8859-2?b?aGVsbG8=?=')
        self.assertEqual(he('hello\nworld'),
                         '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003772
3773
Ezio Melottib3aedd42010-11-20 19:04:17 +00003774
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003775class TestQuopri(unittest.TestCase):
3776 def setUp(self):
3777 # Set of characters (as byte integers) that don't need to be encoded
3778 # in headers.
3779 self.hlit = list(chain(
3780 range(ord('a'), ord('z') + 1),
3781 range(ord('A'), ord('Z') + 1),
3782 range(ord('0'), ord('9') + 1),
Guido van Rossum9604e662007-08-30 03:46:43 +00003783 (c for c in b'!*+-/')))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003784 # Set of characters (as byte integers) that do need to be encoded in
3785 # headers.
3786 self.hnon = [c for c in range(256) if c not in self.hlit]
3787 assert len(self.hlit) + len(self.hnon) == 256
3788 # Set of characters (as byte integers) that don't need to be encoded
3789 # in bodies.
3790 self.blit = list(range(ord(' '), ord('~') + 1))
3791 self.blit.append(ord('\t'))
3792 self.blit.remove(ord('='))
3793 # Set of characters (as byte integers) that do need to be encoded in
3794 # bodies.
3795 self.bnon = [c for c in range(256) if c not in self.blit]
3796 assert len(self.blit) + len(self.bnon) == 256
3797
Guido van Rossum9604e662007-08-30 03:46:43 +00003798 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003799 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003800 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003801 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003802 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003803 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003804 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003805
Guido van Rossum9604e662007-08-30 03:46:43 +00003806 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003807 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003808 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003809 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003810 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003811 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003812 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003813
3814 def test_header_quopri_len(self):
3815 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003816 eq(quoprimime.header_length(b'hello'), 5)
3817 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003818 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003819 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003820 # =?xxx?q?...?= means 10 extra characters
3821 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003822 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3823 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003824 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003825 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003826 # =?xxx?q?...?= means 10 extra characters
3827 10)
3828 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003829 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003830 'expected length 1 for %r' % chr(c))
3831 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003832 # Space is special; it's encoded to _
3833 if c == ord(' '):
3834 continue
3835 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003836 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003837 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003838
3839 def test_body_quopri_len(self):
3840 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003841 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003842 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003843 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003844 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003845
3846 def test_quote_unquote_idempotent(self):
3847 for x in range(256):
3848 c = chr(x)
3849 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3850
R David Murrayec1b5b82011-03-23 14:19:05 -04003851 def _test_header_encode(self, header, expected_encoded_header, charset=None):
3852 if charset is None:
3853 encoded_header = quoprimime.header_encode(header)
3854 else:
3855 encoded_header = quoprimime.header_encode(header, charset)
3856 self.assertEqual(encoded_header, expected_encoded_header)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003857
R David Murraycafd79d2011-03-23 15:25:55 -04003858 def test_header_encode_null(self):
3859 self._test_header_encode(b'', '')
3860
R David Murrayec1b5b82011-03-23 14:19:05 -04003861 def test_header_encode_one_word(self):
3862 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
3863
3864 def test_header_encode_two_lines(self):
3865 self._test_header_encode(b'hello\nworld',
3866 '=?iso-8859-1?q?hello=0Aworld?=')
3867
3868 def test_header_encode_non_ascii(self):
3869 self._test_header_encode(b'hello\xc7there',
3870 '=?iso-8859-1?q?hello=C7there?=')
3871
3872 def test_header_encode_alt_charset(self):
3873 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
3874 charset='iso-8859-2')
3875
3876 def _test_header_decode(self, encoded_header, expected_decoded_header):
3877 decoded_header = quoprimime.header_decode(encoded_header)
3878 self.assertEqual(decoded_header, expected_decoded_header)
3879
3880 def test_header_decode_null(self):
3881 self._test_header_decode('', '')
3882
3883 def test_header_decode_one_word(self):
3884 self._test_header_decode('hello', 'hello')
3885
3886 def test_header_decode_two_lines(self):
3887 self._test_header_decode('hello=0Aworld', 'hello\nworld')
3888
3889 def test_header_decode_non_ascii(self):
3890 self._test_header_decode('hello=C7there', 'hello\xc7there')
3891
3892 def _test_decode(self, encoded, expected_decoded, eol=None):
3893 if eol is None:
3894 decoded = quoprimime.decode(encoded)
3895 else:
3896 decoded = quoprimime.decode(encoded, eol=eol)
3897 self.assertEqual(decoded, expected_decoded)
3898
3899 def test_decode_null_word(self):
3900 self._test_decode('', '')
3901
3902 def test_decode_null_line_null_word(self):
3903 self._test_decode('\r\n', '\n')
3904
3905 def test_decode_one_word(self):
3906 self._test_decode('hello', 'hello')
3907
3908 def test_decode_one_word_eol(self):
3909 self._test_decode('hello', 'hello', eol='X')
3910
3911 def test_decode_one_line(self):
3912 self._test_decode('hello\r\n', 'hello\n')
3913
3914 def test_decode_one_line_lf(self):
3915 self._test_decode('hello\n', 'hello\n')
3916
R David Murraycafd79d2011-03-23 15:25:55 -04003917 def test_decode_one_line_cr(self):
3918 self._test_decode('hello\r', 'hello\n')
3919
3920 def test_decode_one_line_nl(self):
3921 self._test_decode('hello\n', 'helloX', eol='X')
3922
3923 def test_decode_one_line_crnl(self):
3924 self._test_decode('hello\r\n', 'helloX', eol='X')
3925
R David Murrayec1b5b82011-03-23 14:19:05 -04003926 def test_decode_one_line_one_word(self):
3927 self._test_decode('hello\r\nworld', 'hello\nworld')
3928
3929 def test_decode_one_line_one_word_eol(self):
3930 self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
3931
3932 def test_decode_two_lines(self):
3933 self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
3934
R David Murraycafd79d2011-03-23 15:25:55 -04003935 def test_decode_two_lines_eol(self):
3936 self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
3937
R David Murrayec1b5b82011-03-23 14:19:05 -04003938 def test_decode_one_long_line(self):
3939 self._test_decode('Spam' * 250, 'Spam' * 250)
3940
3941 def test_decode_one_space(self):
3942 self._test_decode(' ', '')
3943
3944 def test_decode_multiple_spaces(self):
3945 self._test_decode(' ' * 5, '')
3946
3947 def test_decode_one_line_trailing_spaces(self):
3948 self._test_decode('hello \r\n', 'hello\n')
3949
3950 def test_decode_two_lines_trailing_spaces(self):
3951 self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')
3952
3953 def test_decode_quoted_word(self):
3954 self._test_decode('=22quoted=20words=22', '"quoted words"')
3955
3956 def test_decode_uppercase_quoting(self):
3957 self._test_decode('ab=CD=EF', 'ab\xcd\xef')
3958
3959 def test_decode_lowercase_quoting(self):
3960 self._test_decode('ab=cd=ef', 'ab\xcd\xef')
3961
3962 def test_decode_soft_line_break(self):
3963 self._test_decode('soft line=\r\nbreak', 'soft linebreak')
3964
3965 def test_decode_false_quoting(self):
3966 self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
3967
3968 def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
3969 kwargs = {}
3970 if maxlinelen is None:
3971 # Use body_encode's default.
3972 maxlinelen = 76
3973 else:
3974 kwargs['maxlinelen'] = maxlinelen
3975 if eol is None:
3976 # Use body_encode's default.
3977 eol = '\n'
3978 else:
3979 kwargs['eol'] = eol
3980 encoded_body = quoprimime.body_encode(body, **kwargs)
3981 self.assertEqual(encoded_body, expected_encoded_body)
3982 if eol == '\n' or eol == '\r\n':
3983 # We know how to split the result back into lines, so maxlinelen
3984 # can be checked.
3985 for line in encoded_body.splitlines():
3986 self.assertLessEqual(len(line), maxlinelen)
3987
3988 def test_encode_null(self):
3989 self._test_encode('', '')
3990
3991 def test_encode_null_lines(self):
3992 self._test_encode('\n\n', '\n\n')
3993
3994 def test_encode_one_line(self):
3995 self._test_encode('hello\n', 'hello\n')
3996
3997 def test_encode_one_line_crlf(self):
3998 self._test_encode('hello\r\n', 'hello\n')
3999
4000 def test_encode_one_line_eol(self):
4001 self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
4002
4003 def test_encode_one_space(self):
4004 self._test_encode(' ', '=20')
4005
4006 def test_encode_one_line_one_space(self):
4007 self._test_encode(' \n', '=20\n')
4008
# XXX: body_encode() expects strings, but uses ord(char) of each character
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit-only ord() values in body, or at least
# a comment about the expected input?
4013
4014 def test_encode_two_lines_one_space(self):
4015 self._test_encode(' \n \n', '=20\n=20\n')
4016
R David Murrayec1b5b82011-03-23 14:19:05 -04004017 def test_encode_one_word_trailing_spaces(self):
4018 self._test_encode('hello ', 'hello =20')
4019
4020 def test_encode_one_line_trailing_spaces(self):
4021 self._test_encode('hello \n', 'hello =20\n')
4022
4023 def test_encode_one_word_trailing_tab(self):
4024 self._test_encode('hello \t', 'hello =09')
4025
4026 def test_encode_one_line_trailing_tab(self):
4027 self._test_encode('hello \t\n', 'hello =09\n')
4028
4029 def test_encode_trailing_space_before_maxlinelen(self):
4030 self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
4031
R David Murrayb938c8c2011-03-24 12:19:26 -04004032 def test_encode_trailing_space_at_maxlinelen(self):
4033 self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
4034
R David Murrayec1b5b82011-03-23 14:19:05 -04004035 def test_encode_trailing_space_beyond_maxlinelen(self):
R David Murrayb938c8c2011-03-24 12:19:26 -04004036 self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
4037
4038 def test_encode_whitespace_lines(self):
4039 self._test_encode(' \n' * 5, '=20\n' * 5)
R David Murrayec1b5b82011-03-23 14:19:05 -04004040
4041 def test_encode_quoted_equals(self):
4042 self._test_encode('a = b', 'a =3D b')
4043
4044 def test_encode_one_long_string(self):
4045 self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
4046
4047 def test_encode_one_long_line(self):
4048 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
4049
4050 def test_encode_one_very_long_line(self):
4051 self._test_encode('x' * 200 + '\n',
4052 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
4053
4054 def test_encode_one_long_line(self):
4055 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
4056
4057 def test_encode_shortest_maxlinelen(self):
4058 self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004059
R David Murrayb938c8c2011-03-24 12:19:26 -04004060 def test_encode_maxlinelen_too_small(self):
4061 self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
4062
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004063 def test_encode(self):
4064 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00004065 eq(quoprimime.body_encode(''), '')
4066 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004067 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00004068 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004069 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00004070 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004071xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
4072 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
4073x xxxx xxxx xxxx xxxx=20""")
4074 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00004075 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
4076 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004077xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
4078 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
4079x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00004080 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004081one line
4082
4083two line"""), """\
4084one line
4085
4086two line""")
4087
4088
Ezio Melottib3aedd42010-11-20 19:04:17 +00004089
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004090# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode registers a throwaway 'fake' charset; remove it
        # again (if present) so it does not leak into other tests.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        self.assertEqual(ascii_charset.header_encode('Hello World!'),
                         'Hello World!')
        # Test 8-bit idempotency with us-ascii
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_charset.header_encode, eight_bit)
        utf8_charset = Charset('utf-8')
        self.assertEqual(utf8_charset.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # Try a charset with QP body encoding
        charset = Charset('iso-8859-1')
        check('hello w=F6rld', charset.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        charset = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', charset.body_encode(b'hello world'))
        # Try a charset with None body encoding
        charset = Charset('us-ascii')
        check('hello world', charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        charset = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        charset = Charset('fake')
        check('hello world', charset.body_encode('hello world'))

    def test_unicode_charset_name(self):
        # str() of a Charset is its name; a non-ASCII name is rejected.
        ascii_charset = Charset('us-ascii')
        self.assertEqual(str(ascii_charset), 'us-ascii')
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
4145
4146
Ezio Melottib3aedd42010-11-20 19:04:17 +00004147
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004148# Test multilingual MIME headers.
4149class TestHeader(TestEmailBase):
4150 def test_simple(self):
4151 eq = self.ndiffAssertEqual
4152 h = Header('Hello World!')
4153 eq(h.encode(), 'Hello World!')
4154 h.append(' Goodbye World!')
4155 eq(h.encode(), 'Hello World! Goodbye World!')
4156
4157 def test_simple_surprise(self):
4158 eq = self.ndiffAssertEqual
4159 h = Header('Hello World!')
4160 eq(h.encode(), 'Hello World!')
4161 h.append('Goodbye World!')
4162 eq(h.encode(), 'Hello World! Goodbye World!')
4163
4164 def test_header_needs_no_decoding(self):
4165 h = 'no decoding needed'
4166 self.assertEqual(decode_header(h), [(h, None)])
4167
4168 def test_long(self):
4169 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
4170 maxlinelen=76)
4171 for l in h.encode(splitchars=' ').split('\n '):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004172 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004173
    def test_multilingual(self):
        # Build one header from three chunks in three charsets (two given as
        # bytes, one as str), then check the folded RFC 2047 encoding, the
        # decode_header() round trip, str() of the header, and that
        # make_header() rebuilds an equal Header.
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        # Continuation lines start with exactly one space (header folding).
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # decode_header() merges the folded pieces back into one
        # (bytes, charset) pair per original chunk.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        # str() gives the unencoded text; it is spelled here as UTF-8 bytes
        # to keep this source file ASCII-only.
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004236
4237 def test_empty_header_encode(self):
4238 h = Header()
4239 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00004240
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004241 def test_header_ctor_default_args(self):
4242 eq = self.ndiffAssertEqual
4243 h = Header()
4244 eq(h, '')
4245 h.append('foo', Charset('iso-8859-1'))
Guido van Rossum9604e662007-08-30 03:46:43 +00004246 eq(h, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004247
4248 def test_explicit_maxlinelen(self):
4249 eq = self.ndiffAssertEqual
4250 hstr = ('A very long line that must get split to something other '
4251 'than at the 76th character boundary to test the non-default '
4252 'behavior')
4253 h = Header(hstr)
4254 eq(h.encode(), '''\
4255A very long line that must get split to something other than at the 76th
4256 character boundary to test the non-default behavior''')
4257 eq(str(h), hstr)
4258 h = Header(hstr, header_name='Subject')
4259 eq(h.encode(), '''\
4260A very long line that must get split to something other than at the
4261 76th character boundary to test the non-default behavior''')
4262 eq(str(h), hstr)
4263 h = Header(hstr, maxlinelen=1024, header_name='Subject')
4264 eq(h.encode(), hstr)
4265 eq(str(h), hstr)
4266
    def test_quopri_splittable(self):
        # Fold a 100-character value ('xxxx ' * 20) as iso-8859-1 (q
        # encoding) at two small line lengths, check the exact folding, and
        # check that decoding gives the original text back.
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        # maxlinelen=20: three payload characters on the first line, then
        # two per continuation line (each begins with the folding space).
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        # Same value again at maxlinelen=40.
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))
4335
    def test_base64_splittable(self):
        # Fold a 100-character value ('xxxx ' * 20) as koi8-r (b encoding)
        # at two small line lengths, check the exact folding, and check
        # that decoding gives the original text back.
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        # maxlinelen=20: each encoded word carries one 4-character base64
        # group, i.e. three payload characters (one in the last word).
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        # Same value again at maxlinelen=40.
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004389
4390 def test_us_ascii_header(self):
4391 eq = self.assertEqual
4392 s = 'hello'
4393 x = decode_header(s)
4394 eq(x, [('hello', None)])
4395 h = make_header(x)
4396 eq(s, h.encode())
4397
4398 def test_string_charset(self):
4399 eq = self.assertEqual
4400 h = Header()
4401 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00004402 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004403
4404## def test_unicode_error(self):
4405## raises = self.assertRaises
4406## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
4407## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
4408## h = Header()
4409## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
4410## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
4411## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
4412
4413 def test_utf8_shortest(self):
4414 eq = self.assertEqual
4415 h = Header('p\xf6stal', 'utf-8')
4416 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
4417 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
4418 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
4419
4420 def test_bad_8bit_header(self):
4421 raises = self.assertRaises
4422 eq = self.assertEqual
4423 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4424 raises(UnicodeError, Header, x)
4425 h = Header()
4426 raises(UnicodeError, h.append, x)
4427 e = x.decode('utf-8', 'replace')
4428 eq(str(Header(x, errors='replace')), e)
4429 h.append(x, errors='replace')
4430 eq(str(h), e)
4431
R David Murray041015c2011-03-25 15:10:55 -04004432 def test_escaped_8bit_header(self):
4433 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
R David Murray6bdb1762011-06-18 12:30:55 -04004434 e = x.decode('ascii', 'surrogateescape')
4435 h = Header(e, charset=email.charset.UNKNOWN8BIT)
R David Murray041015c2011-03-25 15:10:55 -04004436 self.assertEqual(str(h),
4437 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4438 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4439
R David Murraye5e366c2011-06-18 12:57:28 -04004440 def test_header_handles_binary_unknown8bit(self):
4441 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4442 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4443 self.assertEqual(str(h),
4444 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4445 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4446
4447 def test_make_header_handles_binary_unknown8bit(self):
4448 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4449 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4450 h2 = email.header.make_header(email.header.decode_header(h))
4451 self.assertEqual(str(h2),
4452 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4453 self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])
4454
R David Murray041015c2011-03-25 15:10:55 -04004455 def test_modify_returned_list_does_not_change_header(self):
4456 h = Header('test')
4457 chunks = email.header.decode_header(h)
4458 chunks.append(('ascii', 'test2'))
4459 self.assertEqual(str(h), 'test')
4460
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004461 def test_encoded_adjacent_nonencoded(self):
4462 eq = self.assertEqual
4463 h = Header()
4464 h.append('hello', 'iso-8859-1')
4465 h.append('world')
4466 s = h.encode()
4467 eq(s, '=?iso-8859-1?q?hello?= world')
4468 h = make_header(decode_header(s))
4469 eq(h.encode(), s)
4470
4471 def test_whitespace_eater(self):
4472 eq = self.assertEqual
4473 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
4474 parts = decode_header(s)
4475 eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
4476 hdr = make_header(parts)
4477 eq(hdr.encode(),
4478 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
4479
4480 def test_broken_base64_header(self):
4481 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00004482 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004483 raises(errors.HeaderParseError, decode_header, s)
4484
R. David Murray477efb32011-01-05 01:39:32 +00004485 def test_shift_jis_charset(self):
4486 h = Header('文', charset='shift_jis')
4487 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
4488
R David Murrayde912762011-03-16 18:26:23 -04004489 def test_flatten_header_with_no_value(self):
4490 # Issue 11401 (regression from email 4.x) Note that the space after
4491 # the header doesn't reflect the input, but this is also the way
4492 # email 4.x behaved. At some point it would be nice to fix that.
4493 msg = email.message_from_string("EmptyHeader:")
4494 self.assertEqual(str(msg), "EmptyHeader: \n\n")
4495
R David Murray01581ee2011-04-18 10:04:34 -04004496 def test_encode_preserves_leading_ws_on_value(self):
4497 msg = Message()
4498 msg['SomeHeader'] = ' value with leading ws'
4499 self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")
4500
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004501
Ezio Melottib3aedd42010-11-20 19:04:17 +00004502
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004503# Test RFC 2231 header parameters (en/de)coding
4504class TestRFC2231(TestEmailBase):
4505 def test_get_param(self):
4506 eq = self.assertEqual
4507 msg = self._msgobj('msg_29.txt')
4508 eq(msg.get_param('title'),
4509 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
4510 eq(msg.get_param('title', unquote=False),
4511 ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
4512
    def test_set_param(self):
        # set_param() with a charset stores an RFC 2231 value: get_param()
        # returns it as a (charset, language, value) tuple and as_string()
        # writes it as a percent-encoded title*= parameter.
        eq = self.ndiffAssertEqual
        msg = Message()
        # No language given: the language slot is the empty string.
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        # NOTE(review): the expected text must match the msg_01.txt fixture
        # byte for byte; verify whitespace runs (e.g. after "Fri,") against
        # the fixture, which is not visible here.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4549
R David Murraya2860e82011-04-16 09:20:30 -04004550 def test_set_param_requote(self):
4551 msg = Message()
4552 msg.set_param('title', 'foo')
4553 self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
4554 msg.set_param('title', 'bar', requote=False)
4555 self.assertEqual(msg['content-type'], 'text/plain; title=bar')
4556 # tspecial is still quoted.
4557 msg.set_param('title', "(bar)bell", requote=False)
4558 self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
4559
    def test_del_param(self):
        # Set two RFC 2231 parameters on Content-Type, delete one of them,
        # and check that only the other remains in the flattened message.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
            charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        # NOTE(review): the expected text must match the msg_01.txt fixture
        # byte for byte; verify whitespace runs (e.g. after "Fri,") against
        # the fixture, which is not visible here.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4589
4590 def test_rfc2231_get_content_charset(self):
4591 eq = self.assertEqual
4592 msg = self._msgobj('msg_32.txt')
4593 eq(msg.get_content_charset(), 'us-ascii')
4594
R. David Murraydfd7eb02010-12-24 22:36:49 +00004595 def test_rfc2231_parse_rfc_quoting(self):
4596 m = textwrap.dedent('''\
4597 Content-Disposition: inline;
4598 \tfilename*0*=''This%20is%20even%20more%20;
4599 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
4600 \tfilename*2="is it not.pdf"
4601
4602 ''')
4603 msg = email.message_from_string(m)
4604 self.assertEqual(msg.get_filename(),
4605 'This is even more ***fun*** is it not.pdf')
4606 self.assertEqual(m, msg.as_string())
4607
4608 def test_rfc2231_parse_extra_quoting(self):
4609 m = textwrap.dedent('''\
4610 Content-Disposition: inline;
4611 \tfilename*0*="''This%20is%20even%20more%20";
4612 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4613 \tfilename*2="is it not.pdf"
4614
4615 ''')
4616 msg = email.message_from_string(m)
4617 self.assertEqual(msg.get_filename(),
4618 'This is even more ***fun*** is it not.pdf')
4619 self.assertEqual(m, msg.as_string())
4620
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004621 def test_rfc2231_no_language_or_charset(self):
4622 m = '''\
4623Content-Transfer-Encoding: 8bit
4624Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
4625Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
4626
4627'''
4628 msg = email.message_from_string(m)
4629 param = msg.get_param('NAME')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004630 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004631 self.assertEqual(
4632 param,
4633 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
4634
4635 def test_rfc2231_no_language_or_charset_in_filename(self):
4636 m = '''\
4637Content-Disposition: inline;
4638\tfilename*0*="''This%20is%20even%20more%20";
4639\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4640\tfilename*2="is it not.pdf"
4641
4642'''
4643 msg = email.message_from_string(m)
4644 self.assertEqual(msg.get_filename(),
4645 'This is even more ***fun*** is it not.pdf')
4646
4647 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
4648 m = '''\
4649Content-Disposition: inline;
4650\tfilename*0*="''This%20is%20even%20more%20";
4651\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4652\tfilename*2="is it not.pdf"
4653
4654'''
4655 msg = email.message_from_string(m)
4656 self.assertEqual(msg.get_filename(),
4657 'This is even more ***fun*** is it not.pdf')
4658
4659 def test_rfc2231_partly_encoded(self):
4660 m = '''\
4661Content-Disposition: inline;
4662\tfilename*0="''This%20is%20even%20more%20";
4663\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4664\tfilename*2="is it not.pdf"
4665
4666'''
4667 msg = email.message_from_string(m)
4668 self.assertEqual(
4669 msg.get_filename(),
4670 'This%20is%20even%20more%20***fun*** is it not.pdf')
4671
4672 def test_rfc2231_partly_nonencoded(self):
4673 m = '''\
4674Content-Disposition: inline;
4675\tfilename*0="This%20is%20even%20more%20";
4676\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
4677\tfilename*2="is it not.pdf"
4678
4679'''
4680 msg = email.message_from_string(m)
4681 self.assertEqual(
4682 msg.get_filename(),
4683 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
4684
4685 def test_rfc2231_no_language_or_charset_in_boundary(self):
4686 m = '''\
4687Content-Type: multipart/alternative;
4688\tboundary*0*="''This%20is%20even%20more%20";
4689\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
4690\tboundary*2="is it not.pdf"
4691
4692'''
4693 msg = email.message_from_string(m)
4694 self.assertEqual(msg.get_boundary(),
4695 'This is even more ***fun*** is it not.pdf')
4696
4697 def test_rfc2231_no_language_or_charset_in_charset(self):
4698 # This is a nonsensical charset value, but tests the code anyway
4699 m = '''\
4700Content-Type: text/plain;
4701\tcharset*0*="This%20is%20even%20more%20";
4702\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
4703\tcharset*2="is it not.pdf"
4704
4705'''
4706 msg = email.message_from_string(m)
4707 self.assertEqual(msg.get_content_charset(),
4708 'this is even more ***fun*** is it not.pdf')
4709
4710 def test_rfc2231_bad_encoding_in_filename(self):
4711 m = '''\
4712Content-Disposition: inline;
4713\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
4714\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4715\tfilename*2="is it not.pdf"
4716
4717'''
4718 msg = email.message_from_string(m)
4719 self.assertEqual(msg.get_filename(),
4720 'This is even more ***fun*** is it not.pdf')
4721
4722 def test_rfc2231_bad_encoding_in_charset(self):
4723 m = """\
4724Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
4725
4726"""
4727 msg = email.message_from_string(m)
4728 # This should return None because non-ascii characters in the charset
4729 # are not allowed.
4730 self.assertEqual(msg.get_content_charset(), None)
4731
4732 def test_rfc2231_bad_character_in_charset(self):
4733 m = """\
4734Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
4735
4736"""
4737 msg = email.message_from_string(m)
4738 # This should return None because non-ascii characters in the charset
4739 # are not allowed.
4740 self.assertEqual(msg.get_content_charset(), None)
4741
4742 def test_rfc2231_bad_character_in_filename(self):
4743 m = '''\
4744Content-Disposition: inline;
4745\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
4746\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4747\tfilename*2*="is it not.pdf%E2"
4748
4749'''
4750 msg = email.message_from_string(m)
4751 self.assertEqual(msg.get_filename(),
4752 'This is even more ***fun*** is it not.pdf\ufffd')
4753
4754 def test_rfc2231_unknown_encoding(self):
4755 m = """\
4756Content-Transfer-Encoding: 8bit
4757Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
4758
4759"""
4760 msg = email.message_from_string(m)
4761 self.assertEqual(msg.get_filename(), 'myfile.txt')
4762
4763 def test_rfc2231_single_tick_in_filename_extended(self):
4764 eq = self.assertEqual
4765 m = """\
4766Content-Type: application/x-foo;
4767\tname*0*=\"Frank's\"; name*1*=\" Document\"
4768
4769"""
4770 msg = email.message_from_string(m)
4771 charset, language, s = msg.get_param('name')
4772 eq(charset, None)
4773 eq(language, None)
4774 eq(s, "Frank's Document")
4775
4776 def test_rfc2231_single_tick_in_filename(self):
4777 m = """\
4778Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
4779
4780"""
4781 msg = email.message_from_string(m)
4782 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004783 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004784 self.assertEqual(param, "Frank's Document")
4785
4786 def test_rfc2231_tick_attack_extended(self):
4787 eq = self.assertEqual
4788 m = """\
4789Content-Type: application/x-foo;
4790\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
4791
4792"""
4793 msg = email.message_from_string(m)
4794 charset, language, s = msg.get_param('name')
4795 eq(charset, 'us-ascii')
4796 eq(language, 'en-us')
4797 eq(s, "Frank's Document")
4798
4799 def test_rfc2231_tick_attack(self):
4800 m = """\
4801Content-Type: application/x-foo;
4802\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
4803
4804"""
4805 msg = email.message_from_string(m)
4806 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004807 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004808 self.assertEqual(param, "us-ascii'en-us'Frank's Document")
4809
4810 def test_rfc2231_no_extended_values(self):
4811 eq = self.assertEqual
4812 m = """\
4813Content-Type: application/x-foo; name=\"Frank's Document\"
4814
4815"""
4816 msg = email.message_from_string(m)
4817 eq(msg.get_param('name'), "Frank's Document")
4818
4819 def test_rfc2231_encoded_then_unencoded_segments(self):
4820 eq = self.assertEqual
4821 m = """\
4822Content-Type: application/x-foo;
4823\tname*0*=\"us-ascii'en-us'My\";
4824\tname*1=\" Document\";
4825\tname*2*=\" For You\"
4826
4827"""
4828 msg = email.message_from_string(m)
4829 charset, language, s = msg.get_param('name')
4830 eq(charset, 'us-ascii')
4831 eq(language, 'en-us')
4832 eq(s, 'My Document For You')
4833
4834 def test_rfc2231_unencoded_then_encoded_segments(self):
4835 eq = self.assertEqual
4836 m = """\
4837Content-Type: application/x-foo;
4838\tname*0=\"us-ascii'en-us'My\";
4839\tname*1*=\" Document\";
4840\tname*2*=\" For You\"
4841
4842"""
4843 msg = email.message_from_string(m)
4844 charset, language, s = msg.get_param('name')
4845 eq(charset, 'us-ascii')
4846 eq(language, 'en-us')
4847 eq(s, 'My Document For You')
4848
4849
Ezio Melottib3aedd42010-11-20 19:04:17 +00004850
R. David Murraya8f480f2010-01-16 18:30:03 +00004851# Tests to ensure that signed parts of an email are completely preserved, as
4852# required by RFC1847 section 2.1. Note that these are incomplete, because the
4853# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    """Check that the first MIME part survives parsing and regeneration.

    Each test parses ``msg_45.txt``, regenerates the message text in a
    different way, and compares the first MIME part of the regenerated text
    with the first MIME part of the original file.
    """

    # Matches a line starting with "--<delimiter>" and, lazily, everything up
    # to the next line that consists of the same "--<delimiter>".  Group 2 is
    # the text in between.  Compiled once here instead of on every call.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        """Return ``(text, message)`` for the test data file *filename*."""
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text):
        """Return the first MIME part found in *text*.

        Fails the test with an explicit message when no delimited part is
        found, rather than raising AttributeError on a ``None`` match.
        """
        match = self._first_part_re.search(text)
        self.assertIsNotNone(match, 'no delimited MIME part found in text')
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        """Assert that *result* and *original* have equal first MIME parts."""
        # Extract the first mime part of each message
        inpart = self._first_part(original)
        outpart = self._first_part(result)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        """``as_string()`` with default settings keeps the first part."""
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        """``as_string(maxheaderlen=60)`` keeps the first part."""
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        """Flattening through a ``Generator`` keeps the first part."""
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4886
4887
Ezio Melottib3aedd42010-11-20 19:04:17 +00004888
if __name__ == "__main__":
    # Run this module's tests when the file is executed as a script.
    unittest.main()