blob: 1f354c2b66028728e5137f3bc22c69698c988647 [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R David Murray28346b82011-03-31 11:40:20 -040039from test.support import run_unittest, unlink
R David Murraya256bac2011-03-31 12:20:23 -040040from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000041
42NL = '\n'
43EMPTYSTRING = ''
44SPACE = ' '
45
46
# Test various aspects of the Message class's API
48class TestMessageAPI(TestEmailBase):
def test_get_all(self):
    # get_all() yields the list of Cc values parsed from msg_20.txt and
    # falls back to the supplied default for a header that is absent.
    parsed = self._msgobj('msg_20.txt')
    cc_values = parsed.get_all('cc')
    self.assertEqual(cc_values,
                     ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
    self.assertEqual(parsed.get_all('xx', 'n/a'), 'n/a')
54
def test_getset_charset(self):
    # Round-trip a Charset through set_charset()/get_charset() and check
    # which MIME headers set_charset() creates, updates or leaves alone.
    same = self.assertEqual
    fresh = Message()
    same(fresh.get_charset(), None)
    latin1 = Charset('iso-8859-1')
    fresh.set_charset(latin1)
    same(fresh['mime-version'], '1.0')
    same(fresh.get_content_type(), 'text/plain')
    same(fresh['content-type'], 'text/plain; charset="iso-8859-1"')
    same(fresh.get_param('charset'), 'iso-8859-1')
    same(fresh['content-transfer-encoding'], 'quoted-printable')
    same(fresh.get_charset().input_charset, 'iso-8859-1')
    # Remove the charset
    fresh.set_charset(None)
    same(fresh.get_charset(), None)
    same(fresh['content-type'], 'text/plain')
    # Try adding a charset when there's already MIME headers present
    preset = Message()
    preset['MIME-Version'] = '2.0'
    preset['Content-Type'] = 'text/x-weird'
    preset['Content-Transfer-Encoding'] = 'quinted-puntable'
    preset.set_charset(latin1)
    same(preset['mime-version'], '2.0')
    same(preset['content-type'], 'text/x-weird; charset="iso-8859-1"')
    same(preset['content-transfer-encoding'], 'quinted-puntable')
80
def test_set_charset_from_string(self):
    # set_charset() also accepts a plain charset name instead of a
    # Charset instance.
    message = Message()
    message.set_charset('us-ascii')
    self.assertEqual(message.get_charset().input_charset, 'us-ascii')
    self.assertEqual(message['content-type'],
                     'text/plain; charset="us-ascii"')
87
def test_set_payload_with_charset(self):
    # A charset passed to set_payload() is what get_charset() reports.
    message = Message()
    message.set_payload('This is a string payload', Charset('iso-8859-1'))
    active = message.get_charset()
    self.assertEqual(active.input_charset, 'iso-8859-1')
93
def test_get_charsets(self):
    # get_charsets() is checked against three fixture messages; the
    # optional argument replaces None for parts without a charset.
    same = self.assertEqual

    same(self._msgobj('msg_08.txt').get_charsets(),
         [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

    same(self._msgobj('msg_09.txt').get_charsets('dingbat'),
         ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat', 'koi8-r'])

    same(self._msgobj('msg_12.txt').get_charsets(),
         [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
          'iso-8859-3', 'us-ascii', 'koi8-r'])
110
def test_get_filename(self):
    # get_filename() on the subparts of two fixture messages.
    multi = self._msgobj('msg_04.txt')
    names = []
    for part in multi.get_payload():
        names.append(part.get_filename())
    self.assertEqual(names, ['msg.txt', 'msg.txt'])

    with_image = self._msgobj('msg_07.txt')
    second_part = with_image.get_payload(1)
    self.assertEqual(second_part.get_filename(), 'dingusfish.gif')
121
def test_get_filename_with_name_parameter(self):
    # Every subpart of msg_44.txt is expected to report 'msg.txt'.
    message = self._msgobj('msg_44.txt')
    names = list(map(lambda part: part.get_filename(),
                     message.get_payload()))
    self.assertEqual(names, ['msg.txt', 'msg.txt'])
128
def test_get_boundary(self):
    # The boundary is returned without surrounding quotes.
    message = self._msgobj('msg_07.txt')
    boundary = message.get_boundary()
    # No quotes!
    self.assertEqual(boundary, 'BOUNDARY')
134
def test_set_boundary(self):
    # set_boundary() must keep the Content-Type header in its original
    # position and must fail when there is no Content-Type header.
    same = self.assertEqual

    # This one has no existing boundary parameter, but the Content-Type:
    # header appears fifth.
    plain = self._msgobj('msg_01.txt')
    plain.set_boundary('BOUNDARY')
    name, content = plain.items()[4]
    same(name.lower(), 'content-type')
    same(content, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')

    # This one has a Content-Type: header, with a boundary, stuck in the
    # middle of its headers.  Make sure the order is preserved; it should
    # be fifth.
    mixed = self._msgobj('msg_04.txt')
    mixed.set_boundary('BOUNDARY')
    name, content = mixed.items()[4]
    same(name.lower(), 'content-type')
    same(content, 'multipart/mixed; boundary="BOUNDARY"')

    # And this one has no Content-Type: header at all.
    bare = self._msgobj('msg_03.txt')
    with self.assertRaises(errors.HeaderParseError):
        bare.set_boundary('BOUNDARY')
156
def test_make_boundary(self):
    # The boundary parameter only shows up in the first header once the
    # message has been serialized.
    container = MIMEMultipart('form-data')
    # Note that when the boundary gets created is an implementation
    # detail and might change.
    first_value = container.items()[0][1]
    self.assertEqual(first_value, 'multipart/form-data')
    # Trigger creation of boundary
    container.as_string()
    first_value = container.items()[0][1]
    self.assertEqual(first_value[:33],
                     'multipart/form-data; boundary="==')
    # XXX: there ought to be tests of the uniqueness of the boundary, too.
167
def test_message_rfc822_only(self):
    # Issue 7970: message/rfc822 not in multipart parsed by
    # HeaderParser caused an exception when flattened.
    with openfile('msg_46.txt') as source:
        original_text = source.read()
    parsed = HeaderParser().parsestr(original_text)
    sink = StringIO()
    writer = Generator(sink, mangle_from_=True, maxheaderlen=0)
    writer.flatten(parsed, unixfrom=False)
    # Flattening must reproduce the input text exactly.
    self.assertEqual(sink.getvalue(), original_text)
179
def test_byte_message_rfc822_only(self):
    # Make sure new bytes header parser also passes this.
    with openfile('msg_46.txt', 'rb') as source:
        original_bytes = source.read()
    parsed = email.parser.BytesHeaderParser().parsebytes(original_bytes)
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(parsed)
    # Flattening must reproduce the input bytes exactly.
    self.assertEqual(sink.getvalue(), original_bytes)
190
def test_get_decoded_payload(self):
    # get_payload(decode=True) on the multipart container and on each
    # of the five subparts of msg_10.txt.
    same = self.assertEqual
    message = self._msgobj('msg_10.txt')
    # The outer message is a multipart
    same(message.get_payload(decode=True), None)
    expected_by_part = (
        # Subpart 1 is 7bit encoded
        b'This is a 7bit encoded message.\n',
        # Subpart 2 is quopri
        b'\xa1This is a Quoted Printable encoded message!\n',
        # Subpart 3 is base64
        b'This is a Base64 encoded message.',
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        b'This is a Base64 encoded message.\n',
        # Subpart 5 has no Content-Transfer-Encoding: header.
        b'This has no Content-Transfer-Encoding: header.\n',
    )
    for position, expected in enumerate(expected_by_part):
        decoded = message.get_payload(position).get_payload(decode=True)
        same(decoded, expected)
212
def test_get_decoded_uu_payload(self):
    # get_payload(decode=True) must uudecode the body for every accepted
    # spelling of the uuencode Content-Transfer-Encoding, and must hand
    # back the payload bytes unchanged when the body is not uuencoded.
    eq = self.assertEqual
    msg = Message()
    msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
    for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        # Assigning a header appends a new field instead of replacing
        # the old one, and lookups return the first match.  Drop the
        # previous value first so each spelling is really exercised
        # (deleting a missing header is a no-op on the first pass).
        del msg['content-transfer-encoding']
        msg['content-transfer-encoding'] = cte
        eq(msg['content-transfer-encoding'], cte)
        eq(msg.get_payload(decode=True), b'hello world')
    # Now try some bogus data
    msg.set_payload('foo')
    eq(msg.get_payload(decode=True), b'foo')
223
def test_get_payload_n_raises_on_non_multipart(self):
    # Indexing into the payload of a message that has no subparts is a
    # TypeError.
    empty = Message()
    with self.assertRaises(TypeError):
        empty.get_payload(1)
227
def test_decoded_generator(self):
    # DecodedGenerator output for msg_07.txt must equal the contents of
    # the msg_17.txt fixture.
    message = self._msgobj('msg_07.txt')
    with openfile('msg_17.txt') as reference:
        expected = reference.read()
    sink = StringIO()
    DecodedGenerator(sink).flatten(message)
    self.assertEqual(sink.getvalue(), expected)
237
def test__contains__(self):
    # Header membership tests on a Message ignore the case of the name.
    msg = Message()
    msg['From'] = 'Me'
    msg['to'] = 'You'
    # Check for case insensitivity.  assertIn reports the name and the
    # container on failure, which assertTrue(x in y) cannot do.
    for name in ('from', 'From', 'FROM', 'to', 'To', 'TO'):
        self.assertIn(name, msg)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000249
def test_as_string(self):
    # str(msg) must reproduce the fixture text; as_string(unixfrom=True)
    # must produce the same text preceded by a 'From ' envelope line.
    check = self.ndiffAssertEqual
    message = self._msgobj('msg_01.txt')
    with openfile('msg_01.txt') as source:
        expected = source.read()
    check(expected, str(message))
    with_envelope = message.as_string(unixfrom=True)
    envelope_line, *body_lines = with_envelope.split('\n')
    self.assertTrue(envelope_line.startswith('From '))
    check(expected, NL.join(body_lines))
260
def test_bad_param(self):
    # A parameter given without a value is reported as the empty string.
    parsed = email.message_from_string("Content-Type: blarg; baz; boo\n")
    value = parsed.get_param('baz')
    self.assertEqual(value, '')
264
def test_missing_filename(self):
    # Without a Content-Disposition header, get_filename() gives None.
    parsed = email.message_from_string("From: foo\n")
    filename = parsed.get_filename()
    self.assertEqual(filename, None)
268
def test_bogus_filename(self):
    # A 'filename' parameter with no value yields the empty string.
    header_text = "Content-Disposition: blarg; filename\n"
    parsed = email.message_from_string(header_text)
    self.assertEqual(parsed.get_filename(), '')
273
def test_missing_boundary(self):
    # Without a Content-Type header, get_boundary() gives None.
    parsed = email.message_from_string("From: foo\n")
    boundary = parsed.get_boundary()
    self.assertEqual(boundary, None)
277
def test_get_params(self):
    # get_params() on a custom header: plain values, a value-less first
    # item, and a quoted value; plus the None result when the default
    # header is absent.
    same = self.assertEqual

    parsed = email.message_from_string(
        'X-Header: foo=one; bar=two; baz=three\n')
    same(parsed.get_params(header='x-header'),
         [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])

    valueless_first = [('foo', ''), ('bar', 'one'), ('baz', 'two')]

    parsed = email.message_from_string(
        'X-Header: foo; bar=one; baz=two\n')
    same(parsed.get_params(header='x-header'), valueless_first)
    same(parsed.get_params(), None)

    parsed = email.message_from_string(
        'X-Header: foo; bar="one"; baz=two\n')
    same(parsed.get_params(header='x-header'), valueless_first)
293
def test_get_param_liberal(self):
    # The boundary is still found when the header value repeats the
    # header name and has spaces around '='.
    message = Message()
    sloppy_value = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
    message['Content-Type'] = sloppy_value
    self.assertEqual(message.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')
298
def test_get_param(self):
    # get_param() lookups on a custom header, including missing
    # parameters, a value-less parameter and a quoted value.
    same = self.assertEqual

    parsed = email.message_from_string(
        "X-Header: foo=one; bar=two; baz=three\n")
    same(parsed.get_param('bar', header='x-header'), 'two')
    same(parsed.get_param('quuz', header='x-header'), None)
    same(parsed.get_param('quuz'), None)

    parsed = email.message_from_string(
        'X-Header: foo; bar="one"; baz=two\n')
    for param, expected in (('foo', ''), ('bar', 'one'), ('baz', 'two')):
        same(parsed.get_param(param, header='x-header'), expected)
    # XXX: We are not RFC-2045 compliant!  We cannot parse:
    # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
    # msg.get_param("weird")
    # yet.
315
def test_get_param_funky_continuation_lines(self):
    # The 'name' parameter of the second subpart of msg_22.txt.
    message = self._msgobj('msg_22.txt')
    second_part = message.get_payload(1)
    self.assertEqual(second_part.get_param('name'), 'wibble.JPG')
319
def test_get_param_with_semis_in_quotes(self):
    # Semicolons inside a quoted parameter value do not split it.
    same = self.assertEqual
    parsed = email.message_from_string(
        'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
    same(parsed.get_param('name'), 'Jim&amp;&amp;Jill')
    raw_value = parsed.get_param('name', unquote=False)
    same(raw_value, '"Jim&amp;&amp;Jill"')
326
def test_get_param_with_quotes(self):
    # Escaped quotes inside the bar*0/bar*1 continuation segments are
    # kept when the segments are joined; the same header is written
    # once with single-quoted and once with double-quoted source text.
    header_spellings = (
        'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"',
        "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"",
    )
    for header_text in header_spellings:
        parsed = email.message_from_string(header_text)
        self.assertEqual(parsed.get_param('bar'), 'baz"foobar"baz')
334
def test_field_containment(self):
    # Header membership on a parsed message ignores case but still
    # requires the full header name.
    msg = email.message_from_string('Header: exists')
    # assertIn/assertNotIn name the offending key on failure, unlike
    # assertTrue/assertFalse applied to an 'in' expression.
    for name in ('header', 'Header', 'HEADER'):
        self.assertIn(name, msg)
    self.assertNotIn('headerx', msg)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000342
def test_set_param(self):
    # set_param() on the default header and on a custom header, read
    # back both unquoted and quoted.
    same = self.assertEqual
    message = Message()

    message.set_param('charset', 'iso-2022-jp')
    same(message.get_param('charset'), 'iso-2022-jp')

    message.set_param('importance', 'high value')
    same(message.get_param('importance'), 'high value')
    same(message.get_param('importance', unquote=False), '"high value"')

    unquoted = [('text/plain', ''),
                ('charset', 'iso-2022-jp'),
                ('importance', 'high value')]
    same(message.get_params(), unquoted)
    quoted = [('text/plain', ''),
              ('charset', '"iso-2022-jp"'),
              ('importance', '"high value"')]
    same(message.get_params(unquote=False), quoted)

    message.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
    same(message.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')
359
def test_del_param(self):
    # Delete a Content-Type parameter, then add it back: it reappears
    # at the end of the parameter list.
    same = self.assertEqual
    content_type = ('multipart/report', '')
    boundary = ('boundary', 'D1690A7AC1.996856090/mail.example.com')

    message = self._msgobj('msg_05.txt')
    same(message.get_params(),
         [content_type, ('report-type', 'delivery-status'), boundary])

    saved = message.get_param("report-type")
    message.del_param("report-type")
    same(message.get_params(), [content_type, boundary])

    message.set_param("report-type", saved)
    same(message.get_params(),
         [content_type, boundary, ('report-type', saved)])
376
def test_del_param_on_other_header(self):
    # del_param() can target a header other than Content-Type.
    message = Message()
    message.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
    message.del_param('filename', 'content-disposition')
    remaining = message['content-disposition']
    self.assertEqual(remaining, 'attachment')
382
def test_del_param_on_nonexistent_header(self):
    # del_param() on a header the message does not have must neither
    # raise nor create that header.
    msg = Message()
    msg.del_param('filename', 'content-disposition')
    # Previously the test only checked that no exception escaped; also
    # pin down that the message is left without any headers.
    self.assertNotIn('content-disposition', msg)
    self.assertEqual(msg.items(), [])
386
def test_del_nonexistent_param(self):
    # Deleting a parameter that is not present leaves the header value
    # exactly as it was.
    msg = Message()
    msg.add_header('Content-Type', 'text/plain', charset='utf-8')
    existing_header = msg['Content-Type']
    msg.del_param('foobar', header='Content-Type')
    # Compare against the value captured before the call (it used to be
    # captured but never checked), and keep the literal expectation too.
    self.assertEqual(msg['Content-Type'], existing_header)
    self.assertEqual(msg['Content-Type'], 'text/plain; charset="utf-8"')
393
def test_set_type(self):
    # set_type() rejects a value without a subtype and keeps existing
    # parameters when the type is changed.
    same = self.assertEqual
    message = Message()
    with self.assertRaises(ValueError):
        message.set_type('text')
    message.set_type('text/plain')
    same(message['content-type'], 'text/plain')
    message.set_param('charset', 'us-ascii')
    same(message['content-type'], 'text/plain; charset="us-ascii"')
    message.set_type('text/html')
    same(message['content-type'], 'text/html; charset="us-ascii"')
404
def test_set_type_on_other_header(self):
    # set_type() can rewrite a header other than Content-Type.
    message = Message()
    message['X-Content-Type'] = 'text/plain'
    message.set_type('application/octet-stream', 'X-Content-Type')
    updated = message['x-content-type']
    self.assertEqual(updated, 'application/octet-stream')
410
def test_get_content_type_missing(self):
    # A message with no Content-Type header reports text/plain.
    empty = Message()
    content_type = empty.get_content_type()
    self.assertEqual(content_type, 'text/plain')
414
def test_get_content_type_missing_with_default_type(self):
    # With no Content-Type header, the configured default type is used.
    empty = Message()
    empty.set_default_type('message/rfc822')
    content_type = empty.get_content_type()
    self.assertEqual(content_type, 'message/rfc822')
419
def test_get_content_type_from_message_implicit(self):
    # First subpart of msg_30.txt must report message/rfc822.
    outer = self._msgobj('msg_30.txt')
    first_part = outer.get_payload(0)
    self.assertEqual(first_part.get_content_type(), 'message/rfc822')
424
def test_get_content_type_from_message_explicit(self):
    # First subpart of msg_28.txt must report message/rfc822.
    outer = self._msgobj('msg_28.txt')
    first_part = outer.get_payload(0)
    self.assertEqual(first_part.get_content_type(), 'message/rfc822')
429
def test_get_content_type_from_message_text_plain_implicit(self):
    # msg_03.txt must report text/plain.
    parsed = self._msgobj('msg_03.txt')
    content_type = parsed.get_content_type()
    self.assertEqual(content_type, 'text/plain')
433
def test_get_content_type_from_message_text_plain_explicit(self):
    # msg_01.txt must report text/plain.
    parsed = self._msgobj('msg_01.txt')
    content_type = parsed.get_content_type()
    self.assertEqual(content_type, 'text/plain')
437
def test_get_content_maintype_missing(self):
    # A message with no Content-Type header has main type 'text'.
    empty = Message()
    maintype = empty.get_content_maintype()
    self.assertEqual(maintype, 'text')
441
def test_get_content_maintype_missing_with_default_type(self):
    # With no Content-Type header, the main type comes from the
    # configured default type.
    empty = Message()
    empty.set_default_type('message/rfc822')
    maintype = empty.get_content_maintype()
    self.assertEqual(maintype, 'message')
446
def test_get_content_maintype_from_message_implicit(self):
    # First subpart of msg_30.txt must have main type 'message'.
    outer = self._msgobj('msg_30.txt')
    first_part = outer.get_payload(0)
    self.assertEqual(first_part.get_content_maintype(), 'message')
450
def test_get_content_maintype_from_message_explicit(self):
    # First subpart of msg_28.txt must have main type 'message'.
    outer = self._msgobj('msg_28.txt')
    first_part = outer.get_payload(0)
    self.assertEqual(first_part.get_content_maintype(), 'message')
454
def test_get_content_maintype_from_message_text_plain_implicit(self):
    # msg_03.txt must have main type 'text'.
    parsed = self._msgobj('msg_03.txt')
    maintype = parsed.get_content_maintype()
    self.assertEqual(maintype, 'text')
458
def test_get_content_maintype_from_message_text_plain_explicit(self):
    # msg_01.txt must have main type 'text'.
    parsed = self._msgobj('msg_01.txt')
    maintype = parsed.get_content_maintype()
    self.assertEqual(maintype, 'text')
462
def test_get_content_subtype_missing(self):
    # A message with no Content-Type header has subtype 'plain'.
    empty = Message()
    subtype = empty.get_content_subtype()
    self.assertEqual(subtype, 'plain')
466
def test_get_content_subtype_missing_with_default_type(self):
    # With no Content-Type header, the subtype comes from the
    # configured default type.
    empty = Message()
    empty.set_default_type('message/rfc822')
    subtype = empty.get_content_subtype()
    self.assertEqual(subtype, 'rfc822')
471
def test_get_content_subtype_from_message_implicit(self):
    # First subpart of msg_30.txt must have subtype 'rfc822'.
    outer = self._msgobj('msg_30.txt')
    first_part = outer.get_payload(0)
    self.assertEqual(first_part.get_content_subtype(), 'rfc822')
475
def test_get_content_subtype_from_message_explicit(self):
    # First subpart of msg_28.txt must have subtype 'rfc822'.
    outer = self._msgobj('msg_28.txt')
    first_part = outer.get_payload(0)
    self.assertEqual(first_part.get_content_subtype(), 'rfc822')
479
def test_get_content_subtype_from_message_text_plain_implicit(self):
    # msg_03.txt must have subtype 'plain'.
    parsed = self._msgobj('msg_03.txt')
    subtype = parsed.get_content_subtype()
    self.assertEqual(subtype, 'plain')
483
def test_get_content_subtype_from_message_text_plain_explicit(self):
    # msg_01.txt must have subtype 'plain'.
    parsed = self._msgobj('msg_01.txt')
    subtype = parsed.get_content_subtype()
    self.assertEqual(subtype, 'plain')
487
def test_get_content_maintype_error(self):
    # A Content-Type value without a slash falls back to main type
    # 'text'.
    message = Message()
    message['Content-Type'] = 'no-slash-in-this-string'
    maintype = message.get_content_maintype()
    self.assertEqual(maintype, 'text')
492
def test_get_content_subtype_error(self):
    # A Content-Type value without a slash falls back to subtype
    # 'plain'.
    message = Message()
    message['Content-Type'] = 'no-slash-in-this-string'
    subtype = message.get_content_subtype()
    self.assertEqual(subtype, 'plain')
497
def test_replace_header(self):
    # replace_header() changes the first matching header in place,
    # keeps header order, and raises KeyError for an unknown name.
    same = self.assertEqual
    message = Message()
    for name, value in (('First', 'One'),
                        ('Second', 'Two'),
                        ('Third', 'Three')):
        message.add_header(name, value)
    same(message.keys(), ['First', 'Second', 'Third'])
    same(message.values(), ['One', 'Two', 'Three'])

    message.replace_header('Second', 'Twenty')
    same(message.keys(), ['First', 'Second', 'Third'])
    same(message.values(), ['One', 'Twenty', 'Three'])

    message.add_header('First', 'Eleven')
    message.replace_header('First', 'One Hundred')
    same(message.keys(), ['First', 'Second', 'Third', 'First'])
    same(message.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])

    with self.assertRaises(KeyError):
        message.replace_header('Fourth', 'Missing')
514
def test_broken_base64_payload(self):
    # When the base64 body cannot be decoded, get_payload(decode=True)
    # returns the payload text as bytes.
    broken = 'AwDp0P7//y6LwKEAcPa/6Q=9'
    message = Message()
    message['content-type'] = 'audio/x-midi'
    message['content-transfer-encoding'] = 'base64'
    message.set_payload(broken)
    expected = bytes(broken, 'raw-unicode-escape')
    self.assertEqual(message.get_payload(decode=True), expected)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000523
def test_broken_unicode_payload(self):
    # This test improves coverage but is not a compliance test.
    # The behavior in this situation is currently undefined by the API.
    text = 'this is a br\xf6ken thing to do'
    message = Message()
    message['content-type'] = 'text/plain'
    message['content-transfer-encoding'] = '8bit'
    message.set_payload(text)
    expected = bytes(text, 'raw-unicode-escape')
    self.assertEqual(message.get_payload(decode=True), expected)
534
def test_questionable_bytes_payload(self):
    # This test improves coverage but is not a compliance test,
    # since it involves poking inside the black box.
    raw = 'this is a quéstionable thing to do'.encode('utf-8')
    message = Message()
    message['content-type'] = 'text/plain; charset="utf-8"'
    message['content-transfer-encoding'] = '8bit'
    # Store the bytes directly on the private attribute.
    message._payload = raw
    decoded = message.get_payload(decode=True)
    self.assertEqual(decoded, raw)
544
R. David Murray7ec754b2010-12-13 23:51:19 +0000545 # Issue 1078919
def test_ascii_add_header(self):
    # An ASCII filename parameter is emitted as a plain quoted string.
    message = Message()
    message.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
    produced = message['Content-Disposition']
    self.assertEqual('attachment; filename="bud.gif"', produced)
552
def test_noascii_add_header(self):
    # A non-ASCII filename is emitted in the filename*= form, using
    # utf-8 and percent-encoding.
    message = Message()
    message.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
    produced = message['Content-Disposition']
    self.assertEqual(
        'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
        produced)
560
def test_nonascii_add_header_via_triple(self):
    # A (charset, language, value) triple selects the charset used for
    # the filename*= form.
    message = Message()
    triple = ('iso-8859-1', '', 'Fußballer.ppt')
    message.add_header('Content-Disposition', 'attachment',
                       filename=triple)
    produced = message['Content-Disposition']
    self.assertEqual(
        'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
        produced)
568
def test_ascii_add_header_with_tspecial(self):
    # An ASCII filename containing brackets and spaces stays a plain
    # quoted string.
    message = Message()
    message.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
    produced = message['Content-Disposition']
    self.assertEqual(
        'attachment; filename="windows [filename].ppt"',
        produced)
576
def test_nonascii_add_header_with_tspecial(self):
    # In the filename*= form, the space and brackets are
    # percent-encoded along with the non-ASCII character.
    message = Message()
    message.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
    produced = message['Content-Disposition']
    self.assertEqual(
        "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
        produced)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000584
def test_add_header_with_name_only_param(self):
    # A keyword whose value is None becomes a bare parameter name, with
    # the underscore turned into a dash.
    message = Message()
    message.add_header('Content-Disposition', 'inline', foo_bar=None)
    produced = message['Content-Disposition']
    self.assertEqual("inline; foo-bar", produced)
589
def test_add_header_with_no_value(self):
    # A header added with value None reads back as the empty string.
    message = Message()
    message.add_header('X-Status', None)
    produced = message['X-Status']
    self.assertEqual('', produced)
594
R. David Murray5b2d9dd2011-01-09 02:35:24 +0000595 # Issue 5871: reject an attempt to embed a header inside a header value
596 # (header injection attack).
def test_embeded_header_via_Header_rejected(self):
    # Issue 5871: a Header object whose value smuggles in a second
    # header line must make serialization fail.
    message = Message()
    message['Dummy'] = Header('dummy\nX-Injected-Header: test')
    with self.assertRaises(errors.HeaderParseError):
        message.as_string()
601
def test_embeded_header_via_string_rejected(self):
    # Issue 5871: a plain string value that smuggles in a second header
    # line must make serialization fail.
    message = Message()
    message['Dummy'] = 'dummy\nX-Injected-Header: test'
    with self.assertRaises(errors.HeaderParseError):
        message.as_string()
606
def test_unicode_header_defaults_to_utf8_encoding(self):
    # Issue 14291
    # A non-ASCII Subject assigned as a plain string is serialized as a
    # utf-8 encoded word.
    part = MIMEText('abc\n')
    part['Subject'] = 'É test'
    expected = textwrap.dedent("""\
        Content-Type: text/plain; charset="us-ascii"
        MIME-Version: 1.0
        Content-Transfer-Encoding: 7bit
        Subject: =?utf-8?q?=C3=89_test?=

        abc
        """)
    self.assertEqual(str(part), expected)
619
# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    # Checks on the Content-Transfer-Encoding chosen for various
    # payloads and charsets, and on base64 body line length.

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as image_file:
            raw_image = image_file.read()
        image_part = email.mime.image.MIMEImage(raw_image)
        encoded_body = image_part.get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        longest = max(len(row) for row in encoded_body.split('\n'))
        self.assertLessEqual(longest, 76)

    def test_encode_empty_payload(self):
        # Setting a charset on a message with no payload still yields a
        # Content-Transfer-Encoding header.
        message = Message()
        message.set_charset('us-ascii')
        self.assertEqual(message['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        same = self.assertEqual
        # 7bit data and the default us-ascii _charset
        ascii_part = MIMEText('hello world')
        same(ascii_part['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        eight_bit_part = MIMEText('hello \xf8 world')
        same(eight_bit_part['content-transfer-encoding'], '8bit')
        # And now with a different charset
        latin1_part = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        same(latin1_part['content-transfer-encoding'], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        japanese_part = MIMEText('文', _charset='euc-jp')
        self.assertEqual(japanese_part['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000657
Ezio Melottib3aedd42010-11-20 19:04:17 +0000658
# Test long header wrapping
660class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400661
662 maxDiff = None
663
def test_split_long_continuation(self):
    # A header with an over-long, unbreakable continuation line must
    # come out of the Generator exactly as it went in.
    parsed = email.message_from_string("""\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")
    sink = StringIO()
    Generator(sink).flatten(parsed)
    self.ndiffAssertEqual(sink.getvalue(), """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")
683
def test_another_long_almost_unsplittable_header(self):
    # Header.encode() must leave the existing line structure alone,
    # both with tab and with space continuation whitespace.
    check = self.ndiffAssertEqual
    tabbed_text = """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text"""
    tabbed_header = Header(tabbed_text, continuation_ws='\t')
    check(tabbed_header.encode(), """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text""")
    spaced_header = Header(tabbed_text.replace('\t', ' '))
    check(spaced_header.encode(), """\
bug demonstration
 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
 more text""")
700
def test_long_nonstring(self):
    # A Subject built from three chunks in three charsets is checked
    # twice: as flattened by a Generator, and as returned by
    # Header.encode(maxlinelen=76).
    check = self.ndiffAssertEqual
    latin1 = Charset("iso-8859-1")
    latin2 = Charset("iso-8859-2")
    utf8 = Charset("utf-8")
    german_chunk = (b'Die Mieter treten hier ein werden mit einem Foerderband '
                    b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
                    b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
                    b'bef\xf6rdert. ')
    czech_chunk = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
    utf8_chunk = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                  '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                  '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                  '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                  '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                  'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                  'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                  '\u3044\u307e\u3059\u3002')
    subject = Header(german_chunk, latin1, header_name='Subject')
    subject.append(czech_chunk, latin2)
    subject.append(utf8_chunk, utf8)
    message = Message()
    message['Subject'] = subject
    sink = StringIO()
    Generator(sink).flatten(message)
    check(sink.getvalue(), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
    check(subject.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000754
def test_long_header_encode(self):
    # A long parameter list is expected to fold after the second item,
    # with a space starting the continuation line.
    long_value = ('wasnipoop; giraffes="very-long-necked-animals"; '
                  'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    header = Header(long_value, header_name='X-Foobar-Spoink-Defrobnit')
    self.ndiffAssertEqual(header.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
763
def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
    # continuation_ws='\t' is passed, yet the expected continuation line
    # still begins with the space that was already present in the value.
    value = ('wasnipoop; giraffes="very-long-necked-animals"; '
             'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    header = Header(value,
                    header_name='X-Foobar-Spoink-Defrobnit',
                    continuation_ws='\t')
    expected = ('wasnipoop; giraffes="very-long-necked-animals";\n'
                ' spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    self.ndiffAssertEqual(header.encode(), expected)
773
def test_long_header_encode_with_tab_continuation(self):
    # Here the value itself contains a tab after the first parameter, and
    # that tab is expected to open the continuation line.
    value = ('wasnipoop; giraffes="very-long-necked-animals";\t'
             'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    header = Header(value,
                    header_name='X-Foobar-Spoink-Defrobnit',
                    continuation_ws='\t')
    expected = ('wasnipoop; giraffes="very-long-necked-animals";\n'
                '\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    self.ndiffAssertEqual(header.encode(), expected)
783
R David Murray3a6152f2011-03-14 21:13:03 -0400784 def test_header_encode_with_different_output_charset(self):
785 h = Header('文', 'euc-jp')
786 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
787
788 def test_long_header_encode_with_different_output_charset(self):
789 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
790 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
791 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
792 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
793 res = """\
794=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
795 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
796 self.assertEqual(h.encode(), res)
797
def test_header_splitter(self):
    # Flatten a MIMEText message carrying one long custom header and check
    # where the generator folds that header.
    msg = MIMEText('')
    # Item assignment is used rather than add_header() because the latter
    # doesn't guarantee an order of the parameters.
    msg['X-Foobar-Spoink-Defrobnit'] = (
        'wasnipoop; giraffes="very-long-necked-animals"; '
        'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    out = StringIO()
    Generator(out).flatten(msg)
    expected = (
        'Content-Type: text/plain; charset="us-ascii"\n'
        'MIME-Version: 1.0\n'
        'Content-Transfer-Encoding: 7bit\n'
        'X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";\n'
        ' spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"\n'
        '\n')
    self.ndiffAssertEqual(out.getvalue(), expected)
817
def test_no_semis_header_splitter(self):
    # Ten space-separated message ids, no semicolons: the expected output
    # folds the References header after the fifth id.
    msg = Message()
    msg['From'] = 'test@dom.ain'
    msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
    msg.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(msg)
    expected = (
        'From: test@dom.ain\n'
        'References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>\n'
        ' <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>\n'
        '\n'
        'Test')
    self.ndiffAssertEqual(out.getvalue(), expected)
833
def test_last_split_chunk_does_not_fit(self):
    # The chunk after the comma has no split point of its own; it is
    # expected on a continuation line all by itself.
    overlong = ('but_the_second_part_does_not_fit_within_maxlinelen_and_'
                'thus_should_be_on_a_line_all_by_itself')
    header = Header('Subject: the first part of this is short, ' + overlong)
    expected = 'Subject: the first part of this is short,\n ' + overlong
    self.ndiffAssertEqual(header.encode(), expected)
842
def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
    # A lone leading comma stays on the first line; the unsplittable rest
    # is expected on the continuation line.
    overlong = ('but_the_second_part_does_not_fit_within_maxlinelen_and_'
                'thus_should_be_on_a_line_all_by_itself')
    header = Header(', ' + overlong)
    self.ndiffAssertEqual(header.encode(), ',\n ' + overlong)
851
def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
    # Two leading commas stay together on the first line; the unsplittable
    # rest is expected on the continuation line.
    overlong = ('but_the_second_part_does_not_fit_within_maxlinelen_and_'
                'thus_should_be_on_a_line_all_by_itself')
    header = Header(', , ' + overlong)
    self.ndiffAssertEqual(header.encode(), ', ,\n ' + overlong)
860
def test_trailing_splitable_on_overlong_unsplitable(self):
    # A single unsplittable chunk ending in ';' is expected to come back
    # unchanged.
    text = ('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself;')
    self.ndiffAssertEqual(Header(text).encode(), text)
867
def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
    # Leading ';' on the first line, then the overlong chunk together with
    # its trailing '; ' on the continuation line.
    chunk = ('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
             'be_on_a_line_all_by_itself')
    header = Header('; ' + chunk + '; ')
    self.ndiffAssertEqual(header.encode(), ';\n ' + chunk + '; ')
R David Murray7da4db12011-04-07 20:37:17 -0400876
def test_long_header_with_multiple_sequential_split_chars(self):
    # Folding this value used to cause truncation of the header.
    # NOTE(review): the test name and text refer to two whitespace
    # characters in a row, but the literals as seen here contain only
    # single spaces -- confirm no whitespace was lost from this test.
    header = Header(
        'This is a long line that has two whitespaces in a row. '
        'This used to cause truncation of the header when folded')
    expected = (
        'This is a long line that has two whitespaces in a row. '
        'This used to cause\n'
        ' truncation of the header when folded')
    self.ndiffAssertEqual(header.encode(), expected)
884
def test_splitter_split_on_punctuation_only_if_fws_with_header(self):
    # Header-level variant of the "punctuation is only a fold point when
    # followed by whitespace" check.  It carries a distinct name because a
    # Message-based test called test_splitter_split_on_punctuation_only_if_fws
    # is defined later in the same class; sharing that name would shadow
    # this method and it would never run.
    text = ('thisverylongheaderhas;semicolons;and,commas,but'
            'they;arenotlegal;fold,points')
    # No ';' or ',' is followed by whitespace, so no fold is expected.
    self.ndiffAssertEqual(Header(text).encode(), text)
891
def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
    # Two ordinary folds at '; ' boundaries, followed by an overlong final
    # chunk that is expected on a line of its own.
    chunk = ('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
             'be_on_a_line_all_by_itself;')
    header = Header(
        'this is a test where we need to have more than one line '
        'before; our final line that is just too big to fit;; ' + chunk)
    expected = '\n '.join([
        'this is a test where we need to have more than one line before;',
        'our final line that is just too big to fit;;',
        chunk,
    ])
    self.ndiffAssertEqual(header.encode(), expected)
902
def test_overlong_last_part_followed_by_split_point(self):
    # An unsplittable chunk with one trailing space is expected to come
    # back unchanged, trailing space included.
    text = ('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself ')
    self.ndiffAssertEqual(Header(text).encode(), text)
909
def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
    # Two overlong chunks separated by '; ; ': the lone ';' is expected on
    # a line of its own between them.
    first = ('this_is_a__test_where_we_need_to_have_more_than_one_line_'
             'before_our_final_line_;')
    last = ('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
    header = Header(first + ' ; ' + last)
    expected = '\n '.join([first, ';', last])
    self.ndiffAssertEqual(header.encode(), expected)
920
def test_multiline_with_overlong_last_part_followed_by_split_point(self):
    # Ordinary first line, a lone ';' line, then the overlong chunk with
    # its trailing '; ' kept.
    first = ('this is a test where we need to have more than one line '
             'before our final line;')
    last = ('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
    header = Header(first + ' ; ' + last)
    expected = '\n '.join([first, ';', last])
    self.ndiffAssertEqual(header.encode(), expected)
931
def test_long_header_with_whitespace_runs(self):
    # Each message id is followed by two spaces and the ids are joined with
    # one more, so the value contains three-space runs between ids and ends
    # with two spaces.  The expected output keeps every run intact: a run
    # at a fold point moves to the start of the continuation line.
    # The spaces are written as \x20 so their number cannot be lost.
    ref = '<foo@dom.ain>'
    gap = '\x20' * 3
    msg = Message()
    msg['From'] = 'test@dom.ain'
    msg['References'] = SPACE.join([ref + '\x20\x20'] * 10)
    msg.set_payload('Test')
    sfp = StringIO()
    Generator(sfp).flatten(msg)
    expected = '\n'.join([
        'From: test@dom.ain',
        'References: ' + gap.join([ref] * 4),
        gap + gap.join([ref] * 4),
        gap + gap.join([ref] * 2) + '\x20\x20',
        '',
        'Test',
    ])
    self.ndiffAssertEqual(sfp.getvalue(), expected)
948
def test_long_run_with_semi_header_splitter(self):
    # Ten identical ids followed by '; abc'.  The expected folds fall on
    # the spaces between ids (4, then 5, then 1 id per line) and the
    # '; abc' tail stays attached to the last id.
    ref = '<foo@dom.ain>'
    msg = Message()
    msg['From'] = 'test@dom.ain'
    msg['References'] = SPACE.join([ref] * 10) + '; abc'
    msg.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(msg)
    expected = '\n'.join([
        'From: test@dom.ain',
        'References: ' + SPACE.join([ref] * 4),
        ' ' + SPACE.join([ref] * 5),
        ' ' + ref + '; abc',
        '',
        'Test',
    ])
    self.ndiffAssertEqual(out.getvalue(), expected)
965
def test_splitter_split_on_punctuation_only_if_fws(self):
    # Message/Generator variant: the value has ';' and ',' but none is
    # followed by whitespace, so the value is expected intact on its own
    # continuation line.
    msg = Message()
    msg['From'] = 'test@dom.ain'
    msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
                         'they;arenotlegal;fold,points')
    msg.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(msg)
    # XXX the space after the header should not be there.
    expected = (
        'From: test@dom.ain\n'
        'References:\x20\n'
        ' thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points\n'
        '\n'
        'Test')
    self.ndiffAssertEqual(out.getvalue(), expected)
983
def test_no_split_long_header(self):
    # An 80-character run without any split point must never be broken.
    run = 'x' * 80
    # These come on two lines because Headers are really field value
    # classes and don't really know about their field names.
    self.ndiffAssertEqual(Header('References: ' + run).encode(),
                          'References:\n ' + run)
    self.ndiffAssertEqual(Header(run).encode(), run)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000995
def test_splitting_multiple_long_lines(self):
    # The value is the same long Received-style line three times, the 2nd
    # and 3rd introduced by newline + tab, with a trailing newline.  Each
    # copy is expected to fold at its '; ' boundaries, while the tabs that
    # separate the copies are kept.
    line = ('from babylon.socal-raves.org (localhost [127.0.0.1]); '
            'by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; '
            'for <mailman-admin@babylon.socal-raves.org>; '
            'Sat, 2 Feb 2002 17:00:06 -0800 (PST)')
    folded = '\n '.join([
        'from babylon.socal-raves.org (localhost [127.0.0.1]);',
        'by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;',
        'for <mailman-admin@babylon.socal-raves.org>;',
        'Sat, 2 Feb 2002 17:00:06 -0800 (PST)',
    ])
    hstr = '\n\t'.join([line] * 3) + '\n'
    header = Header(hstr, continuation_ws='\t')
    self.ndiffAssertEqual(header.encode(), '\n\t'.join([folded] * 3))
1017
def test_splitting_first_line_only_is_long(self):
    # Only the first physical line exceeds maxlinelen; the tab-led lines
    # that follow are expected to pass through untouched.
    tail = ('\n\tby kronos.mems-exchange.org with esmtp (Exim 4.05)'
            '\n\tid 17k4h5-00034i-00'
            '\n\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400')
    hstr = ('from modemcable093.139-201-24.que.mc.videotron.ca '
            '([24.201.139.93] helo=cthulhu.gerg.ca)' + tail)
    header = Header(hstr, maxlinelen=78, header_name='Received',
                    continuation_ws='\t')
    expected = ('from modemcable093.139-201-24.que.mc.videotron.ca '
                '([24.201.139.93]\n'
                ' helo=cthulhu.gerg.ca)' + tail)
    self.ndiffAssertEqual(header.encode(), expected)
1033
def test_long_8bit_header(self):
    # An iso-8859-1 header built in two steps: checked on its own, then
    # inside a message with a 76-column limit and with folding disabled
    # (maxheaderlen=0).
    check = self.ndiffAssertEqual
    header = Header('Britische Regierung gibt', 'iso-8859-1',
                    header_name='Subject')
    header.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
    folded = '\n '.join([
        '=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=',
        '=?iso-8859-1?q?hore-Windkraftprojekte?=',
    ])
    check(header.encode(maxlinelen=76), folded)
    msg = Message()
    msg['Subject'] = header
    check(msg.as_string(maxheaderlen=76), 'Subject: ' + folded + '\n\n')
    check(msg.as_string(maxheaderlen=0),
          'Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_'
          'f=FCr_Offshore-Windkraftprojekte?=\n\n')
1053
def test_long_8bit_header_no_charset(self):
    # A non-ASCII value with no declared charset is expected to be encoded
    # as utf-8, whether it is assigned as a plain string or wrapped in a
    # Header first.
    header_string = ('Britische Regierung gibt gr\xfcnes Licht '
                     'f\xfcr Offshore-Windkraftprojekte '
                     '<a-very-long-address@example.com>')
    expected = (
        'Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=\n'
        ' =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=\n'
        '\n')
    msg = Message()
    msg['Reply-To'] = header_string
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)
    msg = Message()
    msg['Reply-To'] = Header(header_string, header_name='Reply-To')
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)
1074
def test_long_to_header(self):
    # The first comma has no following whitespace and is not used as a
    # fold point; every ', ' after it is.
    msg = Message()
    msg['To'] = ('"Someone Test #A" <someone@eecs.umich.edu>,'
                 '<someone@eecs.umich.edu>, '
                 '"Someone Test #B" <someone@umich.edu>, '
                 '"Someone Test #C" <someone@eecs.umich.edu>, '
                 '"Someone Test #D" <someone@eecs.umich.edu>')
    expected = '\n '.join([
        'To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,',
        '"Someone Test #B" <someone@umich.edu>,',
        '"Someone Test #C" <someone@eecs.umich.edu>,',
        '"Someone Test #D" <someone@eecs.umich.edu>',
    ]) + '\n\n'
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)
1091
def test_long_line_after_append(self):
    # The first chunk nearly fills a line, so the appended chunk is
    # expected on a continuation line.
    first = ('This is an example of string which has almost the limit of '
             'header length.')
    header = Header(first)
    header.append('Add another line.')
    self.ndiffAssertEqual(header.encode(maxlinelen=76),
                          first + '\n Add another line.')
1100
def test_shorter_line_with_append(self):
    # Both chunks fit on one line; they are expected joined by one space.
    header = Header('This is a shorter line.')
    header.append('Add another sentence. (Surprise?)')
    self.ndiffAssertEqual(
        header.encode(),
        'This is a shorter line. Add another sentence. (Surprise?)')
1108
def test_long_field_name(self):
    # With a long header_name the expected first encoded word is short,
    # and the remaining ones are longer.
    field_name = 'X-Very-Very-Very-Long-Header-Name'
    text = ('Die Mieter treten hier ein werden mit einem Foerderband '
            'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
            'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
            'bef\xf6rdert. ')
    header = Header(text, 'iso-8859-1', header_name=field_name)
    # BAW: this seems broken because the first line is too long
    expected = '\n '.join([
        '=?iso-8859-1?q?Die_Mieter_treten_hier_e?=',
        '=?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=',
        '=?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=',
        '=?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=',
    ])
    self.ndiffAssertEqual(header.encode(maxlinelen=76), expected)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001123
def test_long_received_header(self):
    # The same value is stored once as a Header (with a tab continuation
    # hint) and once as a plain string; both are expected to fold alike.
    value = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
             'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
             'Wed, 05 Mar 2003 18:10:18 -0700')
    msg = Message()
    msg['Received-1'] = Header(value, continuation_ws='\t')
    msg['Received-2'] = value
    # This should be splitting on spaces not semicolons.
    folded = '\n '.join([
        'from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by',
        'hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;',
        'Wed, 05 Mar 2003 18:10:18 -0700',
    ])
    expected = ('Received-1: ' + folded + '\n'
                'Received-2: ' + folded + '\n'
                '\n')
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)
1141
def test_string_headerinst_eq(self):
    # A Header instance and the equivalent plain string are expected to
    # serialise identically.
    value = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
             'tu-muenchen.de> (David Bremner\'s message of '
             '"Thu, 6 Mar 2003 13:58:21 +0100")')
    msg = Message()
    msg['Received-1'] = Header(value, header_name='Received-1',
                               continuation_ws='\t')
    msg['Received-2'] = value
    # XXX The space after the ':' should not be there.
    folded = (
        '\x20\n'
        ' <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David\n'
        " Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")\n")
    expected = 'Received-1:' + folded + 'Received-2:' + folded + '\n'
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)
1160
def test_long_unbreakable_lines_with_continuation(self):
    # A two-line value of unbreakable base64-looking text is stored three
    # ways: as a string, as a Header, and as a string with a leading
    # space.  All three are expected to serialise the same way.
    line_one = ('iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAk'
                'HiJeRUIcGBi9')
    line_two = ('locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Ny'
                'p1Y69wj1PN2I5gzp')
    value = line_one + '\n ' + line_two
    msg = Message()
    msg['Face-1'] = value
    msg['Face-2'] = Header(value, header_name='Face-2')
    msg['Face-3'] = ' ' + value
    # XXX This splitting is all wrong.  The first value line should be
    # snug against the field name, or the space after the header name
    # should not be there.
    folded = '\x20\n ' + line_one + '\n ' + line_two + '\n'
    expected = ('Face-1:' + folded
                + 'Face-2:' + folded
                + 'Face-3:' + folded
                + '\n')
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)
1184
def test_another_long_multiline_header(self):
    # Parse a one-line Received header from text and check how it folds
    # when the message is serialised again.
    source = ('Received: from siimage.com '
              '([172.25.1.3]) by zima.siliconimage.com with '
              'Microsoft SMTPSVC(5.0.2195.4905); '
              'Wed, 16 Oct 2002 07:41:11 -0700')
    msg = email.message_from_string(source)
    expected = (
        'Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with\n'
        ' Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700\n'
        '\n')
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)
1197
def test_long_lines_with_different_header(self):
    # The value itself starts with 'List-Unsubscribe: ' but is stored
    # under the field name 'List', once as a string and once as a Header;
    # both copies are expected to fold identically.
    value = ('List-Unsubscribe: '
             '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
             ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
             '?subject=unsubscribe>')
    msg = Message()
    msg['List'] = value
    msg['List'] = Header(value, header_name='List')
    folded = (
        'List: List-Unsubscribe:\n'
        ' <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,\n'
        ' <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>\n')
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78),
                          folded + folded + '\n')
1216
R. David Murray6f0022d2011-01-07 21:57:25 +00001217 def test_long_rfc2047_header_with_embedded_fws(self):
1218 h = Header(textwrap.dedent("""\
1219 We're going to pretend this header is in a non-ascii character set
1220 \tto see if line wrapping with encoded words and embedded
1221 folding white space works"""),
1222 charset='utf-8',
1223 header_name='Test')
1224 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1225 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1226 =?utf-8?q?cter_set?=
1227 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1228 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1229
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001230
Ezio Melottib3aedd42010-11-20 19:04:17 +00001231
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001232# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # A one-header message whose body opens with a line starting
        # with "From ".
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload('From the desk of A.A.A.:\n'
                            'Blah blah blah\n')
        self.msg = message

    def _flatten(self, mangle):
        # Serialise self.msg with the given mangle_from_ setting.
        buf = StringIO()
        Generator(buf, mangle_from_=mangle).flatten(self.msg)
        return buf.getvalue()

    def test_mangled_from(self):
        # With mangling on, the body's "From " line gains a '>' prefix.
        self.assertEqual(self._flatten(True),
                         'From: aaa@bbb.org\n'
                         '\n'
                         '>From the desk of A.A.A.:\n'
                         'Blah blah blah\n')

    def test_dont_mangle_from(self):
        # With mangling off, the body is written unchanged.
        self.assertEqual(self._flatten(False),
                         'From: aaa@bbb.org\n'
                         '\n'
                         'From the desk of A.A.A.:\n'
                         'Blah blah blah\n')
1263
1264
Ezio Melottib3aedd42010-11-20 19:04:17 +00001265
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001266# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Read the sample audio file and wrap its bytes in a MIMEAudio part.
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # Decoding the payload as base64 must give back the original bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b).
        same = self.assertIs
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list is a sentinel no lookup could return by accident.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        same(self._au.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._au.get_param('foobar', missing), missing)
        same(self._au.get_param('attachment', missing,
                                header='foobar'), missing)
1304
1305
Ezio Melottib3aedd42010-11-20 19:04:17 +00001306
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001307# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Read the sample GIF and wrap its bytes in a MIMEImage part.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # Decoding the payload as base64 must give back the original bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b).
        same = self.assertIs
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list is a sentinel no lookup could return by accident.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        same(self._im.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._im.get_param('foobar', missing), missing)
        same(self._im.get_param('attachment', missing,
                                header='foobar'), missing)
1345
1346
Ezio Melottib3aedd42010-11-20 19:04:17 +00001347
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001348# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        # Binary data gets the generic content type and a base64 CTE.
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        encoded = part.get_payload().strip()
        self.assertEqual(encoded, '+vv8/f7/')
        self.assertEqual(part.get_payload(decode=True), bytesdata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001363
1364
Ezio Melottib3aedd42010-11-20 19:04:17 +00001365
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001366# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list is a sentinel no lookup could return by accident;
        # assertIs reports both operands if the identity check fails.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(
            self._msg.get_param('charset', missing, header='foobar'),
            missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        # NOTE(review): this body is identical to test_charset above.
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn shows the serialised message if the text is missing.
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1417
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001418
Ezio Melottib3aedd42010-11-20 19:04:17 +00001419
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001420# Test complicated multipart/* messages
1421class TestMultipart(TestEmailBase):
def setUp(self):
    # Build the multipart/mixed container shared by the tests: a text
    # intro plus a GIF attachment, with From/To/Subject/Date headers.
    with openfile('PyBanner048.gif', 'rb') as fp:
        data = fp.read()
    container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
    image = MIMEImage(data, name='dingusfish.gif')
    image.add_header('content-disposition', 'attachment',
                     filename='dingusfish.gif')
    intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
    container.attach(intro)
    container.attach(image)
    container['From'] = 'Barry <barry@digicool.com>'
    container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
    container['Subject'] = 'Here is your dingus fish'

    now = 987809702.54848599
    timetuple = time.localtime(now)
    # time.timezone / time.altzone count seconds *west* of UTC, so a
    # positive value corresponds to a '-' offset in the Date header.
    if timetuple.tm_isdst == 0:
        tzsecs = time.timezone
    else:
        tzsecs = time.altzone
    sign = '-' if tzsecs > 0 else '+'
    # Format the magnitude only and split it into hours and minutes.
    # Formatting tzsecs / 36 directly produced '+-0100' east of UTC and a
    # wrong minutes field for offsets that are not whole hours.
    hours, minutes = divmod(abs(tzsecs) // 60, 60)
    tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
    container['Date'] = time.strftime(
        '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
    self._msg = container
    self._im = image
    self._txt = intro
1457
1458 def test_hierarchy(self):
1459 # convenience
1460 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001461 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001462 raises = self.assertRaises
1463 # tests
1464 m = self._msg
1465 unless(m.is_multipart())
1466 eq(m.get_content_type(), 'multipart/mixed')
1467 eq(len(m.get_payload()), 2)
1468 raises(IndexError, m.get_payload, 2)
1469 m0 = m.get_payload(0)
1470 m1 = m.get_payload(1)
1471 unless(m0 is self._txt)
1472 unless(m1 is self._im)
1473 eq(m.get_payload(), [m0, m1])
1474 unless(not m0.is_multipart())
1475 unless(not m1.is_multipart())
1476
1477 def test_empty_multipart_idempotent(self):
1478 text = """\
1479Content-Type: multipart/mixed; boundary="BOUNDARY"
1480MIME-Version: 1.0
1481Subject: A subject
1482To: aperson@dom.ain
1483From: bperson@dom.ain
1484
1485
1486--BOUNDARY
1487
1488
1489--BOUNDARY--
1490"""
1491 msg = Parser().parsestr(text)
1492 self.ndiffAssertEqual(text, msg.as_string())
1493
1494 def test_no_parts_in_a_multipart_with_none_epilogue(self):
1495 outer = MIMEBase('multipart', 'mixed')
1496 outer['Subject'] = 'A subject'
1497 outer['To'] = 'aperson@dom.ain'
1498 outer['From'] = 'bperson@dom.ain'
1499 outer.set_boundary('BOUNDARY')
1500 self.ndiffAssertEqual(outer.as_string(), '''\
1501Content-Type: multipart/mixed; boundary="BOUNDARY"
1502MIME-Version: 1.0
1503Subject: A subject
1504To: aperson@dom.ain
1505From: bperson@dom.ain
1506
1507--BOUNDARY
1508
1509--BOUNDARY--''')
1510
1511 def test_no_parts_in_a_multipart_with_empty_epilogue(self):
1512 outer = MIMEBase('multipart', 'mixed')
1513 outer['Subject'] = 'A subject'
1514 outer['To'] = 'aperson@dom.ain'
1515 outer['From'] = 'bperson@dom.ain'
1516 outer.preamble = ''
1517 outer.epilogue = ''
1518 outer.set_boundary('BOUNDARY')
1519 self.ndiffAssertEqual(outer.as_string(), '''\
1520Content-Type: multipart/mixed; boundary="BOUNDARY"
1521MIME-Version: 1.0
1522Subject: A subject
1523To: aperson@dom.ain
1524From: bperson@dom.ain
1525
1526
1527--BOUNDARY
1528
1529--BOUNDARY--
1530''')
1531
1532 def test_one_part_in_a_multipart(self):
1533 eq = self.ndiffAssertEqual
1534 outer = MIMEBase('multipart', 'mixed')
1535 outer['Subject'] = 'A subject'
1536 outer['To'] = 'aperson@dom.ain'
1537 outer['From'] = 'bperson@dom.ain'
1538 outer.set_boundary('BOUNDARY')
1539 msg = MIMEText('hello world')
1540 outer.attach(msg)
1541 eq(outer.as_string(), '''\
1542Content-Type: multipart/mixed; boundary="BOUNDARY"
1543MIME-Version: 1.0
1544Subject: A subject
1545To: aperson@dom.ain
1546From: bperson@dom.ain
1547
1548--BOUNDARY
1549Content-Type: text/plain; charset="us-ascii"
1550MIME-Version: 1.0
1551Content-Transfer-Encoding: 7bit
1552
1553hello world
1554--BOUNDARY--''')
1555
1556 def test_seq_parts_in_a_multipart_with_empty_preamble(self):
1557 eq = self.ndiffAssertEqual
1558 outer = MIMEBase('multipart', 'mixed')
1559 outer['Subject'] = 'A subject'
1560 outer['To'] = 'aperson@dom.ain'
1561 outer['From'] = 'bperson@dom.ain'
1562 outer.preamble = ''
1563 msg = MIMEText('hello world')
1564 outer.attach(msg)
1565 outer.set_boundary('BOUNDARY')
1566 eq(outer.as_string(), '''\
1567Content-Type: multipart/mixed; boundary="BOUNDARY"
1568MIME-Version: 1.0
1569Subject: A subject
1570To: aperson@dom.ain
1571From: bperson@dom.ain
1572
1573
1574--BOUNDARY
1575Content-Type: text/plain; charset="us-ascii"
1576MIME-Version: 1.0
1577Content-Transfer-Encoding: 7bit
1578
1579hello world
1580--BOUNDARY--''')
1581
1582
1583 def test_seq_parts_in_a_multipart_with_none_preamble(self):
1584 eq = self.ndiffAssertEqual
1585 outer = MIMEBase('multipart', 'mixed')
1586 outer['Subject'] = 'A subject'
1587 outer['To'] = 'aperson@dom.ain'
1588 outer['From'] = 'bperson@dom.ain'
1589 outer.preamble = None
1590 msg = MIMEText('hello world')
1591 outer.attach(msg)
1592 outer.set_boundary('BOUNDARY')
1593 eq(outer.as_string(), '''\
1594Content-Type: multipart/mixed; boundary="BOUNDARY"
1595MIME-Version: 1.0
1596Subject: A subject
1597To: aperson@dom.ain
1598From: bperson@dom.ain
1599
1600--BOUNDARY
1601Content-Type: text/plain; charset="us-ascii"
1602MIME-Version: 1.0
1603Content-Transfer-Encoding: 7bit
1604
1605hello world
1606--BOUNDARY--''')
1607
1608
1609 def test_seq_parts_in_a_multipart_with_none_epilogue(self):
1610 eq = self.ndiffAssertEqual
1611 outer = MIMEBase('multipart', 'mixed')
1612 outer['Subject'] = 'A subject'
1613 outer['To'] = 'aperson@dom.ain'
1614 outer['From'] = 'bperson@dom.ain'
1615 outer.epilogue = None
1616 msg = MIMEText('hello world')
1617 outer.attach(msg)
1618 outer.set_boundary('BOUNDARY')
1619 eq(outer.as_string(), '''\
1620Content-Type: multipart/mixed; boundary="BOUNDARY"
1621MIME-Version: 1.0
1622Subject: A subject
1623To: aperson@dom.ain
1624From: bperson@dom.ain
1625
1626--BOUNDARY
1627Content-Type: text/plain; charset="us-ascii"
1628MIME-Version: 1.0
1629Content-Transfer-Encoding: 7bit
1630
1631hello world
1632--BOUNDARY--''')
1633
1634
1635 def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
1636 eq = self.ndiffAssertEqual
1637 outer = MIMEBase('multipart', 'mixed')
1638 outer['Subject'] = 'A subject'
1639 outer['To'] = 'aperson@dom.ain'
1640 outer['From'] = 'bperson@dom.ain'
1641 outer.epilogue = ''
1642 msg = MIMEText('hello world')
1643 outer.attach(msg)
1644 outer.set_boundary('BOUNDARY')
1645 eq(outer.as_string(), '''\
1646Content-Type: multipart/mixed; boundary="BOUNDARY"
1647MIME-Version: 1.0
1648Subject: A subject
1649To: aperson@dom.ain
1650From: bperson@dom.ain
1651
1652--BOUNDARY
1653Content-Type: text/plain; charset="us-ascii"
1654MIME-Version: 1.0
1655Content-Transfer-Encoding: 7bit
1656
1657hello world
1658--BOUNDARY--
1659''')
1660
1661
1662 def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
1663 eq = self.ndiffAssertEqual
1664 outer = MIMEBase('multipart', 'mixed')
1665 outer['Subject'] = 'A subject'
1666 outer['To'] = 'aperson@dom.ain'
1667 outer['From'] = 'bperson@dom.ain'
1668 outer.epilogue = '\n'
1669 msg = MIMEText('hello world')
1670 outer.attach(msg)
1671 outer.set_boundary('BOUNDARY')
1672 eq(outer.as_string(), '''\
1673Content-Type: multipart/mixed; boundary="BOUNDARY"
1674MIME-Version: 1.0
1675Subject: A subject
1676To: aperson@dom.ain
1677From: bperson@dom.ain
1678
1679--BOUNDARY
1680Content-Type: text/plain; charset="us-ascii"
1681MIME-Version: 1.0
1682Content-Transfer-Encoding: 7bit
1683
1684hello world
1685--BOUNDARY--
1686
1687''')
1688
1689 def test_message_external_body(self):
1690 eq = self.assertEqual
1691 msg = self._msgobj('msg_36.txt')
1692 eq(len(msg.get_payload()), 2)
1693 msg1 = msg.get_payload(1)
1694 eq(msg1.get_content_type(), 'multipart/alternative')
1695 eq(len(msg1.get_payload()), 2)
1696 for subpart in msg1.get_payload():
1697 eq(subpart.get_content_type(), 'message/external-body')
1698 eq(len(subpart.get_payload()), 1)
1699 subsubpart = subpart.get_payload(0)
1700 eq(subsubpart.get_content_type(), 'text/plain')
1701
1702 def test_double_boundary(self):
1703 # msg_37.txt is a multipart that contains two dash-boundary's in a
1704 # row. Our interpretation of RFC 2046 calls for ignoring the second
1705 # and subsequent boundaries.
1706 msg = self._msgobj('msg_37.txt')
1707 self.assertEqual(len(msg.get_payload()), 3)
1708
1709 def test_nested_inner_contains_outer_boundary(self):
1710 eq = self.ndiffAssertEqual
1711 # msg_38.txt has an inner part that contains outer boundaries. My
1712 # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
1713 # these are illegal and should be interpreted as unterminated inner
1714 # parts.
1715 msg = self._msgobj('msg_38.txt')
1716 sfp = StringIO()
1717 iterators._structure(msg, sfp)
1718 eq(sfp.getvalue(), """\
1719multipart/mixed
1720 multipart/mixed
1721 multipart/alternative
1722 text/plain
1723 text/plain
1724 text/plain
1725 text/plain
1726""")
1727
1728 def test_nested_with_same_boundary(self):
1729 eq = self.ndiffAssertEqual
1730 # msg 39.txt is similarly evil in that it's got inner parts that use
1731 # the same boundary as outer parts. Again, I believe the way this is
1732 # parsed is closest to the spirit of RFC 2046
1733 msg = self._msgobj('msg_39.txt')
1734 sfp = StringIO()
1735 iterators._structure(msg, sfp)
1736 eq(sfp.getvalue(), """\
1737multipart/mixed
1738 multipart/mixed
1739 multipart/alternative
1740 application/octet-stream
1741 application/octet-stream
1742 text/plain
1743""")
1744
1745 def test_boundary_in_non_multipart(self):
1746 msg = self._msgobj('msg_40.txt')
1747 self.assertEqual(msg.as_string(), '''\
1748MIME-Version: 1.0
1749Content-Type: text/html; boundary="--961284236552522269"
1750
1751----961284236552522269
1752Content-Type: text/html;
1753Content-Transfer-Encoding: 7Bit
1754
1755<html></html>
1756
1757----961284236552522269--
1758''')
1759
1760 def test_boundary_with_leading_space(self):
1761 eq = self.assertEqual
1762 msg = email.message_from_string('''\
1763MIME-Version: 1.0
1764Content-Type: multipart/mixed; boundary=" XXXX"
1765
1766-- XXXX
1767Content-Type: text/plain
1768
1769
1770-- XXXX
1771Content-Type: text/plain
1772
1773-- XXXX--
1774''')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001775 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001776 eq(msg.get_boundary(), ' XXXX')
1777 eq(len(msg.get_payload()), 2)
1778
1779 def test_boundary_without_trailing_newline(self):
1780 m = Parser().parsestr("""\
1781Content-Type: multipart/mixed; boundary="===============0012394164=="
1782MIME-Version: 1.0
1783
1784--===============0012394164==
1785Content-Type: image/file1.jpg
1786MIME-Version: 1.0
1787Content-Transfer-Encoding: base64
1788
1789YXNkZg==
1790--===============0012394164==--""")
Ezio Melottib3aedd42010-11-20 19:04:17 +00001791 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001792
1793
Ezio Melottib3aedd42010-11-20 19:04:17 +00001794
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001795# Test some badly formatted messages
class TestNonConformantBase:
    """Shared tests for parsing badly formatted messages.

    This is a mixin: the concrete test class must provide a ``policy``
    attribute (passed to the parser) and a ``get_defects(obj)`` method that
    returns the list of defects recorded for ``obj``.
    """

    def _msgobj(self, filename):
        """Parse the named test data file using ``self.policy``."""
        with openfile(filename) as fp:
            return email.message_from_file(fp, policy=self.policy)

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        defects = self.get_defects(inner)
        self.assertEqual(len(defects), 1)
        self.assertIsInstance(defects[0], errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        defects = self.get_defects(msg)
        self.assertEqual(len(defects), 2)
        self.assertIsInstance(defects[0], errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(defects[1],
                              errors.MultipartInvariantViolationDefect)

    # Template for the Content-Transfer-Encoding tests; the ``{}`` slot takes
    # extra header text appended after the Content-Type header.  The
    # ``boundary=`` line is a folded continuation of Content-Type, so it must
    # stay indented deeper than the common prefix that dedent() strips.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    def test_multipart_invalid_cte(self):
        msg = email.message_from_string(
            self.multipart_msg.format("\nContent-Transfer-Encoding: base64"),
            policy=self.policy)
        defects = self.get_defects(msg)
        self.assertEqual(len(defects), 1)
        self.assertIsInstance(
            defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    def test_multipart_no_cte_no_defect(self):
        msg = email.message_from_string(
            self.multipart_msg.format(''),
            policy=self.policy)
        self.assertEqual(len(self.get_defects(msg)), 0)

    def test_multipart_valid_cte_no_defect(self):
        for cte in ('7bit', '8bit', 'BINary'):
            msg = email.message_from_string(
                self.multipart_msg.format(
                    "\nContent-Transfer-Encoding: {}".format(cte)),
                policy=self.policy)
            self.assertEqual(len(self.get_defects(msg)), 0)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        defects = self.get_defects(msg)
        self.assertEqual(len(defects), 2)
        self.assertIsInstance(defects[0], errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        defects = self.get_defects(bad)
        self.assertEqual(len(defects), 1)
        self.assertIsInstance(defects[0], errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m, policy=self.policy)
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        defects = self.get_defects(msg)
        eq(len(defects), 1)
        self.assertIsInstance(defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        eq(defects[0].line, ' Line 1\n')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001958
1959
class TestNonConformant(TestNonConformantBase, TestEmailBase):
    """Run the non-conformant tests with defects read off each object."""

    policy = email.policy.default

    def get_defects(self, obj):
        """Return the defects stored on ``obj`` itself."""
        return obj.defects
1966
1967
class TestNonConformantCapture(TestNonConformantBase, TestEmailBase):
    """Run the non-conformant tests with defects captured by the policy."""

    class CapturePolicy(email.policy.Policy):
        """Policy that appends every registered defect to ``captured``."""

        captured = None

        def register_defect(self, obj, defect):
            self.captured.append(defect)

    def setUp(self):
        # A fresh policy per test, so captured defects never leak across tests.
        self.policy = self.CapturePolicy(captured=[])

    def get_defects(self, obj):
        """Return every defect the policy captured, whatever ``obj`` is."""
        return self.policy.captured
1980
1981
class TestRaisingDefects(TestEmailBase):
    """Under the strict policy, parser defects are raised as exceptions."""

    def _msgobj(self, filename):
        """Parse the named test data file with ``email.policy.strict``."""
        with openfile(filename) as fp:
            return email.message_from_file(fp, policy=email.policy.strict)

    def test_same_boundary_inner_outer(self):
        with self.assertRaises(errors.StartBoundaryNotFoundDefect):
            self._msgobj('msg_15.txt')

    def test_multipart_no_boundary(self):
        with self.assertRaises(errors.NoBoundaryInMultipartDefect):
            self._msgobj('msg_25.txt')

    def test_lying_multipart(self):
        with self.assertRaises(errors.NoBoundaryInMultipartDefect):
            self._msgobj('msg_41.txt')

    def test_missing_start_boundary(self):
        with self.assertRaises(errors.StartBoundaryNotFoundDefect):
            self._msgobj('msg_42.txt')

    def test_first_line_is_continuation_header(self):
        m = ' Line 1\nLine 2\nLine 3'
        with self.assertRaises(errors.FirstHeaderLineIsContinuationDefect):
            # Only the raised defect matters; the parse result is not kept.
            email.message_from_string(m, policy=email.policy.strict)
2009
Ezio Melottib3aedd42010-11-20 19:04:17 +00002010
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002011# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """Tests for RFC 2047 encoded-word decoding and re-encoding."""

    def test_rfc2047_multiline(self):
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        decoded = decode_header(raw)
        self.assertEqual(decoded, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(decoded)
        self.assertEqual(
            str(header),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        decoded = decode_header(raw)
        self.assertEqual(decoded, [(b'Andr\xe9', 'iso-8859-1'),
                                   (b'Pirard <pirard@dom.ain>', None)])
        self.assertEqual(str(make_header(decoded)),
                         'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        raw = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
               '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        decoded = decode_header(raw)
        self.assertEqual(decoded, [
            (b'The', None), (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
        self.assertEqual(str(make_header(decoded)),
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm', None), (b'\xf6', 'iso-8859-1'),
            (b'rg', None), (b'\xe5', 'iso-8859-1'),
            (b'sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # only test complete bytes
        cases = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        ]
        for encoded, expected in cases:
            self.assertEqual(decode_header(template % encoded),
                             [(expected, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(raw),
                         [(b'andr\xe9=zz', 'iso-8659-1')])
2075
Ezio Melottib3aedd42010-11-20 19:04:17 +00002076
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002077# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for MIMEMessage and message/* parsing and generation."""

    def setUp(self):
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def _make_mixed(self, epilogue):
        """Return a two-part multipart/mixed message with the given epilogue."""
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = epilogue
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(MIMEText('One'))
        msg.attach(MIMEText('Two'))
        return msg

    def _check_digest_default_types(self, filename):
        """Check default/content types of both containers in a digest file."""
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        containers = [msg.get_payload(0), msg.get_payload(1)]
        for container in containers:
            eq(container.get_default_type(), 'message/rfc822')
            eq(container.get_content_type(), 'message/rfc822')
        for container in containers:
            inner = container.get_payload(0)
            eq(inner.get_default_type(), 'text/plain')
            eq(inner.get_content_type(), 'text/plain')

    def test_type_error(self):
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        """A MIMEMessage refuses to take a second attached message."""
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the indented lines of this expected text show a single
        # leading space here; confirm the exact indentation against
        # msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        """A message with preamble and epilogue flattens to msg_21.txt."""
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = self._make_mixed('End of MIME message\n')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        self.ndiffAssertEqual(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        msg = self._make_mixed('')
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        self._check_digest_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_digest_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # Without explicit headers the subparts must still report the digest
        # default type, and the generator must emit empty header blocks.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002376
Ezio Melottib3aedd42010-11-20 19:04:17 +00002377
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):
    """Check that parse -> model -> generate reproduces the input text.

    Each test reads one of the sample messages, parses it into a message
    object tree and, without touching the tree, flattens it again.  The
    regenerated text must equal the text that was read.
    """

    # Line separator appended to the expected preamble/epilogue/payloads.
    linesep = '\n'

    def _msgobj(self, filename):
        """Return ``(msg, text)`` for the sample message *filename*.

        *text* is the content read through ``openfile(filename)`` and *msg*
        is the result of ``email.message_from_string(text)``.
        """
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        """Flatten *msg* and assert that the output equals *text*.

        Headers are not re-wrapped (``maxheaderlen=0``) so long header lines
        come back exactly as parsed.  *unixfrom* is handed straight to
        ``Generator.flatten()``.
        """
        out = StringIO()
        Generator(out, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, out.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        msg, text = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_message_delivery_status(self):
        # The only sample that is regenerated with its Unix-From line.
        self._idempotent(*self._msgobj('msg_43.txt'), unixfrom=True)

    def test_message_signed_idempotent(self):
        self._idempotent(*self._msgobj('msg_45.txt'))

    def test_content_type(self):
        eq = self.assertEqual
        # Only the parsed tree is inspected here; the raw text is not needed.
        msg, _ = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002540
2541
Ezio Melottib3aedd42010-11-20 19:04:17 +00002542
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002543# Test various other bits of the package's functionality
2544class TestMiscellaneous(TestEmailBase):
2545 def test_message_from_string(self):
2546 with openfile('msg_01.txt') as fp:
2547 text = fp.read()
2548 msg = email.message_from_string(text)
2549 s = StringIO()
2550 # Don't wrap/continue long headers since we're trying to test
2551 # idempotency.
2552 g = Generator(s, maxheaderlen=0)
2553 g.flatten(msg)
2554 self.assertEqual(text, s.getvalue())
2555
2556 def test_message_from_file(self):
2557 with openfile('msg_01.txt') as fp:
2558 text = fp.read()
2559 fp.seek(0)
2560 msg = email.message_from_file(fp)
2561 s = StringIO()
2562 # Don't wrap/continue long headers since we're trying to test
2563 # idempotency.
2564 g = Generator(s, maxheaderlen=0)
2565 g.flatten(msg)
2566 self.assertEqual(text, s.getvalue())
2567
2568 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002569 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002570 with openfile('msg_01.txt') as fp:
2571 text = fp.read()
2572
2573 # Create a subclass
2574 class MyMessage(Message):
2575 pass
2576
2577 msg = email.message_from_string(text, MyMessage)
2578 unless(isinstance(msg, MyMessage))
2579 # Try something more complicated
2580 with openfile('msg_02.txt') as fp:
2581 text = fp.read()
2582 msg = email.message_from_string(text, MyMessage)
2583 for subpart in msg.walk():
2584 unless(isinstance(subpart, MyMessage))
2585
2586 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002587 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002588 # Create a subclass
2589 class MyMessage(Message):
2590 pass
2591
2592 with openfile('msg_01.txt') as fp:
2593 msg = email.message_from_file(fp, MyMessage)
2594 unless(isinstance(msg, MyMessage))
2595 # Try something more complicated
2596 with openfile('msg_02.txt') as fp:
2597 msg = email.message_from_file(fp, MyMessage)
2598 for subpart in msg.walk():
2599 unless(isinstance(subpart, MyMessage))
2600
2601 def test__all__(self):
2602 module = __import__('email')
2603 # Can't use sorted() here due to Python 2.3 compatibility
2604 all = module.__all__[:]
2605 all.sort()
2606 self.assertEqual(all, [
2607 'base64mime', 'charset', 'encoders', 'errors', 'generator',
R. David Murray96fd54e2010-10-08 15:55:28 +00002608 'header', 'iterators', 'message', 'message_from_binary_file',
2609 'message_from_bytes', 'message_from_file',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002610 'message_from_string', 'mime', 'parser',
2611 'quoprimime', 'utils',
2612 ])
2613
2614 def test_formatdate(self):
2615 now = time.time()
2616 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2617 time.gmtime(now)[:6])
2618
2619 def test_formatdate_localtime(self):
2620 now = time.time()
2621 self.assertEqual(
2622 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2623 time.localtime(now)[:6])
2624
2625 def test_formatdate_usegmt(self):
2626 now = time.time()
2627 self.assertEqual(
2628 utils.formatdate(now, localtime=False),
2629 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2630 self.assertEqual(
2631 utils.formatdate(now, localtime=False, usegmt=True),
2632 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2633
2634 def test_parsedate_none(self):
2635 self.assertEqual(utils.parsedate(''), None)
2636
2637 def test_parsedate_compact(self):
2638 # The FWS after the comma is optional
2639 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2640 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2641
2642 def test_parsedate_no_dayofweek(self):
2643 eq = self.assertEqual
2644 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2645 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2646
2647 def test_parsedate_compact_no_dayofweek(self):
2648 eq = self.assertEqual
2649 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2650 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2651
R. David Murray4a62e892010-12-23 20:35:46 +00002652 def test_parsedate_no_space_before_positive_offset(self):
2653 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2654 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2655
2656 def test_parsedate_no_space_before_negative_offset(self):
2657 # Issue 1155362: we already handled '+' for this case.
2658 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2659 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2660
2661
R David Murrayaccd1c02011-03-13 20:06:23 -04002662 def test_parsedate_accepts_time_with_dots(self):
2663 eq = self.assertEqual
2664 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2665 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2666 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2667 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2668
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002669 def test_parsedate_acceptable_to_time_functions(self):
2670 eq = self.assertEqual
2671 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2672 t = int(time.mktime(timetup))
2673 eq(time.localtime(t)[:6], timetup[:6])
2674 eq(int(time.strftime('%Y', timetup)), 2003)
2675 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2676 t = int(time.mktime(timetup[:9]))
2677 eq(time.localtime(t)[:6], timetup[:6])
2678 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2679
R. David Murray219d1c82010-08-25 00:45:55 +00002680 def test_parsedate_y2k(self):
2681 """Test for parsing a date with a two-digit year.
2682
2683 Parsing a date with a two-digit year should return the correct
2684 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2685 obsoletes RFC822) requires four-digit years.
2686
2687 """
2688 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2689 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2690 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2691 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2692
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002693 def test_parseaddr_empty(self):
2694 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2695 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2696
2697 def test_noquote_dump(self):
2698 self.assertEqual(
2699 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2700 'A Silly Person <person@dom.ain>')
2701
2702 def test_escape_dump(self):
2703 self.assertEqual(
2704 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2705 r'"A \(Very\) Silly Person" <person@dom.ain>')
2706 a = r'A \(Special\) Person'
2707 b = 'person@dom.ain'
2708 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2709
2710 def test_escape_backslashes(self):
2711 self.assertEqual(
2712 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2713 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2714 a = r'Arthur \Backslash\ Foobar'
2715 b = 'person@dom.ain'
2716 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2717
R David Murray8debacb2011-04-06 09:35:57 -04002718 def test_quotes_unicode_names(self):
2719 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2720 name = "H\u00e4ns W\u00fcrst"
2721 addr = 'person@dom.ain'
2722 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2723 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2724 self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2725 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2726 latin1_quopri)
2727
2728 def test_accepts_any_charset_like_object(self):
2729 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2730 name = "H\u00e4ns W\u00fcrst"
2731 addr = 'person@dom.ain'
2732 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2733 foobar = "FOOBAR"
2734 class CharsetMock:
2735 def header_encode(self, string):
2736 return foobar
2737 mock = CharsetMock()
2738 mock_expected = "%s <%s>" % (foobar, addr)
2739 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2740 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2741 utf8_base64)
2742
2743 def test_invalid_charset_like_object_raises_error(self):
2744 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2745 name = "H\u00e4ns W\u00fcrst"
2746 addr = 'person@dom.ain'
2747 # A object without a header_encode method:
2748 bad_charset = object()
2749 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
2750 bad_charset)
2751
2752 def test_unicode_address_raises_error(self):
2753 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2754 addr = 'pers\u00f6n@dom.in'
2755 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
2756 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
2757
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002758 def test_name_with_dot(self):
2759 x = 'John X. Doe <jxd@example.com>'
2760 y = '"John X. Doe" <jxd@example.com>'
2761 a, b = ('John X. Doe', 'jxd@example.com')
2762 self.assertEqual(utils.parseaddr(x), (a, b))
2763 self.assertEqual(utils.parseaddr(y), (a, b))
2764 # formataddr() quotes the name if there's a dot in it
2765 self.assertEqual(utils.formataddr((a, b)), y)
2766
R. David Murray5397e862010-10-02 15:58:26 +00002767 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2768 # issue 10005. Note that in the third test the second pair of
2769 # backslashes is not actually a quoted pair because it is not inside a
2770 # comment or quoted string: the address being parsed has a quoted
2771 # string containing a quoted backslash, followed by 'example' and two
2772 # backslashes, followed by another quoted string containing a space and
2773 # the word 'example'. parseaddr copies those two backslashes
2774 # literally. Per rfc5322 this is not technically correct since a \ may
2775 # not appear in an address outside of a quoted string. It is probably
2776 # a sensible Postel interpretation, though.
2777 eq = self.assertEqual
2778 eq(utils.parseaddr('""example" example"@example.com'),
2779 ('', '""example" example"@example.com'))
2780 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2781 ('', '"\\"example\\" example"@example.com'))
2782 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2783 ('', '"\\\\"example\\\\" example"@example.com'))
2784
R. David Murray63563cd2010-12-18 18:25:38 +00002785 def test_parseaddr_preserves_spaces_in_local_part(self):
2786 # issue 9286. A normal RFC5322 local part should not contain any
2787 # folding white space, but legacy local parts can (they are a sequence
2788 # of atoms, not dotatoms). On the other hand we strip whitespace from
2789 # before the @ and around dots, on the assumption that the whitespace
2790 # around the punctuation is a mistake in what would otherwise be
2791 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2792 self.assertEqual(('', "merwok wok@xample.com"),
2793 utils.parseaddr("merwok wok@xample.com"))
2794 self.assertEqual(('', "merwok wok@xample.com"),
2795 utils.parseaddr("merwok wok@xample.com"))
2796 self.assertEqual(('', "merwok wok@xample.com"),
2797 utils.parseaddr(" merwok wok @xample.com"))
2798 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2799 utils.parseaddr('merwok"wok" wok@xample.com'))
2800 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2801 utils.parseaddr('merwok. wok . wok@xample.com'))
2802
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002803 def test_multiline_from_comment(self):
2804 x = """\
2805Foo
2806\tBar <foo@example.com>"""
2807 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2808
2809 def test_quote_dump(self):
2810 self.assertEqual(
2811 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2812 r'"A Silly; Person" <person@dom.ain>')
2813
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002814 def test_charset_richcomparisons(self):
2815 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002816 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002817 cset1 = Charset()
2818 cset2 = Charset()
2819 eq(cset1, 'us-ascii')
2820 eq(cset1, 'US-ASCII')
2821 eq(cset1, 'Us-AsCiI')
2822 eq('us-ascii', cset1)
2823 eq('US-ASCII', cset1)
2824 eq('Us-AsCiI', cset1)
2825 ne(cset1, 'usascii')
2826 ne(cset1, 'USASCII')
2827 ne(cset1, 'UsAsCiI')
2828 ne('usascii', cset1)
2829 ne('USASCII', cset1)
2830 ne('UsAsCiI', cset1)
2831 eq(cset1, cset2)
2832 eq(cset2, cset1)
2833
2834 def test_getaddresses(self):
2835 eq = self.assertEqual
2836 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2837 'Bud Person <bperson@dom.ain>']),
2838 [('Al Person', 'aperson@dom.ain'),
2839 ('Bud Person', 'bperson@dom.ain')])
2840
2841 def test_getaddresses_nasty(self):
2842 eq = self.assertEqual
2843 eq(utils.getaddresses(['foo: ;']), [('', '')])
2844 eq(utils.getaddresses(
2845 ['[]*-- =~$']),
2846 [('', ''), ('', ''), ('', '*--')])
2847 eq(utils.getaddresses(
2848 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2849 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2850
2851 def test_getaddresses_embedded_comment(self):
2852 """Test proper handling of a nested comment"""
2853 eq = self.assertEqual
2854 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2855 eq(addrs[0][1], 'foo@bar.com')
2856
2857 def test_utils_quote_unquote(self):
2858 eq = self.assertEqual
2859 msg = Message()
2860 msg.add_header('content-disposition', 'attachment',
2861 filename='foo\\wacky"name')
2862 eq(msg.get_filename(), 'foo\\wacky"name')
2863
2864 def test_get_body_encoding_with_bogus_charset(self):
2865 charset = Charset('not a charset')
2866 self.assertEqual(charset.get_body_encoding(), 'base64')
2867
2868 def test_get_body_encoding_with_uppercase_charset(self):
2869 eq = self.assertEqual
2870 msg = Message()
2871 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2872 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2873 charsets = msg.get_charsets()
2874 eq(len(charsets), 1)
2875 eq(charsets[0], 'utf-8')
2876 charset = Charset(charsets[0])
2877 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002878 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002879 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2880 eq(msg.get_payload(decode=True), b'hello world')
2881 eq(msg['content-transfer-encoding'], 'base64')
2882 # Try another one
2883 msg = Message()
2884 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2885 charsets = msg.get_charsets()
2886 eq(len(charsets), 1)
2887 eq(charsets[0], 'us-ascii')
2888 charset = Charset(charsets[0])
2889 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2890 msg.set_payload('hello world', charset=charset)
2891 eq(msg.get_payload(), 'hello world')
2892 eq(msg['content-transfer-encoding'], '7bit')
2893
2894 def test_charsets_case_insensitive(self):
2895 lc = Charset('us-ascii')
2896 uc = Charset('US-ASCII')
2897 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2898
2899 def test_partial_falls_inside_message_delivery_status(self):
2900 eq = self.ndiffAssertEqual
2901 # The Parser interface provides chunks of data to FeedParser in 8192
2902 # byte gulps. SF bug #1076485 found one of those chunks inside
2903 # message/delivery-status header block, which triggered an
2904 # unreadline() of NeedMoreData.
2905 msg = self._msgobj('msg_43.txt')
2906 sfp = StringIO()
2907 iterators._structure(msg, sfp)
2908 eq(sfp.getvalue(), """\
2909multipart/report
2910 text/plain
2911 message/delivery-status
2912 text/plain
2913 text/plain
2914 text/plain
2915 text/plain
2916 text/plain
2917 text/plain
2918 text/plain
2919 text/plain
2920 text/plain
2921 text/plain
2922 text/plain
2923 text/plain
2924 text/plain
2925 text/plain
2926 text/plain
2927 text/plain
2928 text/plain
2929 text/plain
2930 text/plain
2931 text/plain
2932 text/plain
2933 text/plain
2934 text/plain
2935 text/plain
2936 text/plain
2937 text/plain
2938 text/rfc822-headers
2939""")
2940
R. David Murraya0b44b52010-12-02 21:47:19 +00002941 def test_make_msgid_domain(self):
2942 self.assertEqual(
2943 email.utils.make_msgid(domain='testdomain-string')[-19:],
2944 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002945
Ezio Melottib3aedd42010-11-20 19:04:17 +00002946
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002947# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for ``email.iterators`` and the feed parser's line buffer."""

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        # Only a main type is given, so every text/* subpart is selected.
        payloads = [subpart.get_payload() for subpart
                    in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        payloads = [subpart.get_payload() for subpart
                    in iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
        line endings. A CR ending one push() followed by a LF starting
        the next push() added an empty line.
        '''
        # (chunk pushed, number of complete lines readable right afterwards)
        pushes = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        lines_out = []
        for chunk, expected in pushes:
            bsf.push(chunk)
            completed = 0
            # Drain every line that is complete after this push.
            while True:
                line = bsf.readline()
                if line is NeedMoreData:
                    break
                lines_out.append(line)
                completed += 1
            self.assertEqual(completed, expected)
        self.assertEqual(len(lines_out), sum(n for _, n in pushes))
        # Nothing may be lost or added: the lines re-join to the pushed data.
        self.assertEqual(''.join(chunk for chunk, _ in pushes),
                         ''.join(lines_out))
3034
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003035
Ezio Melottib3aedd42010-11-20 19:04:17 +00003036
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003037class TestParsers(TestEmailBase):
R David Murrayb35c8502011-04-13 16:46:05 -04003038
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003039 def test_header_parser(self):
3040 eq = self.assertEqual
3041 # Parse only the headers of a complex multipart MIME document
3042 with openfile('msg_02.txt') as fp:
3043 msg = HeaderParser().parse(fp)
3044 eq(msg['from'], 'ppp-request@zzz.org')
3045 eq(msg['to'], 'ppp@zzz.org')
3046 eq(msg.get_content_type(), 'multipart/mixed')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003047 self.assertFalse(msg.is_multipart())
3048 self.assertTrue(isinstance(msg.get_payload(), str))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003049
R David Murrayb35c8502011-04-13 16:46:05 -04003050 def test_bytes_header_parser(self):
3051 eq = self.assertEqual
3052 # Parse only the headers of a complex multipart MIME document
3053 with openfile('msg_02.txt', 'rb') as fp:
3054 msg = email.parser.BytesHeaderParser().parse(fp)
3055 eq(msg['from'], 'ppp-request@zzz.org')
3056 eq(msg['to'], 'ppp@zzz.org')
3057 eq(msg.get_content_type(), 'multipart/mixed')
3058 self.assertFalse(msg.is_multipart())
3059 self.assertTrue(isinstance(msg.get_payload(), str))
3060 self.assertTrue(isinstance(msg.get_payload(decode=True), bytes))
3061
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003062 def test_whitespace_continuation(self):
3063 eq = self.assertEqual
3064 # This message contains a line after the Subject: header that has only
3065 # whitespace, but it is not empty!
3066 msg = email.message_from_string("""\
3067From: aperson@dom.ain
3068To: bperson@dom.ain
3069Subject: the next line has a space on it
3070\x20
3071Date: Mon, 8 Apr 2002 15:09:19 -0400
3072Message-ID: spam
3073
3074Here's the message body
3075""")
3076 eq(msg['subject'], 'the next line has a space on it\n ')
3077 eq(msg['message-id'], 'spam')
3078 eq(msg.get_payload(), "Here's the message body\n")
3079
3080 def test_whitespace_continuation_last_header(self):
3081 eq = self.assertEqual
3082 # Like the previous test, but the subject line is the last
3083 # header.
3084 msg = email.message_from_string("""\
3085From: aperson@dom.ain
3086To: bperson@dom.ain
3087Date: Mon, 8 Apr 2002 15:09:19 -0400
3088Message-ID: spam
3089Subject: the next line has a space on it
3090\x20
3091
3092Here's the message body
3093""")
3094 eq(msg['subject'], 'the next line has a space on it\n ')
3095 eq(msg['message-id'], 'spam')
3096 eq(msg.get_payload(), "Here's the message body\n")
3097
3098 def test_crlf_separation(self):
3099 eq = self.assertEqual
Guido van Rossum98297ee2007-11-06 21:34:58 +00003100 with openfile('msg_26.txt', newline='\n') as fp:
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003101 msg = Parser().parse(fp)
3102 eq(len(msg.get_payload()), 2)
3103 part1 = msg.get_payload(0)
3104 eq(part1.get_content_type(), 'text/plain')
3105 eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
3106 part2 = msg.get_payload(1)
3107 eq(part2.get_content_type(), 'application/riscos')
3108
R. David Murray8451c4b2010-10-23 22:19:56 +00003109 def test_crlf_flatten(self):
3110 # Using newline='\n' preserves the crlfs in this input file.
3111 with openfile('msg_26.txt', newline='\n') as fp:
3112 text = fp.read()
3113 msg = email.message_from_string(text)
3114 s = StringIO()
3115 g = Generator(s)
3116 g.flatten(msg, linesep='\r\n')
3117 self.assertEqual(s.getvalue(), text)
3118
R David Murray3edd22a2011-04-18 13:59:37 -04003119 def test_crlf_control_via_policy(self):
3120 with openfile('msg_26.txt', newline='\n') as fp:
3121 text = fp.read()
3122 msg = email.message_from_string(text)
3123 s = StringIO()
3124 g = email.generator.Generator(s, policy=email.policy.SMTP)
3125 g.flatten(msg)
3126 self.assertEqual(s.getvalue(), text)
3127
3128 def test_flatten_linesep_overrides_policy(self):
3129 # msg_27 is lf separated
3130 with openfile('msg_27.txt', newline='\n') as fp:
3131 text = fp.read()
3132 msg = email.message_from_string(text)
3133 s = StringIO()
3134 g = email.generator.Generator(s, policy=email.policy.SMTP)
3135 g.flatten(msg, linesep='\n')
3136 self.assertEqual(s.getvalue(), text)
3137
    # unittest setting: None removes the length limit on the diffs shown in
    # assertion failure messages.
    maxDiff = None
3139
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003140 def test_multipart_digest_with_extra_mime_headers(self):
3141 eq = self.assertEqual
3142 neq = self.ndiffAssertEqual
3143 with openfile('msg_28.txt') as fp:
3144 msg = email.message_from_file(fp)
3145 # Structure is:
3146 # multipart/digest
3147 # message/rfc822
3148 # text/plain
3149 # message/rfc822
3150 # text/plain
3151 eq(msg.is_multipart(), 1)
3152 eq(len(msg.get_payload()), 2)
3153 part1 = msg.get_payload(0)
3154 eq(part1.get_content_type(), 'message/rfc822')
3155 eq(part1.is_multipart(), 1)
3156 eq(len(part1.get_payload()), 1)
3157 part1a = part1.get_payload(0)
3158 eq(part1a.is_multipart(), 0)
3159 eq(part1a.get_content_type(), 'text/plain')
3160 neq(part1a.get_payload(), 'message 1\n')
3161 # next message/rfc822
3162 part2 = msg.get_payload(1)
3163 eq(part2.get_content_type(), 'message/rfc822')
3164 eq(part2.is_multipart(), 1)
3165 eq(len(part2.get_payload()), 1)
3166 part2a = part2.get_payload(0)
3167 eq(part2a.is_multipart(), 0)
3168 eq(part2a.get_content_type(), 'text/plain')
3169 neq(part2a.get_payload(), 'message 2\n')
3170
3171 def test_three_lines(self):
3172 # A bug report by Andrew McNamara
3173 lines = ['From: Andrew Person <aperson@dom.ain',
3174 'Subject: Test',
3175 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
3176 msg = email.message_from_string(NL.join(lines))
3177 self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')
3178
3179 def test_strip_line_feed_and_carriage_return_in_headers(self):
3180 eq = self.assertEqual
3181 # For [ 1002475 ] email message parser doesn't handle \r\n correctly
3182 value1 = 'text'
3183 value2 = 'more text'
3184 m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
3185 value1, value2)
3186 msg = email.message_from_string(m)
3187 eq(msg.get('Header'), value1)
3188 eq(msg.get('Next-Header'), value2)
3189
3190 def test_rfc2822_header_syntax(self):
3191 eq = self.assertEqual
3192 m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3193 msg = email.message_from_string(m)
3194 eq(len(msg), 3)
3195 eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
3196 eq(msg.get_payload(), 'body')
3197
3198 def test_rfc2822_space_not_allowed_in_header(self):
3199 eq = self.assertEqual
3200 m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3201 msg = email.message_from_string(m)
3202 eq(len(msg.keys()), 0)
3203
3204 def test_rfc2822_one_character_header(self):
3205 eq = self.assertEqual
3206 m = 'A: first header\nB: second header\nCC: third header\n\nbody'
3207 msg = email.message_from_string(m)
3208 headers = msg.keys()
3209 headers.sort()
3210 eq(headers, ['A', 'B', 'CC'])
3211 eq(msg.get_payload(), 'body')
3212
R. David Murray45e0e142010-06-16 02:19:40 +00003213 def test_CRLFLF_at_end_of_part(self):
3214 # issue 5610: feedparser should not eat two chars from body part ending
3215 # with "\r\n\n".
3216 m = (
3217 "From: foo@bar.com\n"
3218 "To: baz\n"
3219 "Mime-Version: 1.0\n"
3220 "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
3221 "\n"
3222 "--BOUNDARY\n"
3223 "Content-Type: text/plain\n"
3224 "\n"
3225 "body ending with CRLF newline\r\n"
3226 "\n"
3227 "--BOUNDARY--\n"
3228 )
3229 msg = email.message_from_string(m)
3230 self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003231
Ezio Melottib3aedd42010-11-20 19:04:17 +00003232
R. David Murray96fd54e2010-10-08 15:55:28 +00003233class Test8BitBytesHandling(unittest.TestCase):
3234 # In Python3 all input is string, but that doesn't work if the actual input
3235 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3236 # decode byte streams using the surrogateescape error handler, and
3237 # reconvert to binary at appropriate places if we detect surrogates. This
3238 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3239 # but it does allow us to parse and preserve them, and to decode body
3240 # parts that use an 8bit CTE.
3241
3242 bodytest_msg = textwrap.dedent("""\
3243 From: foo@bar.com
3244 To: baz
3245 Mime-Version: 1.0
3246 Content-Type: text/plain; charset={charset}
3247 Content-Transfer-Encoding: {cte}
3248
3249 {bodyline}
3250 """)
3251
3252 def test_known_8bit_CTE(self):
3253 m = self.bodytest_msg.format(charset='utf-8',
3254 cte='8bit',
3255 bodyline='pöstal').encode('utf-8')
3256 msg = email.message_from_bytes(m)
3257 self.assertEqual(msg.get_payload(), "pöstal\n")
3258 self.assertEqual(msg.get_payload(decode=True),
3259 "pöstal\n".encode('utf-8'))
3260
3261 def test_unknown_8bit_CTE(self):
3262 m = self.bodytest_msg.format(charset='notavalidcharset',
3263 cte='8bit',
3264 bodyline='pöstal').encode('utf-8')
3265 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003266 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003267 self.assertEqual(msg.get_payload(decode=True),
3268 "pöstal\n".encode('utf-8'))
3269
3270 def test_8bit_in_quopri_body(self):
3271 # This is non-RFC compliant data...without 'decode' the library code
3272 # decodes the body using the charset from the headers, and because the
3273 # source byte really is utf-8 this works. This is likely to fail
3274 # against real dirty data (ie: produce mojibake), but the data is
3275 # invalid anyway so it is as good a guess as any. But this means that
3276 # this test just confirms the current behavior; that behavior is not
3277 # necessarily the best possible behavior. With 'decode' it is
3278 # returning the raw bytes, so that test should be of correct behavior,
3279 # or at least produce the same result that email4 did.
3280 m = self.bodytest_msg.format(charset='utf-8',
3281 cte='quoted-printable',
3282 bodyline='p=C3=B6stál').encode('utf-8')
3283 msg = email.message_from_bytes(m)
3284 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3285 self.assertEqual(msg.get_payload(decode=True),
3286 'pöstál\n'.encode('utf-8'))
3287
3288 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3289 # This is similar to the previous test, but proves that if the 8bit
3290 # byte is undecodeable in the specified charset, it gets replaced
3291 # by the unicode 'unknown' character. Again, this may or may not
3292 # be the ideal behavior. Note that if decode=False none of the
3293 # decoders will get involved, so this is the only test we need
3294 # for this behavior.
3295 m = self.bodytest_msg.format(charset='ascii',
3296 cte='quoted-printable',
3297 bodyline='p=C3=B6stál').encode('utf-8')
3298 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003299 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003300 self.assertEqual(msg.get_payload(decode=True),
3301 'pöstál\n'.encode('utf-8'))
3302
3303 def test_8bit_in_base64_body(self):
3304 # Sticking an 8bit byte in a base64 block makes it undecodable by
3305 # normal means, so the block is returned undecoded, but as bytes.
3306 m = self.bodytest_msg.format(charset='utf-8',
3307 cte='base64',
3308 bodyline='cMO2c3RhbAá=').encode('utf-8')
3309 msg = email.message_from_bytes(m)
3310 self.assertEqual(msg.get_payload(decode=True),
3311 'cMO2c3RhbAá=\n'.encode('utf-8'))
3312
3313 def test_8bit_in_uuencode_body(self):
3314 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3315 # normal means, so the block is returned undecoded, but as bytes.
3316 m = self.bodytest_msg.format(charset='utf-8',
3317 cte='uuencode',
3318 bodyline='<,.V<W1A; á ').encode('utf-8')
3319 msg = email.message_from_bytes(m)
3320 self.assertEqual(msg.get_payload(decode=True),
3321 '<,.V<W1A; á \n'.encode('utf-8'))
3322
3323
R. David Murray92532142011-01-07 23:25:30 +00003324 headertest_headers = (
3325 ('From: foo@bar.com', ('From', 'foo@bar.com')),
3326 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3327 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3328 '\tJean de Baddie',
3329 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3330 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3331 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3332 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3333 )
3334 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3335 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003336
3337 def test_get_8bit_header(self):
3338 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003339 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3340 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003341
3342 def test_print_8bit_headers(self):
3343 msg = email.message_from_bytes(self.headertest_msg)
3344 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003345 textwrap.dedent("""\
3346 From: {}
3347 To: {}
3348 Subject: {}
3349 From: {}
3350
3351 Yes, they are flying.
3352 """).format(*[expected[1] for (_, expected) in
3353 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003354
3355 def test_values_with_8bit_headers(self):
3356 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003357 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003358 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003359 'b\uFFFD\uFFFDz',
3360 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3361 'coll\uFFFD\uFFFDgue, le pouf '
3362 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003363 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003364 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003365
3366 def test_items_with_8bit_headers(self):
3367 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003368 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003369 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003370 ('To', 'b\uFFFD\uFFFDz'),
3371 ('Subject', 'Maintenant je vous '
3372 'pr\uFFFD\uFFFDsente '
3373 'mon coll\uFFFD\uFFFDgue, le pouf '
3374 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3375 '\tJean de Baddie'),
3376 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003377
3378 def test_get_all_with_8bit_headers(self):
3379 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003380 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003381 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003382 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003383
R David Murraya2150232011-03-16 21:11:23 -04003384 def test_get_content_type_with_8bit(self):
3385 msg = email.message_from_bytes(textwrap.dedent("""\
3386 Content-Type: text/pl\xA7in; charset=utf-8
3387 """).encode('latin-1'))
3388 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3389 self.assertEqual(msg.get_content_maintype(), "text")
3390 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3391
3392 def test_get_params_with_8bit(self):
3393 msg = email.message_from_bytes(
3394 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3395 self.assertEqual(msg.get_params(header='x-header'),
3396 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3397 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3398 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3399 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3400
3401 def test_get_rfc2231_params_with_8bit(self):
3402 msg = email.message_from_bytes(textwrap.dedent("""\
3403 Content-Type: text/plain; charset=us-ascii;
3404 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3405 ).encode('latin-1'))
3406 self.assertEqual(msg.get_param('title'),
3407 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3408
3409 def test_set_rfc2231_params_with_8bit(self):
3410 msg = email.message_from_bytes(textwrap.dedent("""\
3411 Content-Type: text/plain; charset=us-ascii;
3412 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3413 ).encode('latin-1'))
3414 msg.set_param('title', 'test')
3415 self.assertEqual(msg.get_param('title'), 'test')
3416
3417 def test_del_rfc2231_params_with_8bit(self):
3418 msg = email.message_from_bytes(textwrap.dedent("""\
3419 Content-Type: text/plain; charset=us-ascii;
3420 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3421 ).encode('latin-1'))
3422 msg.del_param('title')
3423 self.assertEqual(msg.get_param('title'), None)
3424 self.assertEqual(msg.get_content_maintype(), 'text')
3425
3426 def test_get_payload_with_8bit_cte_header(self):
3427 msg = email.message_from_bytes(textwrap.dedent("""\
3428 Content-Transfer-Encoding: b\xa7se64
3429 Content-Type: text/plain; charset=latin-1
3430
3431 payload
3432 """).encode('latin-1'))
3433 self.assertEqual(msg.get_payload(), 'payload\n')
3434 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3435
R. David Murray96fd54e2010-10-08 15:55:28 +00003436 non_latin_bin_msg = textwrap.dedent("""\
3437 From: foo@bar.com
3438 To: báz
3439 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3440 \tJean de Baddie
3441 Mime-Version: 1.0
3442 Content-Type: text/plain; charset="utf-8"
3443 Content-Transfer-Encoding: 8bit
3444
3445 Да, они летят.
3446 """).encode('utf-8')
3447
3448 def test_bytes_generator(self):
3449 msg = email.message_from_bytes(self.non_latin_bin_msg)
3450 out = BytesIO()
3451 email.generator.BytesGenerator(out).flatten(msg)
3452 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3453
R. David Murray7372a072011-01-26 21:21:32 +00003454 def test_bytes_generator_handles_None_body(self):
3455 #Issue 11019
3456 msg = email.message.Message()
3457 out = BytesIO()
3458 email.generator.BytesGenerator(out).flatten(msg)
3459 self.assertEqual(out.getvalue(), b"\n")
3460
R. David Murray92532142011-01-07 23:25:30 +00003461 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003462 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003463 To: =?unknown-8bit?q?b=C3=A1z?=
3464 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3465 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3466 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003467 Mime-Version: 1.0
3468 Content-Type: text/plain; charset="utf-8"
3469 Content-Transfer-Encoding: base64
3470
3471 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3472 """)
3473
3474 def test_generator_handles_8bit(self):
3475 msg = email.message_from_bytes(self.non_latin_bin_msg)
3476 out = StringIO()
3477 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003478 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003479
3480 def test_bytes_generator_with_unix_from(self):
3481 # The unixfrom contains a current date, so we can't check it
3482 # literally. Just make sure the first word is 'From' and the
3483 # rest of the message matches the input.
3484 msg = email.message_from_bytes(self.non_latin_bin_msg)
3485 out = BytesIO()
3486 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3487 lines = out.getvalue().split(b'\n')
3488 self.assertEqual(lines[0].split()[0], b'From')
3489 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3490
R. David Murray92532142011-01-07 23:25:30 +00003491 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3492 non_latin_bin_msg_as7bit[2:4] = [
3493 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3494 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3495 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3496
R. David Murray96fd54e2010-10-08 15:55:28 +00003497 def test_message_from_binary_file(self):
3498 fn = 'test.msg'
3499 self.addCleanup(unlink, fn)
3500 with open(fn, 'wb') as testfile:
3501 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00003502 with open(fn, 'rb') as testfile:
3503 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00003504 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
3505
3506 latin_bin_msg = textwrap.dedent("""\
3507 From: foo@bar.com
3508 To: Dinsdale
3509 Subject: Nudge nudge, wink, wink
3510 Mime-Version: 1.0
3511 Content-Type: text/plain; charset="latin-1"
3512 Content-Transfer-Encoding: 8bit
3513
3514 oh là là, know what I mean, know what I mean?
3515 """).encode('latin-1')
3516
3517 latin_bin_msg_as7bit = textwrap.dedent("""\
3518 From: foo@bar.com
3519 To: Dinsdale
3520 Subject: Nudge nudge, wink, wink
3521 Mime-Version: 1.0
3522 Content-Type: text/plain; charset="iso-8859-1"
3523 Content-Transfer-Encoding: quoted-printable
3524
3525 oh l=E0 l=E0, know what I mean, know what I mean?
3526 """)
3527
3528 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3529 m = email.message_from_bytes(self.latin_bin_msg)
3530 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3531
3532 def test_decoded_generator_emits_unicode_body(self):
3533 m = email.message_from_bytes(self.latin_bin_msg)
3534 out = StringIO()
3535 email.generator.DecodedGenerator(out).flatten(m)
3536 #DecodedHeader output contains an extra blank line compared
3537 #to the input message. RDM: not sure if this is a bug or not,
3538 #but it is not specific to the 8bit->7bit conversion.
3539 self.assertEqual(out.getvalue(),
3540 self.latin_bin_msg.decode('latin-1')+'\n')
3541
3542 def test_bytes_feedparser(self):
3543 bfp = email.feedparser.BytesFeedParser()
3544 for i in range(0, len(self.latin_bin_msg), 10):
3545 bfp.feed(self.latin_bin_msg[i:i+10])
3546 m = bfp.close()
3547 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3548
R. David Murray8451c4b2010-10-23 22:19:56 +00003549 def test_crlf_flatten(self):
3550 with openfile('msg_26.txt', 'rb') as fp:
3551 text = fp.read()
3552 msg = email.message_from_bytes(text)
3553 s = BytesIO()
3554 g = email.generator.BytesGenerator(s)
3555 g.flatten(msg, linesep='\r\n')
3556 self.assertEqual(s.getvalue(), text)
R David Murrayc5c14722011-04-06 08:13:02 -04003557
3558 def test_8bit_multipart(self):
3559 # Issue 11605
3560 source = textwrap.dedent("""\
3561 Date: Fri, 18 Mar 2011 17:15:43 +0100
3562 To: foo@example.com
3563 From: foodwatch-Newsletter <bar@example.com>
3564 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3565 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3566 MIME-Version: 1.0
3567 Content-Type: multipart/alternative;
3568 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3569
3570 --b1_76a486bee62b0d200f33dc2ca08220ad
3571 Content-Type: text/plain; charset="utf-8"
3572 Content-Transfer-Encoding: 8bit
3573
3574 Guten Tag, ,
3575
3576 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3577 Nachrichten aus Japan.
3578
3579
3580 --b1_76a486bee62b0d200f33dc2ca08220ad
3581 Content-Type: text/html; charset="utf-8"
3582 Content-Transfer-Encoding: 8bit
3583
3584 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3585 "http://www.w3.org/TR/html4/loose.dtd">
3586 <html lang="de">
3587 <head>
3588 <title>foodwatch - Newsletter</title>
3589 </head>
3590 <body>
3591 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3592 die Nachrichten aus Japan.</p>
3593 </body>
3594 </html>
3595 --b1_76a486bee62b0d200f33dc2ca08220ad--
3596
3597 """).encode('utf-8')
3598 msg = email.message_from_bytes(source)
3599 s = BytesIO()
3600 g = email.generator.BytesGenerator(s)
3601 g.flatten(msg)
3602 self.assertEqual(s.getvalue(), source)
3603
R David Murray9fd170e2012-03-14 14:05:03 -04003604 def test_bytes_generator_b_encoding_linesep(self):
3605 # Issue 14062: b encoding was tacking on an extra \n.
3606 m = Message()
3607 # This has enough non-ascii that it should always end up b encoded.
3608 m['Subject'] = Header('žluťoučký kůň')
3609 s = BytesIO()
3610 g = email.generator.BytesGenerator(s)
3611 g.flatten(m, linesep='\r\n')
3612 self.assertEqual(
3613 s.getvalue(),
3614 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3615
3616 def test_generator_b_encoding_linesep(self):
3617 # Since this broke in ByteGenerator, test Generator for completeness.
3618 m = Message()
3619 # This has enough non-ascii that it should always end up b encoded.
3620 m['Subject'] = Header('žluťoučký kůň')
3621 s = StringIO()
3622 g = email.generator.Generator(s)
3623 g.flatten(m, linesep='\r\n')
3624 self.assertEqual(
3625 s.getvalue(),
3626 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3627
R David Murray3edd22a2011-04-18 13:59:37 -04003628 def test_crlf_control_via_policy(self):
3629 # msg_26 is crlf terminated
3630 with openfile('msg_26.txt', 'rb') as fp:
3631 text = fp.read()
3632 msg = email.message_from_bytes(text)
3633 s = BytesIO()
3634 g = email.generator.BytesGenerator(s, policy=email.policy.SMTP)
3635 g.flatten(msg)
3636 self.assertEqual(s.getvalue(), text)
3637
3638 def test_flatten_linesep_overrides_policy(self):
3639 # msg_27 is lf separated
3640 with openfile('msg_27.txt', 'rb') as fp:
3641 text = fp.read()
3642 msg = email.message_from_bytes(text)
3643 s = BytesIO()
3644 g = email.generator.BytesGenerator(s, policy=email.policy.SMTP)
3645 g.flatten(msg, linesep='\n')
3646 self.assertEqual(s.getvalue(), text)
3647
3648 def test_must_be_7bit_handles_unknown_8bit(self):
3649 msg = email.message_from_bytes(self.non_latin_bin_msg)
3650 out = BytesIO()
3651 g = email.generator.BytesGenerator(out,
3652 policy=email.policy.default.clone(must_be_7bit=True))
3653 g.flatten(msg)
3654 self.assertEqual(out.getvalue(),
3655 self.non_latin_bin_msg_as7bit_wrapped.encode('ascii'))
3656
3657 def test_must_be_7bit_transforms_8bit_cte(self):
3658 msg = email.message_from_bytes(self.latin_bin_msg)
3659 out = BytesIO()
3660 g = email.generator.BytesGenerator(out,
3661 policy=email.policy.default.clone(must_be_7bit=True))
3662 g.flatten(msg)
3663 self.assertEqual(out.getvalue(),
3664 self.latin_bin_msg_as7bit.encode('ascii'))
3665
R. David Murray8451c4b2010-10-23 22:19:56 +00003666 maxDiff = None
3667
Ezio Melottib3aedd42010-11-20 19:04:17 +00003668
class BaseTestBytesGeneratorIdempotent:
    # Mixin holding the bytes-based _msgobj()/_idempotent() helpers.  It
    # reads three attributes that it does not define itself: linesep,
    # blinesep and normalize_linesep_regex (a compiled bytes pattern).

    maxDiff = None

    def _msgobj(self, filename):
        # Read the file as bytes, rewrite every match of the normalizing
        # regex to blinesep, and return (parsed message, normalized bytes).
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        data = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(data), data

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) and
        # check that the bytes produced equal data.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003685
3686
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # LF flavour: every CRLF in the input data is rewritten to a bare LF
    # before the message is parsed and flattened again.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3692
3693
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # CRLF flavour: every LF not already preceded by CR is rewritten to
    # CRLF before the message is parsed and flattened again.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3699
Ezio Melottib3aedd42010-11-20 19:04:17 +00003700
class TestBase64(unittest.TestCase):
    # Tests for the helper functions in email.base64mime.

    def test_len(self):
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Four output characters for every started group of three
            # input characters: 0 -> 0, 1..3 -> 4, 4..6 -> 8, ...
            bsize = 4 * ((size + 2) // 3)
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        eq(base64mime.body_encode(b''), b'')
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline inside the input is encoded like any other byte.
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # 100 input bytes wrapped at 40 output characters give three full
        # lines followed by one short line.
        full_line = 'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'
        last_line = 'eHh4eCB4eHh4IA=='
        wrapped = [full_line] * 3 + [last_line]
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40),
           ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        # Line breaks in the input end up inside the encoded word.
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003751
3752
Ezio Melottib3aedd42010-11-20 19:04:17 +00003753
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003754class TestQuopri(unittest.TestCase):
3755 def setUp(self):
3756 # Set of characters (as byte integers) that don't need to be encoded
3757 # in headers.
3758 self.hlit = list(chain(
3759 range(ord('a'), ord('z') + 1),
3760 range(ord('A'), ord('Z') + 1),
3761 range(ord('0'), ord('9') + 1),
Guido van Rossum9604e662007-08-30 03:46:43 +00003762 (c for c in b'!*+-/')))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003763 # Set of characters (as byte integers) that do need to be encoded in
3764 # headers.
3765 self.hnon = [c for c in range(256) if c not in self.hlit]
3766 assert len(self.hlit) + len(self.hnon) == 256
3767 # Set of characters (as byte integers) that don't need to be encoded
3768 # in bodies.
3769 self.blit = list(range(ord(' '), ord('~') + 1))
3770 self.blit.append(ord('\t'))
3771 self.blit.remove(ord('='))
3772 # Set of characters (as byte integers) that do need to be encoded in
3773 # bodies.
3774 self.bnon = [c for c in range(256) if c not in self.blit]
3775 assert len(self.blit) + len(self.bnon) == 256
3776
Guido van Rossum9604e662007-08-30 03:46:43 +00003777 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003778 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003779 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003780 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003781 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003782 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003783 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003784
Guido van Rossum9604e662007-08-30 03:46:43 +00003785 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003786 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003787 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003788 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003789 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003790 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003791 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003792
3793 def test_header_quopri_len(self):
3794 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003795 eq(quoprimime.header_length(b'hello'), 5)
3796 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003797 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003798 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003799 # =?xxx?q?...?= means 10 extra characters
3800 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003801 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3802 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003803 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003804 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003805 # =?xxx?q?...?= means 10 extra characters
3806 10)
3807 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003808 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003809 'expected length 1 for %r' % chr(c))
3810 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003811 # Space is special; it's encoded to _
3812 if c == ord(' '):
3813 continue
3814 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003815 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003816 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003817
3818 def test_body_quopri_len(self):
3819 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003820 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003821 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003822 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003823 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003824
3825 def test_quote_unquote_idempotent(self):
3826 for x in range(256):
3827 c = chr(x)
3828 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3829
R David Murrayec1b5b82011-03-23 14:19:05 -04003830 def _test_header_encode(self, header, expected_encoded_header, charset=None):
3831 if charset is None:
3832 encoded_header = quoprimime.header_encode(header)
3833 else:
3834 encoded_header = quoprimime.header_encode(header, charset)
3835 self.assertEqual(encoded_header, expected_encoded_header)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003836
R David Murraycafd79d2011-03-23 15:25:55 -04003837 def test_header_encode_null(self):
3838 self._test_header_encode(b'', '')
3839
R David Murrayec1b5b82011-03-23 14:19:05 -04003840 def test_header_encode_one_word(self):
3841 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
3842
3843 def test_header_encode_two_lines(self):
3844 self._test_header_encode(b'hello\nworld',
3845 '=?iso-8859-1?q?hello=0Aworld?=')
3846
3847 def test_header_encode_non_ascii(self):
3848 self._test_header_encode(b'hello\xc7there',
3849 '=?iso-8859-1?q?hello=C7there?=')
3850
3851 def test_header_encode_alt_charset(self):
3852 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
3853 charset='iso-8859-2')
3854
3855 def _test_header_decode(self, encoded_header, expected_decoded_header):
3856 decoded_header = quoprimime.header_decode(encoded_header)
3857 self.assertEqual(decoded_header, expected_decoded_header)
3858
3859 def test_header_decode_null(self):
3860 self._test_header_decode('', '')
3861
3862 def test_header_decode_one_word(self):
3863 self._test_header_decode('hello', 'hello')
3864
3865 def test_header_decode_two_lines(self):
3866 self._test_header_decode('hello=0Aworld', 'hello\nworld')
3867
3868 def test_header_decode_non_ascii(self):
3869 self._test_header_decode('hello=C7there', 'hello\xc7there')
3870
3871 def _test_decode(self, encoded, expected_decoded, eol=None):
3872 if eol is None:
3873 decoded = quoprimime.decode(encoded)
3874 else:
3875 decoded = quoprimime.decode(encoded, eol=eol)
3876 self.assertEqual(decoded, expected_decoded)
3877
3878 def test_decode_null_word(self):
3879 self._test_decode('', '')
3880
3881 def test_decode_null_line_null_word(self):
3882 self._test_decode('\r\n', '\n')
3883
3884 def test_decode_one_word(self):
3885 self._test_decode('hello', 'hello')
3886
3887 def test_decode_one_word_eol(self):
3888 self._test_decode('hello', 'hello', eol='X')
3889
3890 def test_decode_one_line(self):
3891 self._test_decode('hello\r\n', 'hello\n')
3892
3893 def test_decode_one_line_lf(self):
3894 self._test_decode('hello\n', 'hello\n')
3895
R David Murraycafd79d2011-03-23 15:25:55 -04003896 def test_decode_one_line_cr(self):
3897 self._test_decode('hello\r', 'hello\n')
3898
3899 def test_decode_one_line_nl(self):
3900 self._test_decode('hello\n', 'helloX', eol='X')
3901
3902 def test_decode_one_line_crnl(self):
3903 self._test_decode('hello\r\n', 'helloX', eol='X')
3904
R David Murrayec1b5b82011-03-23 14:19:05 -04003905 def test_decode_one_line_one_word(self):
3906 self._test_decode('hello\r\nworld', 'hello\nworld')
3907
3908 def test_decode_one_line_one_word_eol(self):
3909 self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
3910
3911 def test_decode_two_lines(self):
3912 self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
3913
R David Murraycafd79d2011-03-23 15:25:55 -04003914 def test_decode_two_lines_eol(self):
3915 self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
3916
R David Murrayec1b5b82011-03-23 14:19:05 -04003917 def test_decode_one_long_line(self):
3918 self._test_decode('Spam' * 250, 'Spam' * 250)
3919
3920 def test_decode_one_space(self):
3921 self._test_decode(' ', '')
3922
3923 def test_decode_multiple_spaces(self):
3924 self._test_decode(' ' * 5, '')
3925
3926 def test_decode_one_line_trailing_spaces(self):
3927 self._test_decode('hello \r\n', 'hello\n')
3928
3929 def test_decode_two_lines_trailing_spaces(self):
3930 self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')
3931
3932 def test_decode_quoted_word(self):
3933 self._test_decode('=22quoted=20words=22', '"quoted words"')
3934
3935 def test_decode_uppercase_quoting(self):
3936 self._test_decode('ab=CD=EF', 'ab\xcd\xef')
3937
3938 def test_decode_lowercase_quoting(self):
3939 self._test_decode('ab=cd=ef', 'ab\xcd\xef')
3940
3941 def test_decode_soft_line_break(self):
3942 self._test_decode('soft line=\r\nbreak', 'soft linebreak')
3943
3944 def test_decode_false_quoting(self):
3945 self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
3946
3947 def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
3948 kwargs = {}
3949 if maxlinelen is None:
3950 # Use body_encode's default.
3951 maxlinelen = 76
3952 else:
3953 kwargs['maxlinelen'] = maxlinelen
3954 if eol is None:
3955 # Use body_encode's default.
3956 eol = '\n'
3957 else:
3958 kwargs['eol'] = eol
3959 encoded_body = quoprimime.body_encode(body, **kwargs)
3960 self.assertEqual(encoded_body, expected_encoded_body)
3961 if eol == '\n' or eol == '\r\n':
3962 # We know how to split the result back into lines, so maxlinelen
3963 # can be checked.
3964 for line in encoded_body.splitlines():
3965 self.assertLessEqual(len(line), maxlinelen)
3966
3967 def test_encode_null(self):
3968 self._test_encode('', '')
3969
3970 def test_encode_null_lines(self):
3971 self._test_encode('\n\n', '\n\n')
3972
3973 def test_encode_one_line(self):
3974 self._test_encode('hello\n', 'hello\n')
3975
3976 def test_encode_one_line_crlf(self):
3977 self._test_encode('hello\r\n', 'hello\n')
3978
3979 def test_encode_one_line_eol(self):
3980 self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
3981
3982 def test_encode_one_space(self):
3983 self._test_encode(' ', '=20')
3984
3985 def test_encode_one_line_one_space(self):
3986 self._test_encode(' \n', '=20\n')
3987
# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
3992
3993 def test_encode_two_lines_one_space(self):
3994 self._test_encode(' \n \n', '=20\n=20\n')
3995
R David Murrayec1b5b82011-03-23 14:19:05 -04003996 def test_encode_one_word_trailing_spaces(self):
3997 self._test_encode('hello ', 'hello =20')
3998
3999 def test_encode_one_line_trailing_spaces(self):
4000 self._test_encode('hello \n', 'hello =20\n')
4001
4002 def test_encode_one_word_trailing_tab(self):
4003 self._test_encode('hello \t', 'hello =09')
4004
4005 def test_encode_one_line_trailing_tab(self):
4006 self._test_encode('hello \t\n', 'hello =09\n')
4007
4008 def test_encode_trailing_space_before_maxlinelen(self):
4009 self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
4010
R David Murrayb938c8c2011-03-24 12:19:26 -04004011 def test_encode_trailing_space_at_maxlinelen(self):
4012 self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
4013
R David Murrayec1b5b82011-03-23 14:19:05 -04004014 def test_encode_trailing_space_beyond_maxlinelen(self):
R David Murrayb938c8c2011-03-24 12:19:26 -04004015 self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
4016
4017 def test_encode_whitespace_lines(self):
4018 self._test_encode(' \n' * 5, '=20\n' * 5)
R David Murrayec1b5b82011-03-23 14:19:05 -04004019
4020 def test_encode_quoted_equals(self):
4021 self._test_encode('a = b', 'a =3D b')
4022
4023 def test_encode_one_long_string(self):
4024 self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
4025
4026 def test_encode_one_long_line(self):
4027 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
4028
4029 def test_encode_one_very_long_line(self):
4030 self._test_encode('x' * 200 + '\n',
4031 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
4032
4033 def test_encode_one_long_line(self):
4034 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
4035
4036 def test_encode_shortest_maxlinelen(self):
4037 self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004038
R David Murrayb938c8c2011-03-24 12:19:26 -04004039 def test_encode_maxlinelen_too_small(self):
4040 self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
4041
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004042 def test_encode(self):
4043 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00004044 eq(quoprimime.body_encode(''), '')
4045 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004046 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00004047 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004048 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00004049 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004050xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
4051 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
4052x xxxx xxxx xxxx xxxx=20""")
4053 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00004054 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
4055 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004056xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
4057 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
4058x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00004059 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004060one line
4061
4062two line"""), """\
4063one line
4064
4065two line""")
4066
4067
Ezio Melottib3aedd42010-11-20 19:04:17 +00004068
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004069# Test the Charset class
class TestCharset(unittest.TestCase):
    """Tests for the header and body encoding done by email.charset.Charset."""

    def tearDown(self):
        # test_body_encode() registers a temporary 'fake' charset; drop it
        # again (if it is there) so it cannot leak into other tests.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        ascii_charset = Charset('us-ascii')
        # Pure ASCII text passes through us-ascii without any conversion.
        self.assertEqual(ascii_charset.header_encode('Hello World!'),
                         'Hello World!')
        # Non-ASCII text cannot be encoded with us-ascii ...
        non_ascii = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_charset.header_encode,
                          non_ascii)
        # ... but becomes a base64 encoded word with utf-8.
        utf8_charset = Charset('utf-8')
        self.assertEqual(utf8_charset.header_encode(non_ascii),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        eq = self.assertEqual
        # A charset whose body encoding is quoted-printable.
        c = Charset('iso-8859-1')
        eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
        # A charset whose body encoding is base64.
        c = Charset('utf-8')
        eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
        # A charset with no body encoding at all.
        c = Charset('us-ascii')
        eq('hello world', c.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        c = Charset('fake')
        eq('hello world', c.body_encode('hello world'))

    def test_unicode_charset_name(self):
        # str() of a Charset is its name; a non-ASCII name is rejected.
        ascii_charset = Charset('us-ascii')
        self.assertEqual(str(ascii_charset), 'us-ascii')
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
4124
4125
Ezio Melottib3aedd42010-11-20 19:04:17 +00004126
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004127# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    """Tests for email.header: Header, decode_header() and make_header()."""

    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # The appended chunk keeps its own leading space in addition to the
        # space that separates chunks, hence two spaces in the result.
        h.append(' Goodbye World!')
        eq(h.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # Chunks are separated by a space even though none was supplied.
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header("I am the very model of a modern Major-General; "
                   "I've information vegetable, animal, and mineral; "
                   "I know the kings of England, and I quote the fights "
                   "historical from Marathon to Waterloo, in order "
                   "categorical; I'm very well acquainted, too, with "
                   "matters mathematical; I understand equations, both the "
                   "simple and quadratical; about binomial theorem I'm "
                   "teeming with a lot o' news, with many cheerful facts "
                   "about the square of the hypotenuse.",
                   maxlinelen=76)
        # No folded line may be longer than the requested maximum.
        for line in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # With a header name the first line is folded earlier.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # A large enough maxlinelen suppresses folding altogether.
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        eq = self.assertEqual
        h = Header()
        # The charset may be given by name instead of as a Charset instance.
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        eq = self.assertEqual
        # Mostly-ASCII text comes out quoted-printable ...
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        # ... while all-non-ASCII text comes out base64.
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        # Undecodable bytes are rejected by default ...
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        # ... and accepted when errors='replace' is requested.
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_escaped_8bit_header(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        e = x.decode('ascii', 'surrogateescape')
        h = Header(e, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        h2 = email.header.make_header(email.header.decode_header(h))
        self.assertEqual(str(h2),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        h = Header('test')
        chunks = email.header.decode_header(h)
        # Mutating the returned list must leave the header untouched.
        chunks.append(('ascii', 'test2'))
        self.assertEqual(str(h), 'test')

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        # The two adjacent koi8-r encoded words are merged into one part.
        eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        # shift_jis input is emitted as iso-2022-jp.
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        msg = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(msg), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        msg = Message()
        # The value's own leading whitespace must survive flattening, in
        # addition to the single space written after the colon.
        msg['SomeHeader'] = '   value with leading ws'
        self.assertEqual(str(msg), "SomeHeader:    value with leading ws\n\n")
4479
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004480
Ezio Melottib3aedd42010-11-20 19:04:17 +00004481
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004482# Test RFC 2231 header parameters (en/de)coding
4483class TestRFC2231(TestEmailBase):
def test_get_param(self):
    msg = self._msgobj('msg_29.txt')
    # By default the value is returned with its quotes removed ...
    unquoted = msg.get_param('title')
    self.assertEqual(
        unquoted,
        ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
    # ... and with them kept when unquote=False.
    quoted = msg.get_param('title', unquote=False)
    self.assertEqual(
        quoted,
        ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
4491
def test_set_param(self):
    # Exercises Message.set_param() with charset/language arguments, first
    # on an empty message and then on the msg_01.txt fixture.
    eq = self.ndiffAssertEqual
    msg = Message()
    # With only a charset, get_param() reports an empty language.
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  charset='us-ascii')
    eq(msg.get_param('title'),
       ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
    # Setting the parameter again replaces it, now with a language.
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  charset='us-ascii', language='en')
    eq(msg.get_param('title'),
       ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
    msg = self._msgobj('msg_01.txt')
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  charset='us-ascii', language='en')
    # The flattened message carries the parameter in RFC 2231 extended
    # form (title*=charset'language'percent-encoded-value).
    eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4528
R David Murraya2860e82011-04-16 09:20:30 -04004529 def test_set_param_requote(self):
4530 msg = Message()
4531 msg.set_param('title', 'foo')
4532 self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
4533 msg.set_param('title', 'bar', requote=False)
4534 self.assertEqual(msg['content-type'], 'text/plain; title=bar')
4535 # tspecial is still quoted.
4536 msg.set_param('title', "(bar)bell", requote=False)
4537 self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
4538
def test_del_param(self):
    # Sets two RFC 2231 parameters on the msg_01.txt fixture, deletes one
    # of them and checks that only the other is left in the output.
    eq = self.ndiffAssertEqual
    msg = self._msgobj('msg_01.txt')
    msg.set_param('foo', 'bar', charset='us-ascii', language='en')
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  charset='us-ascii', language='en')
    msg.del_param('foo', header='Content-Type')
    # 'foo' is gone; 'title' remains in extended (title*=) form.
    eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4568
def test_rfc2231_get_content_charset(self):
    # The charset of the msg_32.txt fixture is reported as plain
    # 'us-ascii'.
    msg = self._msgobj('msg_32.txt')
    self.assertEqual(msg.get_content_charset(), 'us-ascii')
4573
R. David Murraydfd7eb02010-12-24 22:36:49 +00004574 def test_rfc2231_parse_rfc_quoting(self):
4575 m = textwrap.dedent('''\
4576 Content-Disposition: inline;
4577 \tfilename*0*=''This%20is%20even%20more%20;
4578 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
4579 \tfilename*2="is it not.pdf"
4580
4581 ''')
4582 msg = email.message_from_string(m)
4583 self.assertEqual(msg.get_filename(),
4584 'This is even more ***fun*** is it not.pdf')
4585 self.assertEqual(m, msg.as_string())
4586
4587 def test_rfc2231_parse_extra_quoting(self):
4588 m = textwrap.dedent('''\
4589 Content-Disposition: inline;
4590 \tfilename*0*="''This%20is%20even%20more%20";
4591 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4592 \tfilename*2="is it not.pdf"
4593
4594 ''')
4595 msg = email.message_from_string(m)
4596 self.assertEqual(msg.get_filename(),
4597 'This is even more ***fun*** is it not.pdf')
4598 self.assertEqual(m, msg.as_string())
4599
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004600 def test_rfc2231_no_language_or_charset(self):
4601 m = '''\
4602Content-Transfer-Encoding: 8bit
4603Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
4604Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
4605
4606'''
4607 msg = email.message_from_string(m)
4608 param = msg.get_param('NAME')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004609 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004610 self.assertEqual(
4611 param,
4612 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
4613
4614 def test_rfc2231_no_language_or_charset_in_filename(self):
4615 m = '''\
4616Content-Disposition: inline;
4617\tfilename*0*="''This%20is%20even%20more%20";
4618\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4619\tfilename*2="is it not.pdf"
4620
4621'''
4622 msg = email.message_from_string(m)
4623 self.assertEqual(msg.get_filename(),
4624 'This is even more ***fun*** is it not.pdf')
4625
4626 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
4627 m = '''\
4628Content-Disposition: inline;
4629\tfilename*0*="''This%20is%20even%20more%20";
4630\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4631\tfilename*2="is it not.pdf"
4632
4633'''
4634 msg = email.message_from_string(m)
4635 self.assertEqual(msg.get_filename(),
4636 'This is even more ***fun*** is it not.pdf')
4637
4638 def test_rfc2231_partly_encoded(self):
4639 m = '''\
4640Content-Disposition: inline;
4641\tfilename*0="''This%20is%20even%20more%20";
4642\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4643\tfilename*2="is it not.pdf"
4644
4645'''
4646 msg = email.message_from_string(m)
4647 self.assertEqual(
4648 msg.get_filename(),
4649 'This%20is%20even%20more%20***fun*** is it not.pdf')
4650
4651 def test_rfc2231_partly_nonencoded(self):
4652 m = '''\
4653Content-Disposition: inline;
4654\tfilename*0="This%20is%20even%20more%20";
4655\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
4656\tfilename*2="is it not.pdf"
4657
4658'''
4659 msg = email.message_from_string(m)
4660 self.assertEqual(
4661 msg.get_filename(),
4662 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
4663
4664 def test_rfc2231_no_language_or_charset_in_boundary(self):
4665 m = '''\
4666Content-Type: multipart/alternative;
4667\tboundary*0*="''This%20is%20even%20more%20";
4668\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
4669\tboundary*2="is it not.pdf"
4670
4671'''
4672 msg = email.message_from_string(m)
4673 self.assertEqual(msg.get_boundary(),
4674 'This is even more ***fun*** is it not.pdf')
4675
4676 def test_rfc2231_no_language_or_charset_in_charset(self):
4677 # This is a nonsensical charset value, but tests the code anyway
4678 m = '''\
4679Content-Type: text/plain;
4680\tcharset*0*="This%20is%20even%20more%20";
4681\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
4682\tcharset*2="is it not.pdf"
4683
4684'''
4685 msg = email.message_from_string(m)
4686 self.assertEqual(msg.get_content_charset(),
4687 'this is even more ***fun*** is it not.pdf')
4688
4689 def test_rfc2231_bad_encoding_in_filename(self):
4690 m = '''\
4691Content-Disposition: inline;
4692\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
4693\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4694\tfilename*2="is it not.pdf"
4695
4696'''
4697 msg = email.message_from_string(m)
4698 self.assertEqual(msg.get_filename(),
4699 'This is even more ***fun*** is it not.pdf')
4700
4701 def test_rfc2231_bad_encoding_in_charset(self):
4702 m = """\
4703Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
4704
4705"""
4706 msg = email.message_from_string(m)
4707 # This should return None because non-ascii characters in the charset
4708 # are not allowed.
4709 self.assertEqual(msg.get_content_charset(), None)
4710
4711 def test_rfc2231_bad_character_in_charset(self):
4712 m = """\
4713Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
4714
4715"""
4716 msg = email.message_from_string(m)
4717 # This should return None because non-ascii characters in the charset
4718 # are not allowed.
4719 self.assertEqual(msg.get_content_charset(), None)
4720
4721 def test_rfc2231_bad_character_in_filename(self):
4722 m = '''\
4723Content-Disposition: inline;
4724\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
4725\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4726\tfilename*2*="is it not.pdf%E2"
4727
4728'''
4729 msg = email.message_from_string(m)
4730 self.assertEqual(msg.get_filename(),
4731 'This is even more ***fun*** is it not.pdf\ufffd')
4732
4733 def test_rfc2231_unknown_encoding(self):
4734 m = """\
4735Content-Transfer-Encoding: 8bit
4736Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
4737
4738"""
4739 msg = email.message_from_string(m)
4740 self.assertEqual(msg.get_filename(), 'myfile.txt')
4741
4742 def test_rfc2231_single_tick_in_filename_extended(self):
4743 eq = self.assertEqual
4744 m = """\
4745Content-Type: application/x-foo;
4746\tname*0*=\"Frank's\"; name*1*=\" Document\"
4747
4748"""
4749 msg = email.message_from_string(m)
4750 charset, language, s = msg.get_param('name')
4751 eq(charset, None)
4752 eq(language, None)
4753 eq(s, "Frank's Document")
4754
4755 def test_rfc2231_single_tick_in_filename(self):
4756 m = """\
4757Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
4758
4759"""
4760 msg = email.message_from_string(m)
4761 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004762 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004763 self.assertEqual(param, "Frank's Document")
4764
4765 def test_rfc2231_tick_attack_extended(self):
4766 eq = self.assertEqual
4767 m = """\
4768Content-Type: application/x-foo;
4769\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
4770
4771"""
4772 msg = email.message_from_string(m)
4773 charset, language, s = msg.get_param('name')
4774 eq(charset, 'us-ascii')
4775 eq(language, 'en-us')
4776 eq(s, "Frank's Document")
4777
4778 def test_rfc2231_tick_attack(self):
4779 m = """\
4780Content-Type: application/x-foo;
4781\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
4782
4783"""
4784 msg = email.message_from_string(m)
4785 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004786 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004787 self.assertEqual(param, "us-ascii'en-us'Frank's Document")
4788
4789 def test_rfc2231_no_extended_values(self):
4790 eq = self.assertEqual
4791 m = """\
4792Content-Type: application/x-foo; name=\"Frank's Document\"
4793
4794"""
4795 msg = email.message_from_string(m)
4796 eq(msg.get_param('name'), "Frank's Document")
4797
4798 def test_rfc2231_encoded_then_unencoded_segments(self):
4799 eq = self.assertEqual
4800 m = """\
4801Content-Type: application/x-foo;
4802\tname*0*=\"us-ascii'en-us'My\";
4803\tname*1=\" Document\";
4804\tname*2*=\" For You\"
4805
4806"""
4807 msg = email.message_from_string(m)
4808 charset, language, s = msg.get_param('name')
4809 eq(charset, 'us-ascii')
4810 eq(language, 'en-us')
4811 eq(s, 'My Document For You')
4812
4813 def test_rfc2231_unencoded_then_encoded_segments(self):
4814 eq = self.assertEqual
4815 m = """\
4816Content-Type: application/x-foo;
4817\tname*0=\"us-ascii'en-us'My\";
4818\tname*1*=\" Document\";
4819\tname*2*=\" For You\"
4820
4821"""
4822 msg = email.message_from_string(m)
4823 charset, language, s = msg.get_param('name')
4824 eq(charset, 'us-ascii')
4825 eq(language, 'en-us')
4826 eq(s, 'My Document For You')
4827
4828
Ezio Melottib3aedd42010-11-20 19:04:17 +00004829
R. David Murraya8f480f2010-01-16 18:30:03 +00004830# Tests to ensure that signed parts of an email are completely preserved, as
4831# required by RFC1847 section 2.1. Note that these are incomplete, because the
4832# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):

    # Locates the first MIME part of a multipart message.  group(1) is the
    # boundary text that follows the leading '--'; group(2) is everything
    # between that delimiter line and the next line that consists of the
    # same '--boundary' delimiter.  Compiled once for the whole class
    # (the module-level ``re`` import is used; no local import is needed).
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        """Return ``(text, message)`` for the test data file *filename*.

        *text* is the file content as read through ``openfile()`` and
        *message* is that same text parsed with
        ``email.message_from_string()``.
        """
        with openfile(filename) as fp:
            original = fp.read()
        return original, email.message_from_string(original)

    def _first_part(self, text, label):
        """Return the body of the first MIME part found in *text*.

        *label* names the text in the failure message.  The test fails
        with an explicit message when no delimited part is found, instead
        of raising AttributeError on a ``None`` match object.
        """
        match = self._first_part_re.search(text)
        self.assertIsNotNone(
            match, 'no MIME part delimiter found in %s message' % label)
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        """Assert that *result* has the same first MIME part as *original*."""
        # Extract the first mime part of each message
        inpart = self._first_part(original, 'original')
        outpart = self._first_part(result, 'generated')
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        # Flattening with as_string() must leave the first part untouched.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Same check with an explicit header length limit of 60.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Same check when the message is written through a Generator.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4865
4866
Ezio Melottib3aedd42010-11-20 19:04:17 +00004867
# Run the tests through unittest's command-line runner when this file is
# executed directly as a script; nothing happens on import.
if __name__ == '__main__':
    unittest.main()