blob: 86aa60c13444964704e26ca2992743bdcfd9a84e [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R David Murray28346b82011-03-31 11:40:20 -040039from test.support import run_unittest, unlink
R David Murraya256bac2011-03-31 12:20:23 -040040from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000041
# Small string constants used by the tests in this module.
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
45
46
Guido van Rossum8b3febe2007-08-30 01:15:14 +000047# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Tests for the email.message.Message API.

    Tests that call self._msgobj() or openfile() read the named sample
    message files; the remaining tests build their messages inline.
    """

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A header that is not present yields the supplied default.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', 'rb') as fp:
            msgdata = fp.read()
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends a new field instead of replacing
            # the old one, and lookups return the first match.  Delete the
            # previous value so every alias is really exercised (deleting
            # a missing header is a no-op).
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        for name in ('from', 'From', 'FROM', 'to', 'To', 'TO'):
            self.assertIn(name, msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the first line is the envelope "From " line;
        # everything after it must match the plain rendering.
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        # Same header text, spelled with a double-quoted Python literal.
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        for name in ('header', 'Header', 'HEADER'):
            self.assertIn(name, msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # Must not raise, and must not create the header as a side effect.
        msg.del_param('filename', 'content-disposition')
        self.assertNotIn('content-disposition', msg)

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        # Deleting a parameter that is not there leaves the header as it was.
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header name, only the first occurrence changes.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/plain; charset="utf-8"
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))
630
631
632
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000633# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    """Tests for the Content-Transfer-Encoding chosen for message bodies."""

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            bindata = fp.read()
        mimed = email.mime.image.MIMEImage(bindata)
        base64ed = mimed.get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        lines = base64ed.split('\n')
        self.assertLessEqual(max(len(x) for x in lines), 76)

    def test_encode_empty_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        eq = self.assertEqual
        # 7bit data and the default us-ascii _charset
        msg = MIMEText('hello world')
        eq(msg['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        msg = MIMEText('hello \xf8 world')
        eq(msg['content-transfer-encoding'], 'base64')
        # And now with a different charset
        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        eq = self.assertEqual
        msg = MIMEText('文', _charset='euc-jp')
        eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000670
Ezio Melottib3aedd42010-11-20 19:04:17 +0000671
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000672# Test long header wrapping
673class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400674
675 maxDiff = None
676
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000677 def test_split_long_continuation(self):
678 eq = self.ndiffAssertEqual
679 msg = email.message_from_string("""\
680Subject: bug demonstration
681\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
682\tmore text
683
684test
685""")
686 sfp = StringIO()
687 g = Generator(sfp)
688 g.flatten(msg)
689 eq(sfp.getvalue(), """\
690Subject: bug demonstration
691\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
692\tmore text
693
694test
695""")
696
697 def test_another_long_almost_unsplittable_header(self):
698 eq = self.ndiffAssertEqual
699 hstr = """\
700bug demonstration
701\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
702\tmore text"""
703 h = Header(hstr, continuation_ws='\t')
704 eq(h.encode(), """\
705bug demonstration
706\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
707\tmore text""")
708 h = Header(hstr.replace('\t', ' '))
709 eq(h.encode(), """\
710bug demonstration
711 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
712 more text""")
713
714 def test_long_nonstring(self):
715 eq = self.ndiffAssertEqual
716 g = Charset("iso-8859-1")
717 cz = Charset("iso-8859-2")
718 utf8 = Charset("utf-8")
719 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
720 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
721 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
722 b'bef\xf6rdert. ')
723 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
724 b'd\xf9vtipu.. ')
725 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
726 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
727 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
728 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
729 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
730 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
731 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
732 '\u3044\u307e\u3059\u3002')
733 h = Header(g_head, g, header_name='Subject')
734 h.append(cz_head, cz)
735 h.append(utf8_head, utf8)
736 msg = Message()
737 msg['Subject'] = h
738 sfp = StringIO()
739 g = Generator(sfp)
740 g.flatten(msg)
741 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000742Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
743 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
744 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
745 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
746 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
747 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
748 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
749 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
750 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
751 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
752 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000753
754""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000755 eq(h.encode(maxlinelen=76), """\
756=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
757 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
758 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
759 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
760 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
761 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
762 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
763 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
764 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
765 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
766 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000767
768 def test_long_header_encode(self):
769 eq = self.ndiffAssertEqual
770 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
771 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
772 header_name='X-Foobar-Spoink-Defrobnit')
773 eq(h.encode(), '''\
774wasnipoop; giraffes="very-long-necked-animals";
775 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
776
777 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
778 eq = self.ndiffAssertEqual
779 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
780 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
781 header_name='X-Foobar-Spoink-Defrobnit',
782 continuation_ws='\t')
783 eq(h.encode(), '''\
784wasnipoop; giraffes="very-long-necked-animals";
785 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
786
787 def test_long_header_encode_with_tab_continuation(self):
788 eq = self.ndiffAssertEqual
789 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
790 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
791 header_name='X-Foobar-Spoink-Defrobnit',
792 continuation_ws='\t')
793 eq(h.encode(), '''\
794wasnipoop; giraffes="very-long-necked-animals";
795\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
796
R David Murray3a6152f2011-03-14 21:13:03 -0400797 def test_header_encode_with_different_output_charset(self):
798 h = Header('文', 'euc-jp')
799 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
800
801 def test_long_header_encode_with_different_output_charset(self):
802 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
803 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
804 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
805 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
806 res = """\
807=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
808 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
809 self.assertEqual(h.encode(), res)
810
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000811 def test_header_splitter(self):
812 eq = self.ndiffAssertEqual
813 msg = MIMEText('')
814 # It'd be great if we could use add_header() here, but that doesn't
815 # guarantee an order of the parameters.
816 msg['X-Foobar-Spoink-Defrobnit'] = (
817 'wasnipoop; giraffes="very-long-necked-animals"; '
818 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
819 sfp = StringIO()
820 g = Generator(sfp)
821 g.flatten(msg)
822 eq(sfp.getvalue(), '''\
823Content-Type: text/plain; charset="us-ascii"
824MIME-Version: 1.0
825Content-Transfer-Encoding: 7bit
826X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
827 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
828
829''')
830
def test_no_semis_header_splitter(self):
    # A References value made only of space-separated message ids (no
    # semicolons) is expected to fold at a space.
    msg = Message()
    msg['From'] = 'test@dom.ain'
    msg['References'] = SPACE.join(['<%d@dom.ain>' % n for n in range(10)])
    msg.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(msg)
    expected = (
        'From: test@dom.ain\n'
        'References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>\n'
        ' <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>\n'
        '\n'
        'Test')
    self.ndiffAssertEqual(out.getvalue(), expected)
846
R David Murray7da4db12011-04-07 20:37:17 -0400847 def test_last_split_chunk_does_not_fit(self):
848 eq = self.ndiffAssertEqual
849 h = Header('Subject: the first part of this is short, but_the_second'
850 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
851 '_all_by_itself')
852 eq(h.encode(), """\
853Subject: the first part of this is short,
854 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
855
856 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
857 eq = self.ndiffAssertEqual
858 h = Header(', but_the_second'
859 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
860 '_all_by_itself')
861 eq(h.encode(), """\
862,
863 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
864
865 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
866 eq = self.ndiffAssertEqual
867 h = Header(', , but_the_second'
868 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
869 '_all_by_itself')
870 eq(h.encode(), """\
871, ,
872 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
873
874 def test_trailing_splitable_on_overlong_unsplitable(self):
875 eq = self.ndiffAssertEqual
876 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
877 'be_on_a_line_all_by_itself;')
878 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
879 "be_on_a_line_all_by_itself;")
880
881 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
882 eq = self.ndiffAssertEqual
883 h = Header('; '
884 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400885 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400886 eq(h.encode(), """\
887;
R David Murray01581ee2011-04-18 10:04:34 -0400888 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400889
R David Murraye1292a22011-04-07 20:54:03 -0400890 def test_long_header_with_multiple_sequential_split_chars(self):
R David Murraye1292a22011-04-07 20:54:03 -0400891 eq = self.ndiffAssertEqual
892 h = Header('This is a long line that has two whitespaces in a row. '
893 'This used to cause truncation of the header when folded')
894 eq(h.encode(), """\
895This is a long line that has two whitespaces in a row. This used to cause
896 truncation of the header when folded""")
897
R David Murray01581ee2011-04-18 10:04:34 -0400898 def test_splitter_split_on_punctuation_only_if_fws(self):
899 eq = self.ndiffAssertEqual
900 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
901 'they;arenotlegal;fold,points')
902 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
903 "arenotlegal;fold,points")
904
905 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
906 eq = self.ndiffAssertEqual
907 h = Header('this is a test where we need to have more than one line '
908 'before; our final line that is just too big to fit;; '
909 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
910 'be_on_a_line_all_by_itself;')
911 eq(h.encode(), """\
912this is a test where we need to have more than one line before;
913 our final line that is just too big to fit;;
914 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
915
916 def test_overlong_last_part_followed_by_split_point(self):
917 eq = self.ndiffAssertEqual
918 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
919 'be_on_a_line_all_by_itself ')
920 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
921 "should_be_on_a_line_all_by_itself ")
922
923 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
924 eq = self.ndiffAssertEqual
925 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
926 'before_our_final_line_; ; '
927 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
928 'be_on_a_line_all_by_itself; ')
929 eq(h.encode(), """\
930this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
931 ;
932 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
933
934 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
935 eq = self.ndiffAssertEqual
936 h = Header('this is a test where we need to have more than one line '
937 'before our final line; ; '
938 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
939 'be_on_a_line_all_by_itself; ')
940 eq(h.encode(), """\
941this is a test where we need to have more than one line before our final line;
942 ;
943 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
944
def test_long_header_with_whitespace_runs(self):
    # Runs of several spaces between message ids must survive folding
    # intact, including the run left at the very end of the value.
    #
    # Each element ends in TWO spaces, so joining with SPACE gives runs
    # of three spaces between ids and two trailing spaces.  The expected
    # text depends on exactly that: it ends in \x20\x20 and only four ids
    # fit on the second line.  With single spaces neither would hold.
    msg = Message()
    msg['From'] = 'test@dom.ain'
    msg['References'] = SPACE.join(['<foo@dom.ain>  '] * 10)
    msg.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(msg)
    expected = (
        'From: test@dom.ain\n'
        'References: <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>\n'
        '   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>\n'
        '   <foo@dom.ain>   <foo@dom.ain>\x20\x20\n'
        '\n'
        'Test')
    self.ndiffAssertEqual(out.getvalue(), expected)
961
def test_long_run_with_semi_header_splitter(self):
    # A long run of space-separated ids followed by '; abc': the folds
    # are expected at spaces, with '; abc' staying attached to the last id.
    msg = Message()
    msg['From'] = 'test@dom.ain'
    ids = SPACE.join(['<foo@dom.ain>'] * 10)
    msg['References'] = ids + '; abc'
    msg.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(msg)
    expected = (
        'From: test@dom.ain\n'
        'References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>\n'
        ' <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>\n'
        ' <foo@dom.ain>; abc\n'
        '\n'
        'Test')
    self.ndiffAssertEqual(out.getvalue(), expected)
978
979 def test_splitter_split_on_punctuation_only_if_fws(self):
980 eq = self.ndiffAssertEqual
981 msg = Message()
982 msg['From'] = 'test@dom.ain'
983 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
984 'they;arenotlegal;fold,points')
985 msg.set_payload('Test')
986 sfp = StringIO()
987 g = Generator(sfp)
988 g.flatten(msg)
989 # XXX the space after the header should not be there.
990 eq(sfp.getvalue(), """\
991From: test@dom.ain
992References:\x20
993 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
994
995Test""")
996
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000997 def test_no_split_long_header(self):
998 eq = self.ndiffAssertEqual
999 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +00001000 h = Header(hstr)
1001 # These come on two lines because Headers are really field value
1002 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001003 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001004References:
1005 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1006 h = Header('x' * 80)
1007 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001008
1009 def test_splitting_multiple_long_lines(self):
1010 eq = self.ndiffAssertEqual
1011 hstr = """\
1012from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1013\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1014\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1015"""
1016 h = Header(hstr, continuation_ws='\t')
1017 eq(h.encode(), """\
1018from babylon.socal-raves.org (localhost [127.0.0.1]);
1019 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1020 for <mailman-admin@babylon.socal-raves.org>;
1021 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1022\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1023 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1024 for <mailman-admin@babylon.socal-raves.org>;
1025 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1026\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1027 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1028 for <mailman-admin@babylon.socal-raves.org>;
1029 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1030
1031 def test_splitting_first_line_only_is_long(self):
1032 eq = self.ndiffAssertEqual
1033 hstr = """\
1034from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1035\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1036\tid 17k4h5-00034i-00
1037\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1038 h = Header(hstr, maxlinelen=78, header_name='Received',
1039 continuation_ws='\t')
1040 eq(h.encode(), """\
1041from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1042 helo=cthulhu.gerg.ca)
1043\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1044\tid 17k4h5-00034i-00
1045\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1046
1047 def test_long_8bit_header(self):
1048 eq = self.ndiffAssertEqual
1049 msg = Message()
1050 h = Header('Britische Regierung gibt', 'iso-8859-1',
1051 header_name='Subject')
1052 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001053 eq(h.encode(maxlinelen=76), """\
1054=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1055 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001056 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001057 eq(msg.as_string(maxheaderlen=76), """\
1058Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1059 =?iso-8859-1?q?hore-Windkraftprojekte?=
1060
1061""")
1062 eq(msg.as_string(maxheaderlen=0), """\
1063Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001064
1065""")
1066
1067 def test_long_8bit_header_no_charset(self):
1068 eq = self.ndiffAssertEqual
1069 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001070 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1071 'f\xfcr Offshore-Windkraftprojekte '
1072 '<a-very-long-address@example.com>')
1073 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001074 eq(msg.as_string(maxheaderlen=78), """\
1075Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1076 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1077
1078""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001079 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001080 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001081 header_name='Reply-To')
1082 eq(msg.as_string(maxheaderlen=78), """\
1083Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1084 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001085
1086""")
1087
1088 def test_long_to_header(self):
1089 eq = self.ndiffAssertEqual
1090 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001091 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001092 '"Someone Test #B" <someone@umich.edu>, '
1093 '"Someone Test #C" <someone@eecs.umich.edu>, '
1094 '"Someone Test #D" <someone@eecs.umich.edu>')
1095 msg = Message()
1096 msg['To'] = to
1097 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001098To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001099 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001100 "Someone Test #C" <someone@eecs.umich.edu>,
1101 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001102
1103''')
1104
1105 def test_long_line_after_append(self):
1106 eq = self.ndiffAssertEqual
1107 s = 'This is an example of string which has almost the limit of header length.'
1108 h = Header(s)
1109 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001110 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001111This is an example of string which has almost the limit of header length.
1112 Add another line.""")
1113
1114 def test_shorter_line_with_append(self):
1115 eq = self.ndiffAssertEqual
1116 s = 'This is a shorter line.'
1117 h = Header(s)
1118 h.append('Add another sentence. (Surprise?)')
1119 eq(h.encode(),
1120 'This is a shorter line. Add another sentence. (Surprise?)')
1121
1122 def test_long_field_name(self):
1123 eq = self.ndiffAssertEqual
1124 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001125 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1126 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1127 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1128 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001129 h = Header(gs, 'iso-8859-1', header_name=fn)
1130 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001131 eq(h.encode(maxlinelen=76), """\
1132=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1133 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1134 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1135 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001136
1137 def test_long_received_header(self):
1138 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1139 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1140 'Wed, 05 Mar 2003 18:10:18 -0700')
1141 msg = Message()
1142 msg['Received-1'] = Header(h, continuation_ws='\t')
1143 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001144 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001145 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001146Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1147 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001148 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001149Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1150 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001151 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001152
1153""")
1154
1155 def test_string_headerinst_eq(self):
1156 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1157 'tu-muenchen.de> (David Bremner\'s message of '
1158 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1159 msg = Message()
1160 msg['Received-1'] = Header(h, header_name='Received-1',
1161 continuation_ws='\t')
1162 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001163 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001164 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001165Received-1:\x20
1166 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1167 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1168Received-2:\x20
1169 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1170 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001171
1172""")
1173
1174 def test_long_unbreakable_lines_with_continuation(self):
1175 eq = self.ndiffAssertEqual
1176 msg = Message()
1177 t = """\
1178iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1179 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1180 msg['Face-1'] = t
1181 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001182 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001183 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001184 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001185 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001186Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001187 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001188 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001189Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001190 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001191 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001192Face-3:\x20
1193 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1194 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001195
1196""")
1197
1198 def test_another_long_multiline_header(self):
1199 eq = self.ndiffAssertEqual
1200 m = ('Received: from siimage.com '
1201 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001202 'Microsoft SMTPSVC(5.0.2195.4905); '
1203 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001204 msg = email.message_from_string(m)
1205 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001206Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1207 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001208
1209''')
1210
1211 def test_long_lines_with_different_header(self):
1212 eq = self.ndiffAssertEqual
1213 h = ('List-Unsubscribe: '
1214 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1215 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1216 '?subject=unsubscribe>')
1217 msg = Message()
1218 msg['List'] = h
1219 msg['List'] = Header(h, header_name='List')
1220 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001221List: List-Unsubscribe:
1222 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001223 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001224List: List-Unsubscribe:
1225 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001226 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001227
1228""")
1229
R. David Murray6f0022d2011-01-07 21:57:25 +00001230 def test_long_rfc2047_header_with_embedded_fws(self):
1231 h = Header(textwrap.dedent("""\
1232 We're going to pretend this header is in a non-ascii character set
1233 \tto see if line wrapping with encoded words and embedded
1234 folding white space works"""),
1235 charset='utf-8',
1236 header_name='Test')
1237 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1238 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1239 =?utf-8?q?cter_set?=
1240 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1241 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1242
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001243
Ezio Melottib3aedd42010-11-20 19:04:17 +00001244
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001245# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # A one-header message whose body starts with the word "From ".
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload(
            'From the desk of A.A.A.:\n'
            'Blah blah blah\n')
        self.msg = message

    def _flatten(self, mangle):
        # Serialise self.msg with the given mangle_from_ setting.
        out = StringIO()
        Generator(out, mangle_from_=mangle).flatten(self.msg)
        return out.getvalue()

    def test_mangled_from(self):
        # With mangling on, the body line gains a leading '>'.
        self.assertEqual(self._flatten(True), (
            'From: aaa@bbb.org\n'
            '\n'
            '>From the desk of A.A.A.:\n'
            'Blah blah blah\n'))

    def test_dont_mangle_from(self):
        # With mangling off, the body is written unchanged.
        self.assertEqual(self._flatten(False), (
            'From: aaa@bbb.org\n'
            '\n'
            'From the desk of A.A.A.:\n'
            'Blah blah blah\n'))
1276
1277
Ezio Melottib3aedd42010-11-20 19:04:17 +00001278
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001279# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Read the sample clip and wrap its bytes in a MIMEAudio part.
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload must base64-decode back to the original bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs reports both objects on failure, unlike
        # assertTrue(a is b), which only reports "False is not true".
        same = self.assertIs
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list serves as a sentinel that cannot occur as a value.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        same(self._au.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._au.get_param('foobar', missing), missing)
        same(self._au.get_param('attachment', missing,
                                header='foobar'), missing)
1317
1318
Ezio Melottib3aedd42010-11-20 19:04:17 +00001319
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001320# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Read the sample GIF and wrap its bytes in a MIMEImage part.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload must base64-decode back to the original bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs reports both objects on failure, unlike
        # assertTrue(a is b), which only reports "False is not true".
        same = self.assertIs
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list serves as a sentinel that cannot occur as a value.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        same(self._im.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._im.get_param('foobar', missing), missing)
        same(self._im.get_param('attachment', missing,
                                header='foobar'), missing)
1358
1359
Ezio Melottib3aedd42010-11-20 19:04:17 +00001360
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001361# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        # Binary data gets the generic content type and base64 encoding.
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        # The stored payload is the base64 text; decode=True gives back
        # the original bytes.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        encoded = part.get_payload().strip()
        self.assertEqual(encoded, '+vv8/f7/')
        self.assertEqual(part.get_payload(decode=True), bytesdata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001376
1377
Ezio Melottib3aedd42010-11-20 19:04:17 +00001378
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001379# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list serves as a sentinel that cannot occur as a value.
        # assertIs is used so a failure shows both objects.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(
            self._msg.get_param('charset', missing, header='foobar'),
            missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn prints the haystack on failure.
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1430
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001431
Ezio Melottib3aedd42010-11-20 19:04:17 +00001432
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001433# Test complicated multipart/* messages
1434class TestMultipart(TestEmailBase):
def setUp(self):
    # Build the multipart/mixed fixture used by the tests of this class:
    # a text part plus a GIF attachment, stored on self._msg, with the
    # two parts also kept as self._txt and self._im.
    with openfile('PyBanner048.gif', 'rb') as fp:
        data = fp.read()
    container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
    image = MIMEImage(data, name='dingusfish.gif')
    image.add_header('content-disposition', 'attachment',
                     filename='dingusfish.gif')
    intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
    container.attach(intro)
    container.attach(image)
    container['From'] = 'Barry <barry@digicool.com>'
    container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
    container['Subject'] = 'Here is your dingus fish'

    now = 987809702.54848599
    timetuple = time.localtime(now)
    # The last struct_time field is the DST flag: 0 selects the standard
    # offset, anything else the DST offset.
    if timetuple[-1] == 0:
        tzsecs = time.timezone
    else:
        tzsecs = time.altzone
    # time.timezone/altzone count seconds WEST of UTC, so a positive
    # value means a negative numeric zone.
    if tzsecs > 0:
        sign = '-'
    else:
        sign = '+'
    # Format the magnitude as HHMM.  The sign is already carried by
    # `sign`, so abs() avoids a doubled sign east of UTC, and divmod
    # keeps offsets that are not whole hours correct.
    hours, minutes = divmod(abs(tzsecs) // 60, 60)
    tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
    container['Date'] = time.strftime(
        '%a, %d %b %Y %H:%M:%S',
        time.localtime(now)) + tzoffset
    self._msg = container
    self._im = image
    self._txt = intro
1470
1471 def test_hierarchy(self):
1472 # convenience
1473 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001474 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001475 raises = self.assertRaises
1476 # tests
1477 m = self._msg
1478 unless(m.is_multipart())
1479 eq(m.get_content_type(), 'multipart/mixed')
1480 eq(len(m.get_payload()), 2)
1481 raises(IndexError, m.get_payload, 2)
1482 m0 = m.get_payload(0)
1483 m1 = m.get_payload(1)
1484 unless(m0 is self._txt)
1485 unless(m1 is self._im)
1486 eq(m.get_payload(), [m0, m1])
1487 unless(not m0.is_multipart())
1488 unless(not m1.is_multipart())
1489
1490 def test_empty_multipart_idempotent(self):
1491 text = """\
1492Content-Type: multipart/mixed; boundary="BOUNDARY"
1493MIME-Version: 1.0
1494Subject: A subject
1495To: aperson@dom.ain
1496From: bperson@dom.ain
1497
1498
1499--BOUNDARY
1500
1501
1502--BOUNDARY--
1503"""
1504 msg = Parser().parsestr(text)
1505 self.ndiffAssertEqual(text, msg.as_string())
1506
1507 def test_no_parts_in_a_multipart_with_none_epilogue(self):
1508 outer = MIMEBase('multipart', 'mixed')
1509 outer['Subject'] = 'A subject'
1510 outer['To'] = 'aperson@dom.ain'
1511 outer['From'] = 'bperson@dom.ain'
1512 outer.set_boundary('BOUNDARY')
1513 self.ndiffAssertEqual(outer.as_string(), '''\
1514Content-Type: multipart/mixed; boundary="BOUNDARY"
1515MIME-Version: 1.0
1516Subject: A subject
1517To: aperson@dom.ain
1518From: bperson@dom.ain
1519
1520--BOUNDARY
1521
1522--BOUNDARY--''')
1523
1524 def test_no_parts_in_a_multipart_with_empty_epilogue(self):
1525 outer = MIMEBase('multipart', 'mixed')
1526 outer['Subject'] = 'A subject'
1527 outer['To'] = 'aperson@dom.ain'
1528 outer['From'] = 'bperson@dom.ain'
1529 outer.preamble = ''
1530 outer.epilogue = ''
1531 outer.set_boundary('BOUNDARY')
1532 self.ndiffAssertEqual(outer.as_string(), '''\
1533Content-Type: multipart/mixed; boundary="BOUNDARY"
1534MIME-Version: 1.0
1535Subject: A subject
1536To: aperson@dom.ain
1537From: bperson@dom.ain
1538
1539
1540--BOUNDARY
1541
1542--BOUNDARY--
1543''')
1544
1545 def test_one_part_in_a_multipart(self):
1546 eq = self.ndiffAssertEqual
1547 outer = MIMEBase('multipart', 'mixed')
1548 outer['Subject'] = 'A subject'
1549 outer['To'] = 'aperson@dom.ain'
1550 outer['From'] = 'bperson@dom.ain'
1551 outer.set_boundary('BOUNDARY')
1552 msg = MIMEText('hello world')
1553 outer.attach(msg)
1554 eq(outer.as_string(), '''\
1555Content-Type: multipart/mixed; boundary="BOUNDARY"
1556MIME-Version: 1.0
1557Subject: A subject
1558To: aperson@dom.ain
1559From: bperson@dom.ain
1560
1561--BOUNDARY
1562Content-Type: text/plain; charset="us-ascii"
1563MIME-Version: 1.0
1564Content-Transfer-Encoding: 7bit
1565
1566hello world
1567--BOUNDARY--''')
1568
1569 def test_seq_parts_in_a_multipart_with_empty_preamble(self):
1570 eq = self.ndiffAssertEqual
1571 outer = MIMEBase('multipart', 'mixed')
1572 outer['Subject'] = 'A subject'
1573 outer['To'] = 'aperson@dom.ain'
1574 outer['From'] = 'bperson@dom.ain'
1575 outer.preamble = ''
1576 msg = MIMEText('hello world')
1577 outer.attach(msg)
1578 outer.set_boundary('BOUNDARY')
1579 eq(outer.as_string(), '''\
1580Content-Type: multipart/mixed; boundary="BOUNDARY"
1581MIME-Version: 1.0
1582Subject: A subject
1583To: aperson@dom.ain
1584From: bperson@dom.ain
1585
1586
1587--BOUNDARY
1588Content-Type: text/plain; charset="us-ascii"
1589MIME-Version: 1.0
1590Content-Transfer-Encoding: 7bit
1591
1592hello world
1593--BOUNDARY--''')
1594
1595
1596 def test_seq_parts_in_a_multipart_with_none_preamble(self):
1597 eq = self.ndiffAssertEqual
1598 outer = MIMEBase('multipart', 'mixed')
1599 outer['Subject'] = 'A subject'
1600 outer['To'] = 'aperson@dom.ain'
1601 outer['From'] = 'bperson@dom.ain'
1602 outer.preamble = None
1603 msg = MIMEText('hello world')
1604 outer.attach(msg)
1605 outer.set_boundary('BOUNDARY')
1606 eq(outer.as_string(), '''\
1607Content-Type: multipart/mixed; boundary="BOUNDARY"
1608MIME-Version: 1.0
1609Subject: A subject
1610To: aperson@dom.ain
1611From: bperson@dom.ain
1612
1613--BOUNDARY
1614Content-Type: text/plain; charset="us-ascii"
1615MIME-Version: 1.0
1616Content-Transfer-Encoding: 7bit
1617
1618hello world
1619--BOUNDARY--''')
1620
1621
1622 def test_seq_parts_in_a_multipart_with_none_epilogue(self):
1623 eq = self.ndiffAssertEqual
1624 outer = MIMEBase('multipart', 'mixed')
1625 outer['Subject'] = 'A subject'
1626 outer['To'] = 'aperson@dom.ain'
1627 outer['From'] = 'bperson@dom.ain'
1628 outer.epilogue = None
1629 msg = MIMEText('hello world')
1630 outer.attach(msg)
1631 outer.set_boundary('BOUNDARY')
1632 eq(outer.as_string(), '''\
1633Content-Type: multipart/mixed; boundary="BOUNDARY"
1634MIME-Version: 1.0
1635Subject: A subject
1636To: aperson@dom.ain
1637From: bperson@dom.ain
1638
1639--BOUNDARY
1640Content-Type: text/plain; charset="us-ascii"
1641MIME-Version: 1.0
1642Content-Transfer-Encoding: 7bit
1643
1644hello world
1645--BOUNDARY--''')
1646
1647
1648 def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
1649 eq = self.ndiffAssertEqual
1650 outer = MIMEBase('multipart', 'mixed')
1651 outer['Subject'] = 'A subject'
1652 outer['To'] = 'aperson@dom.ain'
1653 outer['From'] = 'bperson@dom.ain'
1654 outer.epilogue = ''
1655 msg = MIMEText('hello world')
1656 outer.attach(msg)
1657 outer.set_boundary('BOUNDARY')
1658 eq(outer.as_string(), '''\
1659Content-Type: multipart/mixed; boundary="BOUNDARY"
1660MIME-Version: 1.0
1661Subject: A subject
1662To: aperson@dom.ain
1663From: bperson@dom.ain
1664
1665--BOUNDARY
1666Content-Type: text/plain; charset="us-ascii"
1667MIME-Version: 1.0
1668Content-Transfer-Encoding: 7bit
1669
1670hello world
1671--BOUNDARY--
1672''')
1673
1674
1675 def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
1676 eq = self.ndiffAssertEqual
1677 outer = MIMEBase('multipart', 'mixed')
1678 outer['Subject'] = 'A subject'
1679 outer['To'] = 'aperson@dom.ain'
1680 outer['From'] = 'bperson@dom.ain'
1681 outer.epilogue = '\n'
1682 msg = MIMEText('hello world')
1683 outer.attach(msg)
1684 outer.set_boundary('BOUNDARY')
1685 eq(outer.as_string(), '''\
1686Content-Type: multipart/mixed; boundary="BOUNDARY"
1687MIME-Version: 1.0
1688Subject: A subject
1689To: aperson@dom.ain
1690From: bperson@dom.ain
1691
1692--BOUNDARY
1693Content-Type: text/plain; charset="us-ascii"
1694MIME-Version: 1.0
1695Content-Transfer-Encoding: 7bit
1696
1697hello world
1698--BOUNDARY--
1699
1700''')
1701
1702 def test_message_external_body(self):
1703 eq = self.assertEqual
1704 msg = self._msgobj('msg_36.txt')
1705 eq(len(msg.get_payload()), 2)
1706 msg1 = msg.get_payload(1)
1707 eq(msg1.get_content_type(), 'multipart/alternative')
1708 eq(len(msg1.get_payload()), 2)
1709 for subpart in msg1.get_payload():
1710 eq(subpart.get_content_type(), 'message/external-body')
1711 eq(len(subpart.get_payload()), 1)
1712 subsubpart = subpart.get_payload(0)
1713 eq(subsubpart.get_content_type(), 'text/plain')
1714
1715 def test_double_boundary(self):
1716 # msg_37.txt is a multipart that contains two dash-boundary's in a
1717 # row. Our interpretation of RFC 2046 calls for ignoring the second
1718 # and subsequent boundaries.
1719 msg = self._msgobj('msg_37.txt')
1720 self.assertEqual(len(msg.get_payload()), 3)
1721
1722 def test_nested_inner_contains_outer_boundary(self):
1723 eq = self.ndiffAssertEqual
1724 # msg_38.txt has an inner part that contains outer boundaries. My
1725 # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
1726 # these are illegal and should be interpreted as unterminated inner
1727 # parts.
1728 msg = self._msgobj('msg_38.txt')
1729 sfp = StringIO()
1730 iterators._structure(msg, sfp)
1731 eq(sfp.getvalue(), """\
1732multipart/mixed
1733 multipart/mixed
1734 multipart/alternative
1735 text/plain
1736 text/plain
1737 text/plain
1738 text/plain
1739""")
1740
1741 def test_nested_with_same_boundary(self):
1742 eq = self.ndiffAssertEqual
1743 # msg 39.txt is similarly evil in that it's got inner parts that use
1744 # the same boundary as outer parts. Again, I believe the way this is
1745 # parsed is closest to the spirit of RFC 2046
1746 msg = self._msgobj('msg_39.txt')
1747 sfp = StringIO()
1748 iterators._structure(msg, sfp)
1749 eq(sfp.getvalue(), """\
1750multipart/mixed
1751 multipart/mixed
1752 multipart/alternative
1753 application/octet-stream
1754 application/octet-stream
1755 text/plain
1756""")
1757
1758 def test_boundary_in_non_multipart(self):
1759 msg = self._msgobj('msg_40.txt')
1760 self.assertEqual(msg.as_string(), '''\
1761MIME-Version: 1.0
1762Content-Type: text/html; boundary="--961284236552522269"
1763
1764----961284236552522269
1765Content-Type: text/html;
1766Content-Transfer-Encoding: 7Bit
1767
1768<html></html>
1769
1770----961284236552522269--
1771''')
1772
1773 def test_boundary_with_leading_space(self):
1774 eq = self.assertEqual
1775 msg = email.message_from_string('''\
1776MIME-Version: 1.0
1777Content-Type: multipart/mixed; boundary=" XXXX"
1778
1779-- XXXX
1780Content-Type: text/plain
1781
1782
1783-- XXXX
1784Content-Type: text/plain
1785
1786-- XXXX--
1787''')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001788 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001789 eq(msg.get_boundary(), ' XXXX')
1790 eq(len(msg.get_payload()), 2)
1791
1792 def test_boundary_without_trailing_newline(self):
1793 m = Parser().parsestr("""\
1794Content-Type: multipart/mixed; boundary="===============0012394164=="
1795MIME-Version: 1.0
1796
1797--===============0012394164==
1798Content-Type: image/file1.jpg
1799MIME-Version: 1.0
1800Content-Transfer-Encoding: base64
1801
1802YXNkZg==
1803--===============0012394164==--""")
Ezio Melottib3aedd42010-11-20 19:04:17 +00001804 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001805
1806
Ezio Melottib3aedd42010-11-20 19:04:17 +00001807
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001808# Test some badly formatted messages
R David Murray3edd22a2011-04-18 13:59:37 -04001809class TestNonConformantBase:
1810
1811 def _msgobj(self, filename):
1812 with openfile(filename) as fp:
1813 return email.message_from_file(fp, policy=self.policy)
1814
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001815 def test_parse_missing_minor_type(self):
1816 eq = self.assertEqual
1817 msg = self._msgobj('msg_14.txt')
1818 eq(msg.get_content_type(), 'text/plain')
1819 eq(msg.get_content_maintype(), 'text')
1820 eq(msg.get_content_subtype(), 'plain')
1821
1822 def test_same_boundary_inner_outer(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001823 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001824 msg = self._msgobj('msg_15.txt')
1825 # XXX We can probably eventually do better
1826 inner = msg.get_payload(0)
1827 unless(hasattr(inner, 'defects'))
R David Murray3edd22a2011-04-18 13:59:37 -04001828 self.assertEqual(len(self.get_defects(inner)), 1)
1829 unless(isinstance(self.get_defects(inner)[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001830 errors.StartBoundaryNotFoundDefect))
1831
1832 def test_multipart_no_boundary(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001833 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001834 msg = self._msgobj('msg_25.txt')
1835 unless(isinstance(msg.get_payload(), str))
R David Murray3edd22a2011-04-18 13:59:37 -04001836 self.assertEqual(len(self.get_defects(msg)), 2)
1837 unless(isinstance(self.get_defects(msg)[0],
1838 errors.NoBoundaryInMultipartDefect))
1839 unless(isinstance(self.get_defects(msg)[1],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001840 errors.MultipartInvariantViolationDefect))
1841
R David Murray749073a2011-06-22 13:47:53 -04001842 multipart_msg = textwrap.dedent("""\
1843 Date: Wed, 14 Nov 2007 12:56:23 GMT
1844 From: foo@bar.invalid
1845 To: foo@bar.invalid
1846 Subject: Content-Transfer-Encoding: base64 and multipart
1847 MIME-Version: 1.0
1848 Content-Type: multipart/mixed;
1849 boundary="===============3344438784458119861=="{}
1850
1851 --===============3344438784458119861==
1852 Content-Type: text/plain
1853
1854 Test message
1855
1856 --===============3344438784458119861==
1857 Content-Type: application/octet-stream
1858 Content-Transfer-Encoding: base64
1859
1860 YWJj
1861
1862 --===============3344438784458119861==--
1863 """)
1864
1865 def test_multipart_invalid_cte(self):
1866 msg = email.message_from_string(
1867 self.multipart_msg.format("\nContent-Transfer-Encoding: base64"),
1868 policy = self.policy)
1869 self.assertEqual(len(self.get_defects(msg)), 1)
1870 self.assertIsInstance(self.get_defects(msg)[0],
1871 errors.InvalidMultipartContentTransferEncodingDefect)
1872
1873 def test_multipart_no_cte_no_defect(self):
1874 msg = email.message_from_string(
1875 self.multipart_msg.format(''),
1876 policy = self.policy)
1877 self.assertEqual(len(self.get_defects(msg)), 0)
1878
1879 def test_multipart_valid_cte_no_defect(self):
1880 for cte in ('7bit', '8bit', 'BINary'):
1881 msg = email.message_from_string(
1882 self.multipart_msg.format(
1883 "\nContent-Transfer-Encoding: {}".format(cte)),
1884 policy = self.policy)
1885 self.assertEqual(len(self.get_defects(msg)), 0)
1886
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001887 def test_invalid_content_type(self):
1888 eq = self.assertEqual
1889 neq = self.ndiffAssertEqual
1890 msg = Message()
1891 # RFC 2045, $5.2 says invalid yields text/plain
1892 msg['Content-Type'] = 'text'
1893 eq(msg.get_content_maintype(), 'text')
1894 eq(msg.get_content_subtype(), 'plain')
1895 eq(msg.get_content_type(), 'text/plain')
1896 # Clear the old value and try something /really/ invalid
1897 del msg['content-type']
1898 msg['Content-Type'] = 'foo'
1899 eq(msg.get_content_maintype(), 'text')
1900 eq(msg.get_content_subtype(), 'plain')
1901 eq(msg.get_content_type(), 'text/plain')
1902 # Still, make sure that the message is idempotently generated
1903 s = StringIO()
1904 g = Generator(s)
1905 g.flatten(msg)
1906 neq(s.getvalue(), 'Content-Type: foo\n\n')
1907
1908 def test_no_start_boundary(self):
1909 eq = self.ndiffAssertEqual
1910 msg = self._msgobj('msg_31.txt')
1911 eq(msg.get_payload(), """\
1912--BOUNDARY
1913Content-Type: text/plain
1914
1915message 1
1916
1917--BOUNDARY
1918Content-Type: text/plain
1919
1920message 2
1921
1922--BOUNDARY--
1923""")
1924
1925 def test_no_separating_blank_line(self):
1926 eq = self.ndiffAssertEqual
1927 msg = self._msgobj('msg_35.txt')
1928 eq(msg.as_string(), """\
1929From: aperson@dom.ain
1930To: bperson@dom.ain
1931Subject: here's something interesting
1932
1933counter to RFC 2822, there's no separating newline here
1934""")
1935
1936 def test_lying_multipart(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001937 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001938 msg = self._msgobj('msg_41.txt')
1939 unless(hasattr(msg, 'defects'))
R David Murray3edd22a2011-04-18 13:59:37 -04001940 self.assertEqual(len(self.get_defects(msg)), 2)
1941 unless(isinstance(self.get_defects(msg)[0],
1942 errors.NoBoundaryInMultipartDefect))
1943 unless(isinstance(self.get_defects(msg)[1],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001944 errors.MultipartInvariantViolationDefect))
1945
1946 def test_missing_start_boundary(self):
1947 outer = self._msgobj('msg_42.txt')
1948 # The message structure is:
1949 #
1950 # multipart/mixed
1951 # text/plain
1952 # message/rfc822
1953 # multipart/mixed [*]
1954 #
1955 # [*] This message is missing its start boundary
1956 bad = outer.get_payload(1).get_payload(0)
R David Murray3edd22a2011-04-18 13:59:37 -04001957 self.assertEqual(len(self.get_defects(bad)), 1)
1958 self.assertTrue(isinstance(self.get_defects(bad)[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001959 errors.StartBoundaryNotFoundDefect))
1960
1961 def test_first_line_is_continuation_header(self):
1962 eq = self.assertEqual
1963 m = ' Line 1\nLine 2\nLine 3'
R David Murray3edd22a2011-04-18 13:59:37 -04001964 msg = email.message_from_string(m, policy=self.policy)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001965 eq(msg.keys(), [])
1966 eq(msg.get_payload(), 'Line 2\nLine 3')
R David Murray3edd22a2011-04-18 13:59:37 -04001967 eq(len(self.get_defects(msg)), 1)
1968 self.assertTrue(isinstance(self.get_defects(msg)[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001969 errors.FirstHeaderLineIsContinuationDefect))
R David Murray3edd22a2011-04-18 13:59:37 -04001970 eq(self.get_defects(msg)[0].line, ' Line 1\n')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001971
1972
class TestNonConformant(TestNonConformantBase, TestEmailBase):
    # Runs the shared non-conformance tests with the default policy; the
    # defects are read from the parsed object's own ``defects`` attribute.

    policy = email.policy.default

    def get_defects(self, obj):
        return obj.defects
1979
1980
class TestNonConformantCapture(TestNonConformantBase, TestEmailBase):
    # Runs the shared non-conformance tests with a policy whose
    # register_defect hook appends every defect to one list.

    class CapturePolicy(email.policy.Policy):
        # Replaced by a fresh list for each policy instance in setUp.
        captured = None

        def register_defect(self, obj, defect):
            self.captured.append(defect)

    def setUp(self):
        self.policy = self.CapturePolicy(captured=[])

    def get_defects(self, obj):
        # The policy holds all defects in one list, so ``obj`` is not used.
        return self.policy.captured
1993
1994
class TestRaisingDefects(TestEmailBase):
    # Each test expects parsing under email.policy.strict to raise the
    # named defect class as an exception.

    def _msgobj(self, filename):
        with openfile(filename) as fp:
            return email.message_from_file(fp, policy=email.policy.strict)

    def test_same_boundary_inner_outer(self):
        with self.assertRaises(errors.StartBoundaryNotFoundDefect):
            self._msgobj('msg_15.txt')

    def test_multipart_no_boundary(self):
        with self.assertRaises(errors.NoBoundaryInMultipartDefect):
            self._msgobj('msg_25.txt')

    def test_lying_multipart(self):
        with self.assertRaises(errors.NoBoundaryInMultipartDefect):
            self._msgobj('msg_41.txt')

    def test_missing_start_boundary(self):
        with self.assertRaises(errors.StartBoundaryNotFoundDefect):
            self._msgobj('msg_42.txt')

    def test_first_line_is_continuation_header(self):
        m = ' Line 1\nLine 2\nLine 3'
        with self.assertRaises(errors.FirstHeaderLineIsContinuationDefect):
            # Only the raised exception matters; the result was previously
            # bound to an unused local.
            email.message_from_string(m, policy=email.policy.strict)
2022
Ezio Melottib3aedd42010-11-20 19:04:17 +00002023
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002024# Test RFC 2047 header encoding and decoding
Guido van Rossum9604e662007-08-30 03:46:43 +00002025class TestRFC2047(TestEmailBase):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002026 def test_rfc2047_multiline(self):
2027 eq = self.assertEqual
2028 s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
2029 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
2030 dh = decode_header(s)
2031 eq(dh, [
2032 (b'Re:', None),
2033 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
2034 (b'baz foo bar', None),
2035 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
2036 header = make_header(dh)
2037 eq(str(header),
2038 'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
Barry Warsaw00b34222007-08-31 02:35:00 +00002039 self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00002040Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
2041 =?mac-iceland?q?=9Arg=8Cs?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002042
2043 def test_whitespace_eater_unicode(self):
2044 eq = self.assertEqual
2045 s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
2046 dh = decode_header(s)
2047 eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
2048 (b'Pirard <pirard@dom.ain>', None)])
2049 header = str(make_header(dh))
2050 eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')
2051
2052 def test_whitespace_eater_unicode_2(self):
2053 eq = self.assertEqual
2054 s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
2055 dh = decode_header(s)
2056 eq(dh, [(b'The', None), (b'quick brown fox', 'iso-8859-1'),
2057 (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
2058 hu = str(make_header(dh))
2059 eq(hu, 'The quick brown fox jumped over the lazy dog')
2060
2061 def test_rfc2047_missing_whitespace(self):
2062 s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
2063 dh = decode_header(s)
2064 self.assertEqual(dh, [(s, None)])
2065
2066 def test_rfc2047_with_whitespace(self):
2067 s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
2068 dh = decode_header(s)
2069 self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
2070 (b'rg', None), (b'\xe5', 'iso-8859-1'),
2071 (b'sbord', None)])
2072
R. David Murrayc4e69cc2010-08-03 22:14:10 +00002073 def test_rfc2047_B_bad_padding(self):
2074 s = '=?iso-8859-1?B?%s?='
2075 data = [ # only test complete bytes
2076 ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
2077 ('dmk=', b'vi'), ('dmk', b'vi')
2078 ]
2079 for q, a in data:
2080 dh = decode_header(s % q)
2081 self.assertEqual(dh, [(a, 'iso-8859-1')])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002082
R. David Murray31e984c2010-10-01 15:40:20 +00002083 def test_rfc2047_Q_invalid_digits(self):
2084 # issue 10004.
2085 s = '=?iso-8659-1?Q?andr=e9=zz?='
2086 self.assertEqual(decode_header(s),
2087 [(b'andr\xe9=zz', 'iso-8659-1')])
2088
Ezio Melottib3aedd42010-11-20 19:04:17 +00002089
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002090# Test the MIMEMessage class
2091class TestMIMEMessage(TestEmailBase):
2092 def setUp(self):
2093 with openfile('msg_11.txt') as fp:
2094 self._text = fp.read()
2095
2096 def test_type_error(self):
2097 self.assertRaises(TypeError, MIMEMessage, 'a plain string')
2098
2099 def test_valid_argument(self):
2100 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002101 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002102 subject = 'A sub-message'
2103 m = Message()
2104 m['Subject'] = subject
2105 r = MIMEMessage(m)
2106 eq(r.get_content_type(), 'message/rfc822')
2107 payload = r.get_payload()
2108 unless(isinstance(payload, list))
2109 eq(len(payload), 1)
2110 subpart = payload[0]
2111 unless(subpart is m)
2112 eq(subpart['subject'], subject)
2113
2114 def test_bad_multipart(self):
2115 eq = self.assertEqual
2116 msg1 = Message()
2117 msg1['Subject'] = 'subpart 1'
2118 msg2 = Message()
2119 msg2['Subject'] = 'subpart 2'
2120 r = MIMEMessage(msg1)
2121 self.assertRaises(errors.MultipartConversionError, r.attach, msg2)
2122
2123 def test_generate(self):
2124 # First craft the message to be encapsulated
2125 m = Message()
2126 m['Subject'] = 'An enclosed message'
2127 m.set_payload('Here is the body of the message.\n')
2128 r = MIMEMessage(m)
2129 r['Subject'] = 'The enclosing message'
2130 s = StringIO()
2131 g = Generator(s)
2132 g.flatten(r)
2133 self.assertEqual(s.getvalue(), """\
2134Content-Type: message/rfc822
2135MIME-Version: 1.0
2136Subject: The enclosing message
2137
2138Subject: An enclosed message
2139
2140Here is the body of the message.
2141""")
2142
2143 def test_parse_message_rfc822(self):
2144 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002145 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002146 msg = self._msgobj('msg_11.txt')
2147 eq(msg.get_content_type(), 'message/rfc822')
2148 payload = msg.get_payload()
2149 unless(isinstance(payload, list))
2150 eq(len(payload), 1)
2151 submsg = payload[0]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002152 self.assertTrue(isinstance(submsg, Message))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002153 eq(submsg['subject'], 'An enclosed message')
2154 eq(submsg.get_payload(), 'Here is the body of the message.\n')
2155
2156 def test_dsn(self):
2157 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002158 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002159 # msg 16 is a Delivery Status Notification, see RFC 1894
2160 msg = self._msgobj('msg_16.txt')
2161 eq(msg.get_content_type(), 'multipart/report')
2162 unless(msg.is_multipart())
2163 eq(len(msg.get_payload()), 3)
2164 # Subpart 1 is a text/plain, human readable section
2165 subpart = msg.get_payload(0)
2166 eq(subpart.get_content_type(), 'text/plain')
2167 eq(subpart.get_payload(), """\
2168This report relates to a message you sent with the following header fields:
2169
2170 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
2171 Date: Sun, 23 Sep 2001 20:10:55 -0700
2172 From: "Ian T. Henry" <henryi@oxy.edu>
2173 To: SoCal Raves <scr@socal-raves.org>
2174 Subject: [scr] yeah for Ians!!
2175
2176Your message cannot be delivered to the following recipients:
2177
2178 Recipient address: jangel1@cougar.noc.ucla.edu
2179 Reason: recipient reached disk quota
2180
2181""")
2182 # Subpart 2 contains the machine parsable DSN information. It
2183 # consists of two blocks of headers, represented by two nested Message
2184 # objects.
2185 subpart = msg.get_payload(1)
2186 eq(subpart.get_content_type(), 'message/delivery-status')
2187 eq(len(subpart.get_payload()), 2)
2188 # message/delivery-status should treat each block as a bunch of
2189 # headers, i.e. a bunch of Message objects.
2190 dsn1 = subpart.get_payload(0)
2191 unless(isinstance(dsn1, Message))
2192 eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
2193 eq(dsn1.get_param('dns', header='reporting-mta'), '')
2194 # Try a missing one <wink>
2195 eq(dsn1.get_param('nsd', header='reporting-mta'), None)
2196 dsn2 = subpart.get_payload(1)
2197 unless(isinstance(dsn2, Message))
2198 eq(dsn2['action'], 'failed')
2199 eq(dsn2.get_params(header='original-recipient'),
2200 [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
2201 eq(dsn2.get_param('rfc822', header='final-recipient'), '')
2202 # Subpart 3 is the original message
2203 subpart = msg.get_payload(2)
2204 eq(subpart.get_content_type(), 'message/rfc822')
2205 payload = subpart.get_payload()
2206 unless(isinstance(payload, list))
2207 eq(len(payload), 1)
2208 subsubpart = payload[0]
2209 unless(isinstance(subsubpart, Message))
2210 eq(subsubpart.get_content_type(), 'text/plain')
2211 eq(subsubpart['message-id'],
2212 '<002001c144a6$8752e060$56104586@oxy.edu>')
2213
2214 def test_epilogue(self):
2215 eq = self.ndiffAssertEqual
2216 with openfile('msg_21.txt') as fp:
2217 text = fp.read()
2218 msg = Message()
2219 msg['From'] = 'aperson@dom.ain'
2220 msg['To'] = 'bperson@dom.ain'
2221 msg['Subject'] = 'Test'
2222 msg.preamble = 'MIME message'
2223 msg.epilogue = 'End of MIME message\n'
2224 msg1 = MIMEText('One')
2225 msg2 = MIMEText('Two')
2226 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
2227 msg.attach(msg1)
2228 msg.attach(msg2)
2229 sfp = StringIO()
2230 g = Generator(sfp)
2231 g.flatten(msg)
2232 eq(sfp.getvalue(), text)
2233
2234 def test_no_nl_preamble(self):
2235 eq = self.ndiffAssertEqual
2236 msg = Message()
2237 msg['From'] = 'aperson@dom.ain'
2238 msg['To'] = 'bperson@dom.ain'
2239 msg['Subject'] = 'Test'
2240 msg.preamble = 'MIME message'
2241 msg.epilogue = ''
2242 msg1 = MIMEText('One')
2243 msg2 = MIMEText('Two')
2244 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
2245 msg.attach(msg1)
2246 msg.attach(msg2)
2247 eq(msg.as_string(), """\
2248From: aperson@dom.ain
2249To: bperson@dom.ain
2250Subject: Test
2251Content-Type: multipart/mixed; boundary="BOUNDARY"
2252
2253MIME message
2254--BOUNDARY
2255Content-Type: text/plain; charset="us-ascii"
2256MIME-Version: 1.0
2257Content-Transfer-Encoding: 7bit
2258
2259One
2260--BOUNDARY
2261Content-Type: text/plain; charset="us-ascii"
2262MIME-Version: 1.0
2263Content-Transfer-Encoding: 7bit
2264
2265Two
2266--BOUNDARY--
2267""")
2268
2269 def test_default_type(self):
2270 eq = self.assertEqual
2271 with openfile('msg_30.txt') as fp:
2272 msg = email.message_from_file(fp)
2273 container1 = msg.get_payload(0)
2274 eq(container1.get_default_type(), 'message/rfc822')
2275 eq(container1.get_content_type(), 'message/rfc822')
2276 container2 = msg.get_payload(1)
2277 eq(container2.get_default_type(), 'message/rfc822')
2278 eq(container2.get_content_type(), 'message/rfc822')
2279 container1a = container1.get_payload(0)
2280 eq(container1a.get_default_type(), 'text/plain')
2281 eq(container1a.get_content_type(), 'text/plain')
2282 container2a = container2.get_payload(0)
2283 eq(container2a.get_default_type(), 'text/plain')
2284 eq(container2a.get_content_type(), 'text/plain')
2285
2286 def test_default_type_with_explicit_container_type(self):
2287 eq = self.assertEqual
2288 with openfile('msg_28.txt') as fp:
2289 msg = email.message_from_file(fp)
2290 container1 = msg.get_payload(0)
2291 eq(container1.get_default_type(), 'message/rfc822')
2292 eq(container1.get_content_type(), 'message/rfc822')
2293 container2 = msg.get_payload(1)
2294 eq(container2.get_default_type(), 'message/rfc822')
2295 eq(container2.get_content_type(), 'message/rfc822')
2296 container1a = container1.get_payload(0)
2297 eq(container1a.get_default_type(), 'text/plain')
2298 eq(container1a.get_content_type(), 'text/plain')
2299 container2a = container2.get_payload(0)
2300 eq(container2a.get_default_type(), 'text/plain')
2301 eq(container2a.get_content_type(), 'text/plain')
2302
2303 def test_default_type_non_parsed(self):
2304 eq = self.assertEqual
2305 neq = self.ndiffAssertEqual
2306 # Set up container
2307 container = MIMEMultipart('digest', 'BOUNDARY')
2308 container.epilogue = ''
2309 # Set up subparts
2310 subpart1a = MIMEText('message 1\n')
2311 subpart2a = MIMEText('message 2\n')
2312 subpart1 = MIMEMessage(subpart1a)
2313 subpart2 = MIMEMessage(subpart2a)
2314 container.attach(subpart1)
2315 container.attach(subpart2)
2316 eq(subpart1.get_content_type(), 'message/rfc822')
2317 eq(subpart1.get_default_type(), 'message/rfc822')
2318 eq(subpart2.get_content_type(), 'message/rfc822')
2319 eq(subpart2.get_default_type(), 'message/rfc822')
2320 neq(container.as_string(0), '''\
2321Content-Type: multipart/digest; boundary="BOUNDARY"
2322MIME-Version: 1.0
2323
2324--BOUNDARY
2325Content-Type: message/rfc822
2326MIME-Version: 1.0
2327
2328Content-Type: text/plain; charset="us-ascii"
2329MIME-Version: 1.0
2330Content-Transfer-Encoding: 7bit
2331
2332message 1
2333
2334--BOUNDARY
2335Content-Type: message/rfc822
2336MIME-Version: 1.0
2337
2338Content-Type: text/plain; charset="us-ascii"
2339MIME-Version: 1.0
2340Content-Transfer-Encoding: 7bit
2341
2342message 2
2343
2344--BOUNDARY--
2345''')
2346 del subpart1['content-type']
2347 del subpart1['mime-version']
2348 del subpart2['content-type']
2349 del subpart2['mime-version']
2350 eq(subpart1.get_content_type(), 'message/rfc822')
2351 eq(subpart1.get_default_type(), 'message/rfc822')
2352 eq(subpart2.get_content_type(), 'message/rfc822')
2353 eq(subpart2.get_default_type(), 'message/rfc822')
2354 neq(container.as_string(0), '''\
2355Content-Type: multipart/digest; boundary="BOUNDARY"
2356MIME-Version: 1.0
2357
2358--BOUNDARY
2359
2360Content-Type: text/plain; charset="us-ascii"
2361MIME-Version: 1.0
2362Content-Transfer-Encoding: 7bit
2363
2364message 1
2365
2366--BOUNDARY
2367
2368Content-Type: text/plain; charset="us-ascii"
2369MIME-Version: 1.0
2370Content-Transfer-Encoding: 7bit
2371
2372message 2
2373
2374--BOUNDARY--
2375''')
2376
2377 def test_mime_attachments_in_constructor(self):
2378 eq = self.assertEqual
2379 text1 = MIMEText('')
2380 text2 = MIMEText('')
2381 msg = MIMEMultipart(_subparts=(text1, text2))
2382 eq(len(msg.get_payload()), 2)
2383 eq(msg.get_payload(0), text1)
2384 eq(msg.get_payload(1), text2)
2385
Christian Heimes587c2bf2008-01-19 16:21:02 +00002386 def test_default_multipart_constructor(self):
2387 msg = MIMEMultipart()
2388 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002389
Ezio Melottib3aedd42010-11-20 19:04:17 +00002390
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):
    """Round-trip sample messages through the parser and the generator.

    Each test reads a sample file, parses it into a message tree and
    regenerates the text with header wrapping disabled; the regenerated
    text must equal the text that was read.
    """

    # Line separator expected at the end of parsed preambles, epilogues
    # and payloads in the tests below.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return (message, text): the parsed message and the raw text of
        # the sample file *filename*.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten *msg* and require the output to equal *text*.
        # maxheaderlen=0 turns off header wrapping so long headers come
        # back exactly as they were read.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def _roundtrip(self, filename, unixfrom=False):
        # Parse *filename* and check that flattening reproduces it.  Goes
        # through _msgobj/_idempotent so overrides of either are honored.
        msg, text = self._msgobj(filename)
        self._idempotent(msg, text, unixfrom=unixfrom)

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._roundtrip('msg_04.txt')

    def test_MIME_digest(self):
        self._roundtrip('msg_02.txt')

    def test_long_header(self):
        self._roundtrip('msg_27.txt')

    def test_MIME_digest_with_part_headers(self):
        self._roundtrip('msg_28.txt')

    def test_mixed_with_image(self):
        self._roundtrip('msg_06.txt')

    def test_multipart_report(self):
        self._roundtrip('msg_05.txt')

    def test_dsn(self):
        self._roundtrip('msg_16.txt')

    def test_preamble_epilogue(self):
        self._roundtrip('msg_21.txt')

    def test_multipart_one_part(self):
        self._roundtrip('msg_23.txt')

    def test_multipart_no_parts(self):
        self._roundtrip('msg_24.txt')

    def test_no_start_boundary(self):
        self._roundtrip('msg_31.txt')

    def test_rfc2231_charset(self):
        self._roundtrip('msg_32.txt')

    def test_more_rfc2231_parameters(self):
        self._roundtrip('msg_33.txt')

    def test_text_plain_in_a_multipart_digest(self):
        self._roundtrip('msg_34.txt')

    def test_nested_multipart_mixeds(self):
        self._roundtrip('msg_12a.txt')

    def test_message_external_body_idempotent(self):
        self._roundtrip('msg_36.txt')

    def test_message_delivery_status(self):
        # This is the one sample that is flattened with its Unix-From line.
        self._roundtrip('msg_43.txt', unixfrom=True)

    def test_message_signed_idempotent(self):
        self._roundtrip('msg_45.txt')

    def test_content_type(self):
        eq = self.assertEqual
        # Only the parsed message is inspected; the raw text is not needed.
        msg, _ = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002553
2554
Ezio Melottib3aedd42010-11-20 19:04:17 +00002555
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002556# Test various other bits of the package's functionality
2557class TestMiscellaneous(TestEmailBase):
2558 def test_message_from_string(self):
2559 with openfile('msg_01.txt') as fp:
2560 text = fp.read()
2561 msg = email.message_from_string(text)
2562 s = StringIO()
2563 # Don't wrap/continue long headers since we're trying to test
2564 # idempotency.
2565 g = Generator(s, maxheaderlen=0)
2566 g.flatten(msg)
2567 self.assertEqual(text, s.getvalue())
2568
2569 def test_message_from_file(self):
2570 with openfile('msg_01.txt') as fp:
2571 text = fp.read()
2572 fp.seek(0)
2573 msg = email.message_from_file(fp)
2574 s = StringIO()
2575 # Don't wrap/continue long headers since we're trying to test
2576 # idempotency.
2577 g = Generator(s, maxheaderlen=0)
2578 g.flatten(msg)
2579 self.assertEqual(text, s.getvalue())
2580
2581 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002582 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002583 with openfile('msg_01.txt') as fp:
2584 text = fp.read()
2585
2586 # Create a subclass
2587 class MyMessage(Message):
2588 pass
2589
2590 msg = email.message_from_string(text, MyMessage)
2591 unless(isinstance(msg, MyMessage))
2592 # Try something more complicated
2593 with openfile('msg_02.txt') as fp:
2594 text = fp.read()
2595 msg = email.message_from_string(text, MyMessage)
2596 for subpart in msg.walk():
2597 unless(isinstance(subpart, MyMessage))
2598
2599 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002600 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002601 # Create a subclass
2602 class MyMessage(Message):
2603 pass
2604
2605 with openfile('msg_01.txt') as fp:
2606 msg = email.message_from_file(fp, MyMessage)
2607 unless(isinstance(msg, MyMessage))
2608 # Try something more complicated
2609 with openfile('msg_02.txt') as fp:
2610 msg = email.message_from_file(fp, MyMessage)
2611 for subpart in msg.walk():
2612 unless(isinstance(subpart, MyMessage))
2613
2614 def test__all__(self):
2615 module = __import__('email')
R David Murray1b6c7242012-03-16 22:43:05 -04002616 self.assertEqual(sorted(module.__all__), [
2617 'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2618 'generator', 'header', 'iterators', 'message',
2619 'message_from_binary_file', 'message_from_bytes',
2620 'message_from_file', 'message_from_string', 'mime', 'parser',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002621 'quoprimime', 'utils',
2622 ])
2623
2624 def test_formatdate(self):
2625 now = time.time()
2626 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2627 time.gmtime(now)[:6])
2628
2629 def test_formatdate_localtime(self):
2630 now = time.time()
2631 self.assertEqual(
2632 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2633 time.localtime(now)[:6])
2634
2635 def test_formatdate_usegmt(self):
2636 now = time.time()
2637 self.assertEqual(
2638 utils.formatdate(now, localtime=False),
2639 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2640 self.assertEqual(
2641 utils.formatdate(now, localtime=False, usegmt=True),
2642 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2643
2644 def test_parsedate_none(self):
2645 self.assertEqual(utils.parsedate(''), None)
2646
2647 def test_parsedate_compact(self):
2648 # The FWS after the comma is optional
2649 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2650 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2651
2652 def test_parsedate_no_dayofweek(self):
2653 eq = self.assertEqual
2654 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2655 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2656
2657 def test_parsedate_compact_no_dayofweek(self):
2658 eq = self.assertEqual
2659 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2660 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2661
R. David Murray4a62e892010-12-23 20:35:46 +00002662 def test_parsedate_no_space_before_positive_offset(self):
2663 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2664 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2665
2666 def test_parsedate_no_space_before_negative_offset(self):
2667 # Issue 1155362: we already handled '+' for this case.
2668 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2669 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2670
2671
R David Murrayaccd1c02011-03-13 20:06:23 -04002672 def test_parsedate_accepts_time_with_dots(self):
2673 eq = self.assertEqual
2674 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2675 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2676 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2677 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2678
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002679 def test_parsedate_acceptable_to_time_functions(self):
2680 eq = self.assertEqual
2681 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2682 t = int(time.mktime(timetup))
2683 eq(time.localtime(t)[:6], timetup[:6])
2684 eq(int(time.strftime('%Y', timetup)), 2003)
2685 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2686 t = int(time.mktime(timetup[:9]))
2687 eq(time.localtime(t)[:6], timetup[:6])
2688 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2689
R. David Murray219d1c82010-08-25 00:45:55 +00002690 def test_parsedate_y2k(self):
2691 """Test for parsing a date with a two-digit year.
2692
2693 Parsing a date with a two-digit year should return the correct
2694 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2695 obsoletes RFC822) requires four-digit years.
2696
2697 """
2698 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2699 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2700 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2701 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2702
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002703 def test_parseaddr_empty(self):
2704 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2705 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2706
2707 def test_noquote_dump(self):
2708 self.assertEqual(
2709 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2710 'A Silly Person <person@dom.ain>')
2711
2712 def test_escape_dump(self):
2713 self.assertEqual(
2714 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
R David Murrayb53319f2012-03-14 15:31:47 -04002715 r'"A (Very) Silly Person" <person@dom.ain>')
2716 self.assertEqual(
2717 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
2718 ('A (Very) Silly Person', 'person@dom.ain'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002719 a = r'A \(Special\) Person'
2720 b = 'person@dom.ain'
2721 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2722
2723 def test_escape_backslashes(self):
2724 self.assertEqual(
2725 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2726 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2727 a = r'Arthur \Backslash\ Foobar'
2728 b = 'person@dom.ain'
2729 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2730
R David Murray8debacb2011-04-06 09:35:57 -04002731 def test_quotes_unicode_names(self):
2732 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2733 name = "H\u00e4ns W\u00fcrst"
2734 addr = 'person@dom.ain'
2735 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2736 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2737 self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2738 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2739 latin1_quopri)
2740
2741 def test_accepts_any_charset_like_object(self):
2742 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2743 name = "H\u00e4ns W\u00fcrst"
2744 addr = 'person@dom.ain'
2745 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2746 foobar = "FOOBAR"
2747 class CharsetMock:
2748 def header_encode(self, string):
2749 return foobar
2750 mock = CharsetMock()
2751 mock_expected = "%s <%s>" % (foobar, addr)
2752 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2753 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2754 utf8_base64)
2755
2756 def test_invalid_charset_like_object_raises_error(self):
2757 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2758 name = "H\u00e4ns W\u00fcrst"
2759 addr = 'person@dom.ain'
2760 # A object without a header_encode method:
2761 bad_charset = object()
2762 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
2763 bad_charset)
2764
2765 def test_unicode_address_raises_error(self):
2766 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2767 addr = 'pers\u00f6n@dom.in'
2768 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
2769 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
2770
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002771 def test_name_with_dot(self):
2772 x = 'John X. Doe <jxd@example.com>'
2773 y = '"John X. Doe" <jxd@example.com>'
2774 a, b = ('John X. Doe', 'jxd@example.com')
2775 self.assertEqual(utils.parseaddr(x), (a, b))
2776 self.assertEqual(utils.parseaddr(y), (a, b))
2777 # formataddr() quotes the name if there's a dot in it
2778 self.assertEqual(utils.formataddr((a, b)), y)
2779
R. David Murray5397e862010-10-02 15:58:26 +00002780 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2781 # issue 10005. Note that in the third test the second pair of
2782 # backslashes is not actually a quoted pair because it is not inside a
2783 # comment or quoted string: the address being parsed has a quoted
2784 # string containing a quoted backslash, followed by 'example' and two
2785 # backslashes, followed by another quoted string containing a space and
2786 # the word 'example'. parseaddr copies those two backslashes
2787 # literally. Per rfc5322 this is not technically correct since a \ may
2788 # not appear in an address outside of a quoted string. It is probably
2789 # a sensible Postel interpretation, though.
2790 eq = self.assertEqual
2791 eq(utils.parseaddr('""example" example"@example.com'),
2792 ('', '""example" example"@example.com'))
2793 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2794 ('', '"\\"example\\" example"@example.com'))
2795 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2796 ('', '"\\\\"example\\\\" example"@example.com'))
2797
R. David Murray63563cd2010-12-18 18:25:38 +00002798 def test_parseaddr_preserves_spaces_in_local_part(self):
2799 # issue 9286. A normal RFC5322 local part should not contain any
2800 # folding white space, but legacy local parts can (they are a sequence
2801 # of atoms, not dotatoms). On the other hand we strip whitespace from
2802 # before the @ and around dots, on the assumption that the whitespace
2803 # around the punctuation is a mistake in what would otherwise be
2804 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2805 self.assertEqual(('', "merwok wok@xample.com"),
2806 utils.parseaddr("merwok wok@xample.com"))
2807 self.assertEqual(('', "merwok wok@xample.com"),
2808 utils.parseaddr("merwok wok@xample.com"))
2809 self.assertEqual(('', "merwok wok@xample.com"),
2810 utils.parseaddr(" merwok wok @xample.com"))
2811 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2812 utils.parseaddr('merwok"wok" wok@xample.com'))
2813 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2814 utils.parseaddr('merwok. wok . wok@xample.com'))
2815
R David Murrayb53319f2012-03-14 15:31:47 -04002816 def test_formataddr_does_not_quote_parens_in_quoted_string(self):
2817 addr = ("'foo@example.com' (foo@example.com)",
2818 'foo@example.com')
2819 addrstr = ('"\'foo@example.com\' '
2820 '(foo@example.com)" <foo@example.com>')
2821 self.assertEqual(utils.parseaddr(addrstr), addr)
2822 self.assertEqual(utils.formataddr(addr), addrstr)
2823
2824
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002825 def test_multiline_from_comment(self):
2826 x = """\
2827Foo
2828\tBar <foo@example.com>"""
2829 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2830
2831 def test_quote_dump(self):
2832 self.assertEqual(
2833 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2834 r'"A Silly; Person" <person@dom.ain>')
2835
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002836 def test_charset_richcomparisons(self):
2837 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002838 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002839 cset1 = Charset()
2840 cset2 = Charset()
2841 eq(cset1, 'us-ascii')
2842 eq(cset1, 'US-ASCII')
2843 eq(cset1, 'Us-AsCiI')
2844 eq('us-ascii', cset1)
2845 eq('US-ASCII', cset1)
2846 eq('Us-AsCiI', cset1)
2847 ne(cset1, 'usascii')
2848 ne(cset1, 'USASCII')
2849 ne(cset1, 'UsAsCiI')
2850 ne('usascii', cset1)
2851 ne('USASCII', cset1)
2852 ne('UsAsCiI', cset1)
2853 eq(cset1, cset2)
2854 eq(cset2, cset1)
2855
2856 def test_getaddresses(self):
2857 eq = self.assertEqual
2858 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2859 'Bud Person <bperson@dom.ain>']),
2860 [('Al Person', 'aperson@dom.ain'),
2861 ('Bud Person', 'bperson@dom.ain')])
2862
2863 def test_getaddresses_nasty(self):
2864 eq = self.assertEqual
2865 eq(utils.getaddresses(['foo: ;']), [('', '')])
2866 eq(utils.getaddresses(
2867 ['[]*-- =~$']),
2868 [('', ''), ('', ''), ('', '*--')])
2869 eq(utils.getaddresses(
2870 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2871 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2872
2873 def test_getaddresses_embedded_comment(self):
2874 """Test proper handling of a nested comment"""
2875 eq = self.assertEqual
2876 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2877 eq(addrs[0][1], 'foo@bar.com')
2878
2879 def test_utils_quote_unquote(self):
2880 eq = self.assertEqual
2881 msg = Message()
2882 msg.add_header('content-disposition', 'attachment',
2883 filename='foo\\wacky"name')
2884 eq(msg.get_filename(), 'foo\\wacky"name')
2885
2886 def test_get_body_encoding_with_bogus_charset(self):
2887 charset = Charset('not a charset')
2888 self.assertEqual(charset.get_body_encoding(), 'base64')
2889
2890 def test_get_body_encoding_with_uppercase_charset(self):
2891 eq = self.assertEqual
2892 msg = Message()
2893 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2894 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2895 charsets = msg.get_charsets()
2896 eq(len(charsets), 1)
2897 eq(charsets[0], 'utf-8')
2898 charset = Charset(charsets[0])
2899 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002900 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002901 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2902 eq(msg.get_payload(decode=True), b'hello world')
2903 eq(msg['content-transfer-encoding'], 'base64')
2904 # Try another one
2905 msg = Message()
2906 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2907 charsets = msg.get_charsets()
2908 eq(len(charsets), 1)
2909 eq(charsets[0], 'us-ascii')
2910 charset = Charset(charsets[0])
2911 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2912 msg.set_payload('hello world', charset=charset)
2913 eq(msg.get_payload(), 'hello world')
2914 eq(msg['content-transfer-encoding'], '7bit')
2915
2916 def test_charsets_case_insensitive(self):
2917 lc = Charset('us-ascii')
2918 uc = Charset('US-ASCII')
2919 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2920
2921 def test_partial_falls_inside_message_delivery_status(self):
2922 eq = self.ndiffAssertEqual
2923 # The Parser interface provides chunks of data to FeedParser in 8192
2924 # byte gulps. SF bug #1076485 found one of those chunks inside
2925 # message/delivery-status header block, which triggered an
2926 # unreadline() of NeedMoreData.
2927 msg = self._msgobj('msg_43.txt')
2928 sfp = StringIO()
2929 iterators._structure(msg, sfp)
2930 eq(sfp.getvalue(), """\
2931multipart/report
2932 text/plain
2933 message/delivery-status
2934 text/plain
2935 text/plain
2936 text/plain
2937 text/plain
2938 text/plain
2939 text/plain
2940 text/plain
2941 text/plain
2942 text/plain
2943 text/plain
2944 text/plain
2945 text/plain
2946 text/plain
2947 text/plain
2948 text/plain
2949 text/plain
2950 text/plain
2951 text/plain
2952 text/plain
2953 text/plain
2954 text/plain
2955 text/plain
2956 text/plain
2957 text/plain
2958 text/plain
2959 text/plain
2960 text/rfc822-headers
2961""")
2962
R. David Murraya0b44b52010-12-02 21:47:19 +00002963 def test_make_msgid_domain(self):
2964 self.assertEqual(
2965 email.utils.make_msgid(domain='testdomain-string')[-19:],
2966 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002967
Ezio Melottib3aedd42010-11-20 19:04:17 +00002968
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002969# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for email.iterators and for feedparser's line buffering."""

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        it = iterators.typed_subpart_iterator(msg, 'text')
        # One payload per matching subpart, so the length is the part count.
        lines = [subpart.get_payload() for subpart in it]
        eq(len(lines), 2)
        eq(EMPTYSTRING.join(lines), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        it = iterators.typed_subpart_iterator(msg, 'text', 'plain')
        lines = [subpart.get_payload() for subpart in it]
        eq(len(lines), 1)
        eq(EMPTYSTRING.join(lines), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        from email.feedparser import BufferedSubFile, NeedMoreData
        # (text pushed, number of complete lines readable after that push)
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            # Drain every line that is available now; readline() returns
            # NeedMoreData once the buffer holds no further complete line.
            ready = list(iter(bsf.readline, NeedMoreData))
            om.extend(ready)
            self.assertEqual(n, len(ready))
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: output must rebuild the input.
        self.assertEqual(''.join(il for il, _ in imt), ''.join(om))
3056
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003057
Ezio Melottib3aedd42010-11-20 19:04:17 +00003058
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003059class TestParsers(TestEmailBase):
R David Murrayb35c8502011-04-13 16:46:05 -04003060
def test_header_parser(self):
    # HeaderParser reads only the headers, so even a multipart document
    # comes back as a non-multipart message whose payload is one string.
    eq = self.assertEqual
    # Parse only the headers of a complex multipart MIME document
    with openfile('msg_02.txt') as fp:
        msg = HeaderParser().parse(fp)
    eq(msg['from'], 'ppp-request@zzz.org')
    eq(msg['to'], 'ppp@zzz.org')
    eq(msg.get_content_type(), 'multipart/mixed')
    self.assertFalse(msg.is_multipart())
    # assertIsInstance reports the actual type when the check fails.
    self.assertIsInstance(msg.get_payload(), str)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003071
def test_bytes_header_parser(self):
    # Same as the text HeaderParser check, but the file is opened in
    # binary mode and parsed with BytesHeaderParser.
    eq = self.assertEqual
    # Parse only the headers of a complex multipart MIME document
    with openfile('msg_02.txt', 'rb') as fp:
        msg = email.parser.BytesHeaderParser().parse(fp)
    eq(msg['from'], 'ppp-request@zzz.org')
    eq(msg['to'], 'ppp@zzz.org')
    eq(msg.get_content_type(), 'multipart/mixed')
    self.assertFalse(msg.is_multipart())
    # The payload is text by default and bytes when decode=True is given.
    self.assertIsInstance(msg.get_payload(), str)
    self.assertIsInstance(msg.get_payload(decode=True), bytes)
3083
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003084 def test_whitespace_continuation(self):
3085 eq = self.assertEqual
3086 # This message contains a line after the Subject: header that has only
3087 # whitespace, but it is not empty!
3088 msg = email.message_from_string("""\
3089From: aperson@dom.ain
3090To: bperson@dom.ain
3091Subject: the next line has a space on it
3092\x20
3093Date: Mon, 8 Apr 2002 15:09:19 -0400
3094Message-ID: spam
3095
3096Here's the message body
3097""")
3098 eq(msg['subject'], 'the next line has a space on it\n ')
3099 eq(msg['message-id'], 'spam')
3100 eq(msg.get_payload(), "Here's the message body\n")
3101
3102 def test_whitespace_continuation_last_header(self):
3103 eq = self.assertEqual
3104 # Like the previous test, but the subject line is the last
3105 # header.
3106 msg = email.message_from_string("""\
3107From: aperson@dom.ain
3108To: bperson@dom.ain
3109Date: Mon, 8 Apr 2002 15:09:19 -0400
3110Message-ID: spam
3111Subject: the next line has a space on it
3112\x20
3113
3114Here's the message body
3115""")
3116 eq(msg['subject'], 'the next line has a space on it\n ')
3117 eq(msg['message-id'], 'spam')
3118 eq(msg.get_payload(), "Here's the message body\n")
3119
def test_crlf_separation(self):
    # The file is opened with newline='\n', so its \r\n sequences reach
    # the parser untouched; the first part's payload must keep them.
    check = self.assertEqual
    with openfile('msg_26.txt', newline='\n') as fp:
        parsed = Parser().parse(fp)
    check(len(parsed.get_payload()), 2)
    text_part = parsed.get_payload(0)
    check(text_part.get_content_type(), 'text/plain')
    check(text_part.get_payload(), 'Simple email with attachment.\r\n\r\n')
    attachment = parsed.get_payload(1)
    check(attachment.get_content_type(), 'application/riscos')
3130
def test_crlf_flatten(self):
    # Using newline='\n' preserves the crlfs in this input file.
    with openfile('msg_26.txt', newline='\n') as fp:
        source = fp.read()
    parsed = email.message_from_string(source)
    out = StringIO()
    # Flattening with an explicit CRLF separator must give back the
    # text that was read.
    Generator(out).flatten(parsed, linesep='\r\n')
    self.assertEqual(out.getvalue(), source)
3140
def test_crlf_control_via_policy(self):
    # No linesep is passed to flatten(); the generator is configured only
    # through the SMTP policy, and its output must equal the source text.
    with openfile('msg_26.txt', newline='\n') as fp:
        source = fp.read()
    parsed = email.message_from_string(source)
    out = StringIO()
    generator = email.generator.Generator(out, policy=email.policy.SMTP)
    generator.flatten(parsed)
    self.assertEqual(out.getvalue(), source)
3149
def test_flatten_linesep_overrides_policy(self):
    # msg_27 is lf separated
    with openfile('msg_27.txt', newline='\n') as fp:
        source = fp.read()
    parsed = email.message_from_string(source)
    out = StringIO()
    generator = email.generator.Generator(out, policy=email.policy.SMTP)
    # The explicit linesep passed to flatten() must win over the policy
    # given to the generator.
    generator.flatten(parsed, linesep='\n')
    self.assertEqual(out.getvalue(), source)
3159
R. David Murray8451c4b2010-10-23 22:19:56 +00003160 maxDiff = None
3161
def test_multipart_digest_with_extra_mime_headers(self):
    check = self.assertEqual
    check_text = self.ndiffAssertEqual
    with openfile('msg_28.txt') as fp:
        digest = email.message_from_file(fp)
    # Structure is:
    # multipart/digest
    #   message/rfc822
    #     text/plain
    #   message/rfc822
    #     text/plain
    check(digest.is_multipart(), 1)
    check(len(digest.get_payload()), 2)
    # Both message/rfc822 parts have the same shape and differ only in
    # the body text of the message they wrap.
    for index, body in enumerate(('message 1\n', 'message 2\n')):
        wrapper = digest.get_payload(index)
        check(wrapper.get_content_type(), 'message/rfc822')
        check(wrapper.is_multipart(), 1)
        check(len(wrapper.get_payload()), 1)
        inner = wrapper.get_payload(0)
        check(inner.is_multipart(), 0)
        check(inner.get_content_type(), 'text/plain')
        check_text(inner.get_payload(), body)
3192
def test_three_lines(self):
    """Three header lines with no trailing newline: the last one is kept."""
    # A bug report by Andrew McNamara
    text = NL.join((
        'From: Andrew Person <aperson@dom.ain',
        'Subject: Test',
        'Date: Tue, 20 Aug 2002 16:43:45 +1000',
    ))
    parsed = email.message_from_string(text)
    self.assertEqual(parsed['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')
3200
3201 def test_strip_line_feed_and_carriage_return_in_headers(self):
3202 eq = self.assertEqual
3203 # For [ 1002475 ] email message parser doesn't handle \r\n correctly
3204 value1 = 'text'
3205 value2 = 'more text'
3206 m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
3207 value1, value2)
3208 msg = email.message_from_string(m)
3209 eq(msg.get('Header'), value1)
3210 eq(msg.get('Next-Header'), value2)
3211
3212 def test_rfc2822_header_syntax(self):
3213 eq = self.assertEqual
3214 m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3215 msg = email.message_from_string(m)
3216 eq(len(msg), 3)
3217 eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
3218 eq(msg.get_payload(), 'body')
3219
3220 def test_rfc2822_space_not_allowed_in_header(self):
3221 eq = self.assertEqual
3222 m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3223 msg = email.message_from_string(m)
3224 eq(len(msg.keys()), 0)
3225
3226 def test_rfc2822_one_character_header(self):
3227 eq = self.assertEqual
3228 m = 'A: first header\nB: second header\nCC: third header\n\nbody'
3229 msg = email.message_from_string(m)
3230 headers = msg.keys()
3231 headers.sort()
3232 eq(headers, ['A', 'B', 'CC'])
3233 eq(msg.get_payload(), 'body')
3234
R. David Murray45e0e142010-06-16 02:19:40 +00003235 def test_CRLFLF_at_end_of_part(self):
3236 # issue 5610: feedparser should not eat two chars from body part ending
3237 # with "\r\n\n".
3238 m = (
3239 "From: foo@bar.com\n"
3240 "To: baz\n"
3241 "Mime-Version: 1.0\n"
3242 "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
3243 "\n"
3244 "--BOUNDARY\n"
3245 "Content-Type: text/plain\n"
3246 "\n"
3247 "body ending with CRLF newline\r\n"
3248 "\n"
3249 "--BOUNDARY--\n"
3250 )
3251 msg = email.message_from_string(m)
3252 self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003253
Ezio Melottib3aedd42010-11-20 19:04:17 +00003254
R. David Murray96fd54e2010-10-08 15:55:28 +00003255class Test8BitBytesHandling(unittest.TestCase):
3256 # In Python3 all input is string, but that doesn't work if the actual input
3257 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3258 # decode byte streams using the surrogateescape error handler, and
3259 # reconvert to binary at appropriate places if we detect surrogates. This
3260 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3261 # but it does allow us to parse and preserve them, and to decode body
3262 # parts that use an 8bit CTE.
3263
3264 bodytest_msg = textwrap.dedent("""\
3265 From: foo@bar.com
3266 To: baz
3267 Mime-Version: 1.0
3268 Content-Type: text/plain; charset={charset}
3269 Content-Transfer-Encoding: {cte}
3270
3271 {bodyline}
3272 """)
3273
3274 def test_known_8bit_CTE(self):
3275 m = self.bodytest_msg.format(charset='utf-8',
3276 cte='8bit',
3277 bodyline='pöstal').encode('utf-8')
3278 msg = email.message_from_bytes(m)
3279 self.assertEqual(msg.get_payload(), "pöstal\n")
3280 self.assertEqual(msg.get_payload(decode=True),
3281 "pöstal\n".encode('utf-8'))
3282
3283 def test_unknown_8bit_CTE(self):
3284 m = self.bodytest_msg.format(charset='notavalidcharset',
3285 cte='8bit',
3286 bodyline='pöstal').encode('utf-8')
3287 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003288 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003289 self.assertEqual(msg.get_payload(decode=True),
3290 "pöstal\n".encode('utf-8'))
3291
3292 def test_8bit_in_quopri_body(self):
3293 # This is non-RFC compliant data...without 'decode' the library code
3294 # decodes the body using the charset from the headers, and because the
3295 # source byte really is utf-8 this works. This is likely to fail
3296 # against real dirty data (ie: produce mojibake), but the data is
3297 # invalid anyway so it is as good a guess as any. But this means that
3298 # this test just confirms the current behavior; that behavior is not
3299 # necessarily the best possible behavior. With 'decode' it is
3300 # returning the raw bytes, so that test should be of correct behavior,
3301 # or at least produce the same result that email4 did.
3302 m = self.bodytest_msg.format(charset='utf-8',
3303 cte='quoted-printable',
3304 bodyline='p=C3=B6stál').encode('utf-8')
3305 msg = email.message_from_bytes(m)
3306 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3307 self.assertEqual(msg.get_payload(decode=True),
3308 'pöstál\n'.encode('utf-8'))
3309
3310 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3311 # This is similar to the previous test, but proves that if the 8bit
3312 # byte is undecodeable in the specified charset, it gets replaced
3313 # by the unicode 'unknown' character. Again, this may or may not
3314 # be the ideal behavior. Note that if decode=False none of the
3315 # decoders will get involved, so this is the only test we need
3316 # for this behavior.
3317 m = self.bodytest_msg.format(charset='ascii',
3318 cte='quoted-printable',
3319 bodyline='p=C3=B6stál').encode('utf-8')
3320 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003321 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003322 self.assertEqual(msg.get_payload(decode=True),
3323 'pöstál\n'.encode('utf-8'))
3324
def test_8bit_in_base64_body(self):
    """An 8bit byte in a base64 body: decode=True returns the raw bytes."""
    # Sticking an 8bit byte in a base64 block makes it undecodable by
    # normal means, so the block is returned undecoded, but as bytes.
    raw = self.bodytest_msg.format(
        charset='utf-8',
        cte='base64',
        bodyline='cMO2c3RhbAá=',
    ).encode('utf-8')
    parsed = email.message_from_bytes(raw)
    self.assertEqual(parsed.get_payload(decode=True),
                     'cMO2c3RhbAá=\n'.encode('utf-8'))
3334
3335 def test_8bit_in_uuencode_body(self):
3336 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3337 # normal means, so the block is returned undecoded, but as bytes.
3338 m = self.bodytest_msg.format(charset='utf-8',
3339 cte='uuencode',
3340 bodyline='<,.V<W1A; á ').encode('utf-8')
3341 msg = email.message_from_bytes(m)
3342 self.assertEqual(msg.get_payload(decode=True),
3343 '<,.V<W1A; á \n'.encode('utf-8'))
3344
3345
R. David Murray92532142011-01-07 23:25:30 +00003346 headertest_headers = (
3347 ('From: foo@bar.com', ('From', 'foo@bar.com')),
3348 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3349 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3350 '\tJean de Baddie',
3351 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3352 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3353 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3354 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3355 )
3356 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3357 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003358
3359 def test_get_8bit_header(self):
3360 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003361 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3362 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003363
def test_print_8bit_headers(self):
    """str(msg) shows each 8bit header in its expected encoded form."""
    parsed = email.message_from_bytes(self.headertest_msg)
    template = textwrap.dedent("""\
        From: {}
        To: {}
        Subject: {}
        From: {}

        Yes, they are flying.
        """)
    # Second element of each (name, value) pair is the expected header text.
    encoded_values = [pair[1] for (_, pair) in self.headertest_headers]
    self.assertEqual(str(parsed), template.format(*encoded_values))
R. David Murray96fd54e2010-10-08 15:55:28 +00003376
def test_values_with_8bit_headers(self):
    """values() stringifies with U+FFFD in place of each raw 8bit byte."""
    parsed = email.message_from_bytes(self.headertest_msg)
    expected = [
        'foo@bar.com',
        'b\uFFFD\uFFFDz',
        'Maintenant je vous pr\uFFFD\uFFFDsente mon '
        'coll\uFFFD\uFFFDgue, le pouf '
        'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
        '\tJean de Baddie',
        "g\uFFFD\uFFFDst",
    ]
    rendered = [str(value) for value in parsed.values()]
    self.assertListEqual(rendered, expected)
R. David Murray96fd54e2010-10-08 15:55:28 +00003387
def test_items_with_8bit_headers(self):
    """items() stringifies with U+FFFD in place of each raw 8bit byte."""
    parsed = email.message_from_bytes(self.headertest_msg)
    expected = [
        ('From', 'foo@bar.com'),
        ('To', 'b\uFFFD\uFFFDz'),
        ('Subject', 'Maintenant je vous '
                    'pr\uFFFD\uFFFDsente '
                    'mon coll\uFFFD\uFFFDgue, le pouf '
                    'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                    '\tJean de Baddie'),
        ('From', 'g\uFFFD\uFFFDst'),
    ]
    rendered = [(str(name), str(value)) for (name, value) in parsed.items()]
    self.assertListEqual(rendered, expected)
R. David Murray96fd54e2010-10-08 15:55:28 +00003399
3400 def test_get_all_with_8bit_headers(self):
3401 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003402 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003403 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003404 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003405
R David Murraya2150232011-03-16 21:11:23 -04003406 def test_get_content_type_with_8bit(self):
3407 msg = email.message_from_bytes(textwrap.dedent("""\
3408 Content-Type: text/pl\xA7in; charset=utf-8
3409 """).encode('latin-1'))
3410 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3411 self.assertEqual(msg.get_content_maintype(), "text")
3412 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3413
3414 def test_get_params_with_8bit(self):
3415 msg = email.message_from_bytes(
3416 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3417 self.assertEqual(msg.get_params(header='x-header'),
3418 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3419 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3420 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3421 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3422
3423 def test_get_rfc2231_params_with_8bit(self):
3424 msg = email.message_from_bytes(textwrap.dedent("""\
3425 Content-Type: text/plain; charset=us-ascii;
3426 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3427 ).encode('latin-1'))
3428 self.assertEqual(msg.get_param('title'),
3429 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3430
3431 def test_set_rfc2231_params_with_8bit(self):
3432 msg = email.message_from_bytes(textwrap.dedent("""\
3433 Content-Type: text/plain; charset=us-ascii;
3434 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3435 ).encode('latin-1'))
3436 msg.set_param('title', 'test')
3437 self.assertEqual(msg.get_param('title'), 'test')
3438
3439 def test_del_rfc2231_params_with_8bit(self):
3440 msg = email.message_from_bytes(textwrap.dedent("""\
3441 Content-Type: text/plain; charset=us-ascii;
3442 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3443 ).encode('latin-1'))
3444 msg.del_param('title')
3445 self.assertEqual(msg.get_param('title'), None)
3446 self.assertEqual(msg.get_content_maintype(), 'text')
3447
3448 def test_get_payload_with_8bit_cte_header(self):
3449 msg = email.message_from_bytes(textwrap.dedent("""\
3450 Content-Transfer-Encoding: b\xa7se64
3451 Content-Type: text/plain; charset=latin-1
3452
3453 payload
3454 """).encode('latin-1'))
3455 self.assertEqual(msg.get_payload(), 'payload\n')
3456 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3457
R. David Murray96fd54e2010-10-08 15:55:28 +00003458 non_latin_bin_msg = textwrap.dedent("""\
3459 From: foo@bar.com
3460 To: báz
3461 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3462 \tJean de Baddie
3463 Mime-Version: 1.0
3464 Content-Type: text/plain; charset="utf-8"
3465 Content-Transfer-Encoding: 8bit
3466
3467 Да, они летят.
3468 """).encode('utf-8')
3469
3470 def test_bytes_generator(self):
3471 msg = email.message_from_bytes(self.non_latin_bin_msg)
3472 out = BytesIO()
3473 email.generator.BytesGenerator(out).flatten(msg)
3474 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3475
R. David Murray7372a072011-01-26 21:21:32 +00003476 def test_bytes_generator_handles_None_body(self):
3477 #Issue 11019
3478 msg = email.message.Message()
3479 out = BytesIO()
3480 email.generator.BytesGenerator(out).flatten(msg)
3481 self.assertEqual(out.getvalue(), b"\n")
3482
R. David Murray92532142011-01-07 23:25:30 +00003483 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003484 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003485 To: =?unknown-8bit?q?b=C3=A1z?=
3486 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3487 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3488 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003489 Mime-Version: 1.0
3490 Content-Type: text/plain; charset="utf-8"
3491 Content-Transfer-Encoding: base64
3492
3493 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3494 """)
3495
def test_generator_handles_8bit(self):
    """The string Generator emits the 7bit, header-wrapped rendering."""
    parsed = email.message_from_bytes(self.non_latin_bin_msg)
    sink = StringIO()
    writer = email.generator.Generator(sink)
    writer.flatten(parsed)
    self.assertEqual(sink.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003501
3502 def test_bytes_generator_with_unix_from(self):
3503 # The unixfrom contains a current date, so we can't check it
3504 # literally. Just make sure the first word is 'From' and the
3505 # rest of the message matches the input.
3506 msg = email.message_from_bytes(self.non_latin_bin_msg)
3507 out = BytesIO()
3508 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3509 lines = out.getvalue().split(b'\n')
3510 self.assertEqual(lines[0].split()[0], b'From')
3511 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3512
R. David Murray92532142011-01-07 23:25:30 +00003513 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3514 non_latin_bin_msg_as7bit[2:4] = [
3515 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3516 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3517 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3518
def test_message_from_binary_file(self):
    """BytesParser reading a binary file yields the 7bit string rendering."""
    path = 'test.msg'
    # Registered before the file is created so it is removed in every case.
    self.addCleanup(unlink, path)
    with open(path, 'wb') as sink:
        sink.write(self.non_latin_bin_msg)
    with open(path, 'rb') as source:
        parsed = email.parser.BytesParser().parse(source)
    self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)
3527
3528 latin_bin_msg = textwrap.dedent("""\
3529 From: foo@bar.com
3530 To: Dinsdale
3531 Subject: Nudge nudge, wink, wink
3532 Mime-Version: 1.0
3533 Content-Type: text/plain; charset="latin-1"
3534 Content-Transfer-Encoding: 8bit
3535
3536 oh là là, know what I mean, know what I mean?
3537 """).encode('latin-1')
3538
3539 latin_bin_msg_as7bit = textwrap.dedent("""\
3540 From: foo@bar.com
3541 To: Dinsdale
3542 Subject: Nudge nudge, wink, wink
3543 Mime-Version: 1.0
3544 Content-Type: text/plain; charset="iso-8859-1"
3545 Content-Transfer-Encoding: quoted-printable
3546
3547 oh l=E0 l=E0, know what I mean, know what I mean?
3548 """)
3549
3550 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3551 m = email.message_from_bytes(self.latin_bin_msg)
3552 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3553
3554 def test_decoded_generator_emits_unicode_body(self):
3555 m = email.message_from_bytes(self.latin_bin_msg)
3556 out = StringIO()
3557 email.generator.DecodedGenerator(out).flatten(m)
3558 #DecodedHeader output contains an extra blank line compared
3559 #to the input message. RDM: not sure if this is a bug or not,
3560 #but it is not specific to the 8bit->7bit conversion.
3561 self.assertEqual(out.getvalue(),
3562 self.latin_bin_msg.decode('latin-1')+'\n')
3563
3564 def test_bytes_feedparser(self):
3565 bfp = email.feedparser.BytesFeedParser()
3566 for i in range(0, len(self.latin_bin_msg), 10):
3567 bfp.feed(self.latin_bin_msg[i:i+10])
3568 m = bfp.close()
3569 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3570
def test_crlf_flatten(self):
    """Bytes round trip of msg_26.txt with an explicit CRLF linesep."""
    with openfile('msg_26.txt', 'rb') as source:
        expected = source.read()
    parsed = email.message_from_bytes(expected)
    sink = BytesIO()
    writer = email.generator.BytesGenerator(sink)
    writer.flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue(), expected)
R David Murrayc5c14722011-04-06 08:13:02 -04003579
3580 def test_8bit_multipart(self):
3581 # Issue 11605
3582 source = textwrap.dedent("""\
3583 Date: Fri, 18 Mar 2011 17:15:43 +0100
3584 To: foo@example.com
3585 From: foodwatch-Newsletter <bar@example.com>
3586 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3587 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3588 MIME-Version: 1.0
3589 Content-Type: multipart/alternative;
3590 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3591
3592 --b1_76a486bee62b0d200f33dc2ca08220ad
3593 Content-Type: text/plain; charset="utf-8"
3594 Content-Transfer-Encoding: 8bit
3595
3596 Guten Tag, ,
3597
3598 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3599 Nachrichten aus Japan.
3600
3601
3602 --b1_76a486bee62b0d200f33dc2ca08220ad
3603 Content-Type: text/html; charset="utf-8"
3604 Content-Transfer-Encoding: 8bit
3605
3606 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3607 "http://www.w3.org/TR/html4/loose.dtd">
3608 <html lang="de">
3609 <head>
3610 <title>foodwatch - Newsletter</title>
3611 </head>
3612 <body>
3613 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3614 die Nachrichten aus Japan.</p>
3615 </body>
3616 </html>
3617 --b1_76a486bee62b0d200f33dc2ca08220ad--
3618
3619 """).encode('utf-8')
3620 msg = email.message_from_bytes(source)
3621 s = BytesIO()
3622 g = email.generator.BytesGenerator(s)
3623 g.flatten(msg)
3624 self.assertEqual(s.getvalue(), source)
3625
R David Murray9fd170e2012-03-14 14:05:03 -04003626 def test_bytes_generator_b_encoding_linesep(self):
3627 # Issue 14062: b encoding was tacking on an extra \n.
3628 m = Message()
3629 # This has enough non-ascii that it should always end up b encoded.
3630 m['Subject'] = Header('žluťoučký kůň')
3631 s = BytesIO()
3632 g = email.generator.BytesGenerator(s)
3633 g.flatten(m, linesep='\r\n')
3634 self.assertEqual(
3635 s.getvalue(),
3636 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3637
3638 def test_generator_b_encoding_linesep(self):
3639 # Since this broke in ByteGenerator, test Generator for completeness.
3640 m = Message()
3641 # This has enough non-ascii that it should always end up b encoded.
3642 m['Subject'] = Header('žluťoučký kůň')
3643 s = StringIO()
3644 g = email.generator.Generator(s)
3645 g.flatten(m, linesep='\r\n')
3646 self.assertEqual(
3647 s.getvalue(),
3648 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3649
def test_crlf_control_via_policy(self):
    """BytesGenerator under the SMTP policy reproduces msg_26.txt."""
    # msg_26 is crlf terminated
    with openfile('msg_26.txt', 'rb') as source:
        expected = source.read()
    parsed = email.message_from_bytes(expected)
    sink = BytesIO()
    # No linesep is passed to flatten(); only the policy is supplied.
    writer = email.generator.BytesGenerator(sink, policy=email.policy.SMTP)
    writer.flatten(parsed)
    self.assertEqual(sink.getvalue(), expected)
3659
def test_flatten_linesep_overrides_policy(self):
    """An explicit linesep given to flatten() wins over the SMTP policy."""
    # msg_27 is lf separated
    with openfile('msg_27.txt', 'rb') as source:
        expected = source.read()
    parsed = email.message_from_bytes(expected)
    sink = BytesIO()
    writer = email.generator.BytesGenerator(sink, policy=email.policy.SMTP)
    writer.flatten(parsed, linesep='\n')
    self.assertEqual(sink.getvalue(), expected)
3669
def test_must_be_7bit_handles_unknown_8bit(self):
    """must_be_7bit=True makes BytesGenerator emit the 7bit wrapped form."""
    seven_bit_policy = email.policy.default.clone(must_be_7bit=True)
    parsed = email.message_from_bytes(self.non_latin_bin_msg)
    sink = BytesIO()
    email.generator.BytesGenerator(sink, policy=seven_bit_policy).flatten(
        parsed)
    expected = self.non_latin_bin_msg_as7bit_wrapped.encode('ascii')
    self.assertEqual(sink.getvalue(), expected)
3678
def test_must_be_7bit_transforms_8bit_cte(self):
    """must_be_7bit=True re-encodes the latin-1 8bit body for output."""
    seven_bit_policy = email.policy.default.clone(must_be_7bit=True)
    parsed = email.message_from_bytes(self.latin_bin_msg)
    sink = BytesIO()
    email.generator.BytesGenerator(sink, policy=seven_bit_policy).flatten(
        parsed)
    expected = self.latin_bin_msg_as7bit.encode('ascii')
    self.assertEqual(sink.getvalue(), expected)
3687
R. David Murray8451c4b2010-10-23 22:19:56 +00003688 maxDiff = None
3689
Ezio Melottib3aedd42010-11-20 19:04:17 +00003690
class BaseTestBytesGeneratorIdempotent:
    """Byte-oriented helpers for parse-then-flatten round-trip tests.

    Subclasses supply ``linesep`` (passed to ``flatten``), ``blinesep`` and
    ``normalize_linesep_regex`` (used together to rewrite line endings in
    the input file data).
    """

    maxDiff = None

    def _msgobj(self, filename):
        """Return ``(message, data)`` for *filename* after normalizing line endings."""
        with openfile(filename, 'rb') as source:
            raw = source.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        """Assert that flattening *msg* with BytesGenerator gives back *data*."""
        sink = BytesIO()
        # maxheaderlen=0 is passed through to the generator unchanged.
        writer = email.generator.BytesGenerator(sink, maxheaderlen=0)
        writer.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003707
3708
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    """Run the byte-level round-trip helpers with LF line endings.

    NOTE(review): presumably the test methods themselves come from
    TestIdempotent, which is defined outside this section -- confirm.
    """
    # Passed to flatten() as the output line separator.
    linesep = '\n'
    # Replacement used when normalizing the input file data.
    blinesep = b'\n'
    # Matches each CRLF pair in the input data.
    normalize_linesep_regex = re.compile(br'\r\n')
3714
3715
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    """Run the byte-level round-trip helpers with CRLF line endings.

    NOTE(review): presumably the test methods themselves come from
    TestIdempotent, which is defined outside this section -- confirm.
    """
    # Passed to flatten() as the output line separator.
    linesep = '\r\n'
    # Replacement used when normalizing the input file data.
    blinesep = b'\r\n'
    # Matches each LF that is not already preceded by a CR.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
3721
Ezio Melottib3aedd42010-11-20 19:04:17 +00003722
class TestBase64(unittest.TestCase):
    """Tests for the helper functions in email.base64mime."""

    def test_len(self):
        """header_length() matches the size of the base64 encoding."""
        check = self.assertEqual
        check(base64mime.header_length('hello'),
              len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Four output characters for every started group of three input
            # characters: 0 -> 0, 1..3 -> 4, 4..6 -> 8, ... 13..14 -> 20.
            expected = ((size + 2) // 3) * 4
            check(base64mime.header_length('x' * size), expected)

    def test_decode(self):
        """decode() maps base64 text to bytes; empty input gives b''."""
        check = self.assertEqual
        check(base64mime.decode(''), b'')
        check(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        """body_encode() output, including the maxlinelen and eol arguments."""
        check = self.assertEqual
        encode = base64mime.body_encode
        check(encode(b''), b'')
        check(encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        check(encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        check(encode(b'xxxx ' * 20, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        check(encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
              """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        """header_encode() wraps the base64 text in an encoded-word."""
        check = self.assertEqual
        encode = base64mime.header_encode
        check(encode('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        check(encode('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        check(encode('hello', charset='iso-8859-2'),
              '=?iso-8859-2?b?aGVsbG8=?=')
        # NOTE(review): this repeats the default-charset check made above.
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003773
3774
Ezio Melottib3aedd42010-11-20 19:04:17 +00003775
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003776class TestQuopri(unittest.TestCase):
def setUp(self):
    """Build the octet tables used by the quoted-printable tests.

    Sets four lists of byte values (ints in ``range(256)``):

    * ``hlit`` / ``hnon`` -- octets that do not / do need encoding in headers
    * ``blit`` / ``bnon`` -- octets that do not / do need encoding in bodies

    Each pair is checked to partition all 256 octets.  The check uses a
    unittest assertion rather than a bare ``assert`` so that it still runs
    when Python is started with -O.
    """
    # Set of characters (as byte integers) that don't need to be encoded
    # in headers.  Iterating a bytes object yields ints directly.
    self.hlit = list(chain(
        range(ord('a'), ord('z') + 1),
        range(ord('A'), ord('Z') + 1),
        range(ord('0'), ord('9') + 1),
        b'!*+-/'))
    # Set of characters (as byte integers) that do need to be encoded in
    # headers.
    header_literals = set(self.hlit)
    self.hnon = [c for c in range(256) if c not in header_literals]
    self.assertEqual(len(self.hlit) + len(self.hnon), 256)
    # Set of characters (as byte integers) that don't need to be encoded
    # in bodies.
    self.blit = list(range(ord(' '), ord('~') + 1))
    self.blit.append(ord('\t'))
    self.blit.remove(ord('='))
    # Set of characters (as byte integers) that do need to be encoded in
    # bodies.
    body_literals = set(self.blit)
    self.bnon = [c for c in range(256) if c not in body_literals]
    self.assertEqual(len(self.blit) + len(self.bnon), 256)
3798
Guido van Rossum9604e662007-08-30 03:46:43 +00003799 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003800 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003801 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003802 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003803 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003804 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003805 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003806
Guido van Rossum9604e662007-08-30 03:46:43 +00003807 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003808 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003809 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003810 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003811 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003812 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003813 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003814
3815 def test_header_quopri_len(self):
3816 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003817 eq(quoprimime.header_length(b'hello'), 5)
3818 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003819 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003820 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003821 # =?xxx?q?...?= means 10 extra characters
3822 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003823 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3824 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003825 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003826 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003827 # =?xxx?q?...?= means 10 extra characters
3828 10)
3829 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003830 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003831 'expected length 1 for %r' % chr(c))
3832 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003833 # Space is special; it's encoded to _
3834 if c == ord(' '):
3835 continue
3836 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003837 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003838 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003839
3840 def test_body_quopri_len(self):
3841 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003842 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003843 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003844 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003845 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003846
3847 def test_quote_unquote_idempotent(self):
3848 for x in range(256):
3849 c = chr(x)
3850 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3851
def _test_header_encode(self, header, expected_encoded_header, charset=None):
    """Assert that header_encode(header[, charset]) gives the expected text.

    When *charset* is None the library default is used, i.e. no charset
    argument is passed at all.
    """
    args = (header,) if charset is None else (header, charset)
    self.assertEqual(quoprimime.header_encode(*args),
                     expected_encoded_header)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003858
def test_header_encode_null(self):
    """Empty bytes encode to the empty string."""
    self._test_header_encode(header=b'', expected_encoded_header='')
3861
R David Murrayec1b5b82011-03-23 14:19:05 -04003862 def test_header_encode_one_word(self):
3863 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
3864
3865 def test_header_encode_two_lines(self):
3866 self._test_header_encode(b'hello\nworld',
3867 '=?iso-8859-1?q?hello=0Aworld?=')
3868
3869 def test_header_encode_non_ascii(self):
3870 self._test_header_encode(b'hello\xc7there',
3871 '=?iso-8859-1?q?hello=C7there?=')
3872
3873 def test_header_encode_alt_charset(self):
3874 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
3875 charset='iso-8859-2')
3876
def _test_header_decode(self, encoded_header, expected_decoded_header):
    # Helper: decode `encoded_header` with quoprimime.header_decode and
    # compare the result with `expected_decoded_header`.
    self.assertEqual(quoprimime.header_decode(encoded_header),
                     expected_decoded_header)
3880
3881 def test_header_decode_null(self):
3882 self._test_header_decode('', '')
3883
3884 def test_header_decode_one_word(self):
3885 self._test_header_decode('hello', 'hello')
3886
3887 def test_header_decode_two_lines(self):
3888 self._test_header_decode('hello=0Aworld', 'hello\nworld')
3889
3890 def test_header_decode_non_ascii(self):
3891 self._test_header_decode('hello=C7there', 'hello\xc7there')
3892
def _test_decode(self, encoded, expected_decoded, eol=None):
    # Helper: run quoprimime.decode on `encoded` and compare the result
    # with `expected_decoded`.  `eol` is forwarded only when given, so
    # decode's own default applies otherwise.
    options = {} if eol is None else {'eol': eol}
    actual = quoprimime.decode(encoded, **options)
    self.assertEqual(actual, expected_decoded)
3899
3900 def test_decode_null_word(self):
3901 self._test_decode('', '')
3902
3903 def test_decode_null_line_null_word(self):
3904 self._test_decode('\r\n', '\n')
3905
3906 def test_decode_one_word(self):
3907 self._test_decode('hello', 'hello')
3908
3909 def test_decode_one_word_eol(self):
3910 self._test_decode('hello', 'hello', eol='X')
3911
3912 def test_decode_one_line(self):
3913 self._test_decode('hello\r\n', 'hello\n')
3914
3915 def test_decode_one_line_lf(self):
3916 self._test_decode('hello\n', 'hello\n')
3917
R David Murraycafd79d2011-03-23 15:25:55 -04003918 def test_decode_one_line_cr(self):
3919 self._test_decode('hello\r', 'hello\n')
3920
3921 def test_decode_one_line_nl(self):
3922 self._test_decode('hello\n', 'helloX', eol='X')
3923
3924 def test_decode_one_line_crnl(self):
3925 self._test_decode('hello\r\n', 'helloX', eol='X')
3926
R David Murrayec1b5b82011-03-23 14:19:05 -04003927 def test_decode_one_line_one_word(self):
3928 self._test_decode('hello\r\nworld', 'hello\nworld')
3929
3930 def test_decode_one_line_one_word_eol(self):
3931 self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
3932
3933 def test_decode_two_lines(self):
3934 self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
3935
R David Murraycafd79d2011-03-23 15:25:55 -04003936 def test_decode_two_lines_eol(self):
3937 self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
3938
R David Murrayec1b5b82011-03-23 14:19:05 -04003939 def test_decode_one_long_line(self):
3940 self._test_decode('Spam' * 250, 'Spam' * 250)
3941
3942 def test_decode_one_space(self):
3943 self._test_decode(' ', '')
3944
3945 def test_decode_multiple_spaces(self):
3946 self._test_decode(' ' * 5, '')
3947
3948 def test_decode_one_line_trailing_spaces(self):
3949 self._test_decode('hello \r\n', 'hello\n')
3950
3951 def test_decode_two_lines_trailing_spaces(self):
3952 self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')
3953
3954 def test_decode_quoted_word(self):
3955 self._test_decode('=22quoted=20words=22', '"quoted words"')
3956
3957 def test_decode_uppercase_quoting(self):
3958 self._test_decode('ab=CD=EF', 'ab\xcd\xef')
3959
3960 def test_decode_lowercase_quoting(self):
3961 self._test_decode('ab=cd=ef', 'ab\xcd\xef')
3962
3963 def test_decode_soft_line_break(self):
3964 self._test_decode('soft line=\r\nbreak', 'soft linebreak')
3965
3966 def test_decode_false_quoting(self):
3967 self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
3968
def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
    # Helper: run quoprimime.body_encode on `body`, compare the result with
    # `expected_encoded_body`, and (when the eol is one we can split on)
    # check that no output line exceeds the line-length limit.
    #
    # Only options the caller actually supplied are forwarded, so that
    # body_encode's own defaults are exercised otherwise.
    options = {}
    if maxlinelen is not None:
        options['maxlinelen'] = maxlinelen
    if eol is not None:
        options['eol'] = eol
    result = quoprimime.body_encode(body, **options)
    self.assertEqual(result, expected_encoded_body)
    # Values assumed for body_encode's defaults when the option was omitted.
    limit = 76 if maxlinelen is None else maxlinelen
    line_ending = '\n' if eol is None else eol
    if line_ending == '\n' or line_ending == '\r\n':
        # splitlines() understands these endings, so each line can be
        # measured individually.
        for encoded_line in result.splitlines():
            self.assertLessEqual(len(encoded_line), limit)
3988
3989 def test_encode_null(self):
3990 self._test_encode('', '')
3991
3992 def test_encode_null_lines(self):
3993 self._test_encode('\n\n', '\n\n')
3994
3995 def test_encode_one_line(self):
3996 self._test_encode('hello\n', 'hello\n')
3997
3998 def test_encode_one_line_crlf(self):
3999 self._test_encode('hello\r\n', 'hello\n')
4000
4001 def test_encode_one_line_eol(self):
4002 self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
4003
4004 def test_encode_one_space(self):
4005 self._test_encode(' ', '=20')
4006
4007 def test_encode_one_line_one_space(self):
4008 self._test_encode(' \n', '=20\n')
4009
# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list. For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
4014
4015 def test_encode_two_lines_one_space(self):
4016 self._test_encode(' \n \n', '=20\n=20\n')
4017
R David Murrayec1b5b82011-03-23 14:19:05 -04004018 def test_encode_one_word_trailing_spaces(self):
4019 self._test_encode('hello ', 'hello =20')
4020
4021 def test_encode_one_line_trailing_spaces(self):
4022 self._test_encode('hello \n', 'hello =20\n')
4023
4024 def test_encode_one_word_trailing_tab(self):
4025 self._test_encode('hello \t', 'hello =09')
4026
4027 def test_encode_one_line_trailing_tab(self):
4028 self._test_encode('hello \t\n', 'hello =09\n')
4029
4030 def test_encode_trailing_space_before_maxlinelen(self):
4031 self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
4032
R David Murrayb938c8c2011-03-24 12:19:26 -04004033 def test_encode_trailing_space_at_maxlinelen(self):
4034 self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
4035
R David Murrayec1b5b82011-03-23 14:19:05 -04004036 def test_encode_trailing_space_beyond_maxlinelen(self):
R David Murrayb938c8c2011-03-24 12:19:26 -04004037 self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
4038
4039 def test_encode_whitespace_lines(self):
4040 self._test_encode(' \n' * 5, '=20\n' * 5)
R David Murrayec1b5b82011-03-23 14:19:05 -04004041
4042 def test_encode_quoted_equals(self):
4043 self._test_encode('a = b', 'a =3D b')
4044
4045 def test_encode_one_long_string(self):
4046 self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
4047
4048 def test_encode_one_long_line(self):
4049 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
4050
4051 def test_encode_one_very_long_line(self):
4052 self._test_encode('x' * 200 + '\n',
4053 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
4054
4055 def test_encode_one_long_line(self):
4056 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
4057
4058 def test_encode_shortest_maxlinelen(self):
4059 self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004060
R David Murrayb938c8c2011-03-24 12:19:26 -04004061 def test_encode_maxlinelen_too_small(self):
4062 self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
4063
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004064 def test_encode(self):
4065 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00004066 eq(quoprimime.body_encode(''), '')
4067 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004068 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00004069 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004070 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00004071 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004072xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
4073 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
4074x xxxx xxxx xxxx xxxx=20""")
4075 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00004076 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
4077 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004078xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
4079 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
4080x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00004081 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004082one line
4083
4084two line"""), """\
4085one line
4086
4087two line""")
4088
4089
Ezio Melottib3aedd42010-11-20 19:04:17 +00004090
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004091# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode registers a charset named 'fake'; drop it again
        # if it is present so the registry is left as it was found.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        check = self.assertEqual
        # With us-ascii, plain text must pass through header_encode as is.
        ascii_charset = Charset('us-ascii')
        check(ascii_charset.header_encode('Hello World!'), 'Hello World!')
        # 8-bit text cannot be header-encoded as us-ascii ...
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_charset.header_encode,
                          eight_bit)
        # ... but utf-8 encodes it as a base64 encoded word.
        utf8_charset = Charset('utf-8')
        check(utf8_charset.header_encode(eight_bit),
              '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # A charset whose body encoding is quoted-printable.
        qp_charset = Charset('iso-8859-1')
        check('hello w=F6rld', qp_charset.body_encode('hello w\xf6rld'))
        # A charset whose body encoding is base64.
        b64_charset = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', b64_charset.body_encode(b'hello world'))
        # A charset without any body encoding.
        plain_charset = Charset('us-ascii')
        check('hello world', plain_charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # SF bug #625509 has to be faked, because no built-in encoding
        # combines a QP header encoding with a different body encoding.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake_charset = Charset('fake')
        check('hello world', fake_charset.body_encode('hello world'))

    def test_unicode_charset_name(self):
        # An ASCII name is accepted and round-trips through str(); a name
        # containing '\xff' must be rejected with CharsetError.
        named = Charset('us-ascii')
        self.assertEqual(str(named), 'us-ascii')
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
4146
4147
Ezio Melottib3aedd42010-11-20 19:04:17 +00004148
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004149# Test multilingual MIME headers.
4150class TestHeader(TestEmailBase):
4151 def test_simple(self):
4152 eq = self.ndiffAssertEqual
4153 h = Header('Hello World!')
4154 eq(h.encode(), 'Hello World!')
4155 h.append(' Goodbye World!')
4156 eq(h.encode(), 'Hello World! Goodbye World!')
4157
4158 def test_simple_surprise(self):
4159 eq = self.ndiffAssertEqual
4160 h = Header('Hello World!')
4161 eq(h.encode(), 'Hello World!')
4162 h.append('Goodbye World!')
4163 eq(h.encode(), 'Hello World! Goodbye World!')
4164
4165 def test_header_needs_no_decoding(self):
4166 h = 'no decoding needed'
4167 self.assertEqual(decode_header(h), [(h, None)])
4168
4169 def test_long(self):
4170 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
4171 maxlinelen=76)
4172 for l in h.encode(splitchars=' ').split('\n '):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004173 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004174
4175 def test_multilingual(self):
4176 eq = self.ndiffAssertEqual
4177 g = Charset("iso-8859-1")
4178 cz = Charset("iso-8859-2")
4179 utf8 = Charset("utf-8")
4180 g_head = (b'Die Mieter treten hier ein werden mit einem '
4181 b'Foerderband komfortabel den Korridor entlang, '
4182 b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
4183 b'gegen die rotierenden Klingen bef\xf6rdert. ')
4184 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
4185 b'd\xf9vtipu.. ')
4186 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
4187 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
4188 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
4189 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
4190 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
4191 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
4192 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
4193 '\u3044\u307e\u3059\u3002')
4194 h = Header(g_head, g)
4195 h.append(cz_head, cz)
4196 h.append(utf8_head, utf8)
Guido van Rossum9604e662007-08-30 03:46:43 +00004197 enc = h.encode(maxlinelen=76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004198 eq(enc, """\
Guido van Rossum9604e662007-08-30 03:46:43 +00004199=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
4200 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
4201 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
4202 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004203 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
4204 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
4205 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
4206 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
Guido van Rossum9604e662007-08-30 03:46:43 +00004207 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
4208 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
4209 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
4210 decoded = decode_header(enc)
4211 eq(len(decoded), 3)
4212 eq(decoded[0], (g_head, 'iso-8859-1'))
4213 eq(decoded[1], (cz_head, 'iso-8859-2'))
4214 eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004215 ustr = str(h)
Guido van Rossum9604e662007-08-30 03:46:43 +00004216 eq(ustr,
4217 (b'Die Mieter treten hier ein werden mit einem Foerderband '
4218 b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
4219 b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
4220 b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
4221 b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
4222 b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
4223 b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
4224 b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
4225 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
4226 b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
4227 b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
4228 b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
4229 b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
4230 b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
4231 b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
4232 b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
4233 ).decode('utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004234 # Test make_header()
4235 newh = make_header(decode_header(enc))
Guido van Rossum9604e662007-08-30 03:46:43 +00004236 eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004237
4238 def test_empty_header_encode(self):
4239 h = Header()
4240 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00004241
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004242 def test_header_ctor_default_args(self):
4243 eq = self.ndiffAssertEqual
4244 h = Header()
4245 eq(h, '')
4246 h.append('foo', Charset('iso-8859-1'))
Guido van Rossum9604e662007-08-30 03:46:43 +00004247 eq(h, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004248
4249 def test_explicit_maxlinelen(self):
4250 eq = self.ndiffAssertEqual
4251 hstr = ('A very long line that must get split to something other '
4252 'than at the 76th character boundary to test the non-default '
4253 'behavior')
4254 h = Header(hstr)
4255 eq(h.encode(), '''\
4256A very long line that must get split to something other than at the 76th
4257 character boundary to test the non-default behavior''')
4258 eq(str(h), hstr)
4259 h = Header(hstr, header_name='Subject')
4260 eq(h.encode(), '''\
4261A very long line that must get split to something other than at the
4262 76th character boundary to test the non-default behavior''')
4263 eq(str(h), hstr)
4264 h = Header(hstr, maxlinelen=1024, header_name='Subject')
4265 eq(h.encode(), hstr)
4266 eq(str(h), hstr)
4267
Guido van Rossum9604e662007-08-30 03:46:43 +00004268 def test_quopri_splittable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004269 eq = self.ndiffAssertEqual
4270 h = Header(charset='iso-8859-1', maxlinelen=20)
Guido van Rossum9604e662007-08-30 03:46:43 +00004271 x = 'xxxx ' * 20
4272 h.append(x)
4273 s = h.encode()
4274 eq(s, """\
4275=?iso-8859-1?q?xxx?=
4276 =?iso-8859-1?q?x_?=
4277 =?iso-8859-1?q?xx?=
4278 =?iso-8859-1?q?xx?=
4279 =?iso-8859-1?q?_x?=
4280 =?iso-8859-1?q?xx?=
4281 =?iso-8859-1?q?x_?=
4282 =?iso-8859-1?q?xx?=
4283 =?iso-8859-1?q?xx?=
4284 =?iso-8859-1?q?_x?=
4285 =?iso-8859-1?q?xx?=
4286 =?iso-8859-1?q?x_?=
4287 =?iso-8859-1?q?xx?=
4288 =?iso-8859-1?q?xx?=
4289 =?iso-8859-1?q?_x?=
4290 =?iso-8859-1?q?xx?=
4291 =?iso-8859-1?q?x_?=
4292 =?iso-8859-1?q?xx?=
4293 =?iso-8859-1?q?xx?=
4294 =?iso-8859-1?q?_x?=
4295 =?iso-8859-1?q?xx?=
4296 =?iso-8859-1?q?x_?=
4297 =?iso-8859-1?q?xx?=
4298 =?iso-8859-1?q?xx?=
4299 =?iso-8859-1?q?_x?=
4300 =?iso-8859-1?q?xx?=
4301 =?iso-8859-1?q?x_?=
4302 =?iso-8859-1?q?xx?=
4303 =?iso-8859-1?q?xx?=
4304 =?iso-8859-1?q?_x?=
4305 =?iso-8859-1?q?xx?=
4306 =?iso-8859-1?q?x_?=
4307 =?iso-8859-1?q?xx?=
4308 =?iso-8859-1?q?xx?=
4309 =?iso-8859-1?q?_x?=
4310 =?iso-8859-1?q?xx?=
4311 =?iso-8859-1?q?x_?=
4312 =?iso-8859-1?q?xx?=
4313 =?iso-8859-1?q?xx?=
4314 =?iso-8859-1?q?_x?=
4315 =?iso-8859-1?q?xx?=
4316 =?iso-8859-1?q?x_?=
4317 =?iso-8859-1?q?xx?=
4318 =?iso-8859-1?q?xx?=
4319 =?iso-8859-1?q?_x?=
4320 =?iso-8859-1?q?xx?=
4321 =?iso-8859-1?q?x_?=
4322 =?iso-8859-1?q?xx?=
4323 =?iso-8859-1?q?xx?=
4324 =?iso-8859-1?q?_?=""")
4325 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004326 h = Header(charset='iso-8859-1', maxlinelen=40)
4327 h.append('xxxx ' * 20)
Guido van Rossum9604e662007-08-30 03:46:43 +00004328 s = h.encode()
4329 eq(s, """\
4330=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
4331 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
4332 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
4333 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
4334 =?iso-8859-1?q?_xxxx_xxxx_?=""")
4335 eq(x, str(make_header(decode_header(s))))
4336
4337 def test_base64_splittable(self):
4338 eq = self.ndiffAssertEqual
4339 h = Header(charset='koi8-r', maxlinelen=20)
4340 x = 'xxxx ' * 20
4341 h.append(x)
4342 s = h.encode()
4343 eq(s, """\
4344=?koi8-r?b?eHh4?=
4345 =?koi8-r?b?eCB4?=
4346 =?koi8-r?b?eHh4?=
4347 =?koi8-r?b?IHh4?=
4348 =?koi8-r?b?eHgg?=
4349 =?koi8-r?b?eHh4?=
4350 =?koi8-r?b?eCB4?=
4351 =?koi8-r?b?eHh4?=
4352 =?koi8-r?b?IHh4?=
4353 =?koi8-r?b?eHgg?=
4354 =?koi8-r?b?eHh4?=
4355 =?koi8-r?b?eCB4?=
4356 =?koi8-r?b?eHh4?=
4357 =?koi8-r?b?IHh4?=
4358 =?koi8-r?b?eHgg?=
4359 =?koi8-r?b?eHh4?=
4360 =?koi8-r?b?eCB4?=
4361 =?koi8-r?b?eHh4?=
4362 =?koi8-r?b?IHh4?=
4363 =?koi8-r?b?eHgg?=
4364 =?koi8-r?b?eHh4?=
4365 =?koi8-r?b?eCB4?=
4366 =?koi8-r?b?eHh4?=
4367 =?koi8-r?b?IHh4?=
4368 =?koi8-r?b?eHgg?=
4369 =?koi8-r?b?eHh4?=
4370 =?koi8-r?b?eCB4?=
4371 =?koi8-r?b?eHh4?=
4372 =?koi8-r?b?IHh4?=
4373 =?koi8-r?b?eHgg?=
4374 =?koi8-r?b?eHh4?=
4375 =?koi8-r?b?eCB4?=
4376 =?koi8-r?b?eHh4?=
4377 =?koi8-r?b?IA==?=""")
4378 eq(x, str(make_header(decode_header(s))))
4379 h = Header(charset='koi8-r', maxlinelen=40)
4380 h.append(x)
4381 s = h.encode()
4382 eq(s, """\
4383=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
4384 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
4385 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
4386 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
4387 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
4388 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
4389 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004390
4391 def test_us_ascii_header(self):
4392 eq = self.assertEqual
4393 s = 'hello'
4394 x = decode_header(s)
4395 eq(x, [('hello', None)])
4396 h = make_header(x)
4397 eq(s, h.encode())
4398
4399 def test_string_charset(self):
4400 eq = self.assertEqual
4401 h = Header()
4402 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00004403 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004404
4405## def test_unicode_error(self):
4406## raises = self.assertRaises
4407## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
4408## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
4409## h = Header()
4410## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
4411## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
4412## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
4413
4414 def test_utf8_shortest(self):
4415 eq = self.assertEqual
4416 h = Header('p\xf6stal', 'utf-8')
4417 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
4418 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
4419 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
4420
4421 def test_bad_8bit_header(self):
4422 raises = self.assertRaises
4423 eq = self.assertEqual
4424 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4425 raises(UnicodeError, Header, x)
4426 h = Header()
4427 raises(UnicodeError, h.append, x)
4428 e = x.decode('utf-8', 'replace')
4429 eq(str(Header(x, errors='replace')), e)
4430 h.append(x, errors='replace')
4431 eq(str(h), e)
4432
R David Murray041015c2011-03-25 15:10:55 -04004433 def test_escaped_8bit_header(self):
4434 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
R David Murray6bdb1762011-06-18 12:30:55 -04004435 e = x.decode('ascii', 'surrogateescape')
4436 h = Header(e, charset=email.charset.UNKNOWN8BIT)
R David Murray041015c2011-03-25 15:10:55 -04004437 self.assertEqual(str(h),
4438 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4439 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4440
R David Murraye5e366c2011-06-18 12:57:28 -04004441 def test_header_handles_binary_unknown8bit(self):
4442 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4443 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4444 self.assertEqual(str(h),
4445 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4446 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4447
4448 def test_make_header_handles_binary_unknown8bit(self):
4449 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4450 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4451 h2 = email.header.make_header(email.header.decode_header(h))
4452 self.assertEqual(str(h2),
4453 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4454 self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])
4455
R David Murray041015c2011-03-25 15:10:55 -04004456 def test_modify_returned_list_does_not_change_header(self):
4457 h = Header('test')
4458 chunks = email.header.decode_header(h)
4459 chunks.append(('ascii', 'test2'))
4460 self.assertEqual(str(h), 'test')
4461
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004462 def test_encoded_adjacent_nonencoded(self):
4463 eq = self.assertEqual
4464 h = Header()
4465 h.append('hello', 'iso-8859-1')
4466 h.append('world')
4467 s = h.encode()
4468 eq(s, '=?iso-8859-1?q?hello?= world')
4469 h = make_header(decode_header(s))
4470 eq(h.encode(), s)
4471
4472 def test_whitespace_eater(self):
4473 eq = self.assertEqual
4474 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
4475 parts = decode_header(s)
4476 eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
4477 hdr = make_header(parts)
4478 eq(hdr.encode(),
4479 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
4480
4481 def test_broken_base64_header(self):
4482 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00004483 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004484 raises(errors.HeaderParseError, decode_header, s)
4485
R. David Murray477efb32011-01-05 01:39:32 +00004486 def test_shift_jis_charset(self):
4487 h = Header('文', charset='shift_jis')
4488 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
4489
R David Murrayde912762011-03-16 18:26:23 -04004490 def test_flatten_header_with_no_value(self):
4491 # Issue 11401 (regression from email 4.x) Note that the space after
4492 # the header doesn't reflect the input, but this is also the way
4493 # email 4.x behaved. At some point it would be nice to fix that.
4494 msg = email.message_from_string("EmptyHeader:")
4495 self.assertEqual(str(msg), "EmptyHeader: \n\n")
4496
R David Murray01581ee2011-04-18 10:04:34 -04004497 def test_encode_preserves_leading_ws_on_value(self):
4498 msg = Message()
4499 msg['SomeHeader'] = ' value with leading ws'
4500 self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")
4501
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004502
Ezio Melottib3aedd42010-11-20 19:04:17 +00004503
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004504# Test RFC 2231 header parameters (en/de)coding
4505class TestRFC2231(TestEmailBase):
4506 def test_get_param(self):
4507 eq = self.assertEqual
4508 msg = self._msgobj('msg_29.txt')
4509 eq(msg.get_param('title'),
4510 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
4511 eq(msg.get_param('title', unquote=False),
4512 ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
4513
4514 def test_set_param(self):
4515 eq = self.ndiffAssertEqual
4516 msg = Message()
4517 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
4518 charset='us-ascii')
4519 eq(msg.get_param('title'),
4520 ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
4521 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
4522 charset='us-ascii', language='en')
4523 eq(msg.get_param('title'),
4524 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
4525 msg = self._msgobj('msg_01.txt')
4526 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
4527 charset='us-ascii', language='en')
4528 eq(msg.as_string(maxheaderlen=78), """\
4529Return-Path: <bbb@zzz.org>
4530Delivered-To: bbb@zzz.org
4531Received: by mail.zzz.org (Postfix, from userid 889)
4532\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
4533MIME-Version: 1.0
4534Content-Transfer-Encoding: 7bit
4535Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
4536From: bbb@ddd.com (John X. Doe)
4537To: bbb@zzz.org
4538Subject: This is a test message
4539Date: Fri, 4 May 2001 14:05:44 -0400
4540Content-Type: text/plain; charset=us-ascii;
R. David Murraydfd7eb02010-12-24 22:36:49 +00004541 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004542
4543
4544Hi,
4545
4546Do you like this message?
4547
4548-Me
4549""")
4550
R David Murraya2860e82011-04-16 09:20:30 -04004551 def test_set_param_requote(self):
4552 msg = Message()
4553 msg.set_param('title', 'foo')
4554 self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
4555 msg.set_param('title', 'bar', requote=False)
4556 self.assertEqual(msg['content-type'], 'text/plain; title=bar')
4557 # tspecial is still quoted.
4558 msg.set_param('title', "(bar)bell", requote=False)
4559 self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
4560
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004561 def test_del_param(self):
4562 eq = self.ndiffAssertEqual
4563 msg = self._msgobj('msg_01.txt')
4564 msg.set_param('foo', 'bar', charset='us-ascii', language='en')
4565 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
4566 charset='us-ascii', language='en')
4567 msg.del_param('foo', header='Content-Type')
4568 eq(msg.as_string(maxheaderlen=78), """\
4569Return-Path: <bbb@zzz.org>
4570Delivered-To: bbb@zzz.org
4571Received: by mail.zzz.org (Postfix, from userid 889)
4572\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
4573MIME-Version: 1.0
4574Content-Transfer-Encoding: 7bit
4575Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
4576From: bbb@ddd.com (John X. Doe)
4577To: bbb@zzz.org
4578Subject: This is a test message
4579Date: Fri, 4 May 2001 14:05:44 -0400
4580Content-Type: text/plain; charset="us-ascii";
R. David Murraydfd7eb02010-12-24 22:36:49 +00004581 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004582
4583
4584Hi,
4585
4586Do you like this message?
4587
4588-Me
4589""")
4590
4591 def test_rfc2231_get_content_charset(self):
4592 eq = self.assertEqual
4593 msg = self._msgobj('msg_32.txt')
4594 eq(msg.get_content_charset(), 'us-ascii')
4595
R. David Murraydfd7eb02010-12-24 22:36:49 +00004596 def test_rfc2231_parse_rfc_quoting(self):
4597 m = textwrap.dedent('''\
4598 Content-Disposition: inline;
4599 \tfilename*0*=''This%20is%20even%20more%20;
4600 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
4601 \tfilename*2="is it not.pdf"
4602
4603 ''')
4604 msg = email.message_from_string(m)
4605 self.assertEqual(msg.get_filename(),
4606 'This is even more ***fun*** is it not.pdf')
4607 self.assertEqual(m, msg.as_string())
4608
4609 def test_rfc2231_parse_extra_quoting(self):
4610 m = textwrap.dedent('''\
4611 Content-Disposition: inline;
4612 \tfilename*0*="''This%20is%20even%20more%20";
4613 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4614 \tfilename*2="is it not.pdf"
4615
4616 ''')
4617 msg = email.message_from_string(m)
4618 self.assertEqual(msg.get_filename(),
4619 'This is even more ***fun*** is it not.pdf')
4620 self.assertEqual(m, msg.as_string())
4621
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004622 def test_rfc2231_no_language_or_charset(self):
4623 m = '''\
4624Content-Transfer-Encoding: 8bit
4625Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
4626Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
4627
4628'''
4629 msg = email.message_from_string(m)
4630 param = msg.get_param('NAME')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004631 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004632 self.assertEqual(
4633 param,
4634 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
4635
4636 def test_rfc2231_no_language_or_charset_in_filename(self):
4637 m = '''\
4638Content-Disposition: inline;
4639\tfilename*0*="''This%20is%20even%20more%20";
4640\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4641\tfilename*2="is it not.pdf"
4642
4643'''
4644 msg = email.message_from_string(m)
4645 self.assertEqual(msg.get_filename(),
4646 'This is even more ***fun*** is it not.pdf')
4647
4648 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
4649 m = '''\
4650Content-Disposition: inline;
4651\tfilename*0*="''This%20is%20even%20more%20";
4652\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4653\tfilename*2="is it not.pdf"
4654
4655'''
4656 msg = email.message_from_string(m)
4657 self.assertEqual(msg.get_filename(),
4658 'This is even more ***fun*** is it not.pdf')
4659
4660 def test_rfc2231_partly_encoded(self):
4661 m = '''\
4662Content-Disposition: inline;
4663\tfilename*0="''This%20is%20even%20more%20";
4664\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4665\tfilename*2="is it not.pdf"
4666
4667'''
4668 msg = email.message_from_string(m)
4669 self.assertEqual(
4670 msg.get_filename(),
4671 'This%20is%20even%20more%20***fun*** is it not.pdf')
4672
4673 def test_rfc2231_partly_nonencoded(self):
4674 m = '''\
4675Content-Disposition: inline;
4676\tfilename*0="This%20is%20even%20more%20";
4677\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
4678\tfilename*2="is it not.pdf"
4679
4680'''
4681 msg = email.message_from_string(m)
4682 self.assertEqual(
4683 msg.get_filename(),
4684 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
4685
4686 def test_rfc2231_no_language_or_charset_in_boundary(self):
4687 m = '''\
4688Content-Type: multipart/alternative;
4689\tboundary*0*="''This%20is%20even%20more%20";
4690\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
4691\tboundary*2="is it not.pdf"
4692
4693'''
4694 msg = email.message_from_string(m)
4695 self.assertEqual(msg.get_boundary(),
4696 'This is even more ***fun*** is it not.pdf')
4697
4698 def test_rfc2231_no_language_or_charset_in_charset(self):
4699 # This is a nonsensical charset value, but tests the code anyway
4700 m = '''\
4701Content-Type: text/plain;
4702\tcharset*0*="This%20is%20even%20more%20";
4703\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
4704\tcharset*2="is it not.pdf"
4705
4706'''
4707 msg = email.message_from_string(m)
4708 self.assertEqual(msg.get_content_charset(),
4709 'this is even more ***fun*** is it not.pdf')
4710
4711 def test_rfc2231_bad_encoding_in_filename(self):
4712 m = '''\
4713Content-Disposition: inline;
4714\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
4715\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4716\tfilename*2="is it not.pdf"
4717
4718'''
4719 msg = email.message_from_string(m)
4720 self.assertEqual(msg.get_filename(),
4721 'This is even more ***fun*** is it not.pdf')
4722
4723 def test_rfc2231_bad_encoding_in_charset(self):
4724 m = """\
4725Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
4726
4727"""
4728 msg = email.message_from_string(m)
4729 # This should return None because non-ascii characters in the charset
4730 # are not allowed.
4731 self.assertEqual(msg.get_content_charset(), None)
4732
4733 def test_rfc2231_bad_character_in_charset(self):
4734 m = """\
4735Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
4736
4737"""
4738 msg = email.message_from_string(m)
4739 # This should return None because non-ascii characters in the charset
4740 # are not allowed.
4741 self.assertEqual(msg.get_content_charset(), None)
4742
4743 def test_rfc2231_bad_character_in_filename(self):
4744 m = '''\
4745Content-Disposition: inline;
4746\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
4747\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4748\tfilename*2*="is it not.pdf%E2"
4749
4750'''
4751 msg = email.message_from_string(m)
4752 self.assertEqual(msg.get_filename(),
4753 'This is even more ***fun*** is it not.pdf\ufffd')
4754
4755 def test_rfc2231_unknown_encoding(self):
4756 m = """\
4757Content-Transfer-Encoding: 8bit
4758Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
4759
4760"""
4761 msg = email.message_from_string(m)
4762 self.assertEqual(msg.get_filename(), 'myfile.txt')
4763
4764 def test_rfc2231_single_tick_in_filename_extended(self):
4765 eq = self.assertEqual
4766 m = """\
4767Content-Type: application/x-foo;
4768\tname*0*=\"Frank's\"; name*1*=\" Document\"
4769
4770"""
4771 msg = email.message_from_string(m)
4772 charset, language, s = msg.get_param('name')
4773 eq(charset, None)
4774 eq(language, None)
4775 eq(s, "Frank's Document")
4776
4777 def test_rfc2231_single_tick_in_filename(self):
4778 m = """\
4779Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
4780
4781"""
4782 msg = email.message_from_string(m)
4783 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004784 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004785 self.assertEqual(param, "Frank's Document")
4786
4787 def test_rfc2231_tick_attack_extended(self):
4788 eq = self.assertEqual
4789 m = """\
4790Content-Type: application/x-foo;
4791\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
4792
4793"""
4794 msg = email.message_from_string(m)
4795 charset, language, s = msg.get_param('name')
4796 eq(charset, 'us-ascii')
4797 eq(language, 'en-us')
4798 eq(s, "Frank's Document")
4799
4800 def test_rfc2231_tick_attack(self):
4801 m = """\
4802Content-Type: application/x-foo;
4803\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
4804
4805"""
4806 msg = email.message_from_string(m)
4807 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004808 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004809 self.assertEqual(param, "us-ascii'en-us'Frank's Document")
4810
4811 def test_rfc2231_no_extended_values(self):
4812 eq = self.assertEqual
4813 m = """\
4814Content-Type: application/x-foo; name=\"Frank's Document\"
4815
4816"""
4817 msg = email.message_from_string(m)
4818 eq(msg.get_param('name'), "Frank's Document")
4819
4820 def test_rfc2231_encoded_then_unencoded_segments(self):
4821 eq = self.assertEqual
4822 m = """\
4823Content-Type: application/x-foo;
4824\tname*0*=\"us-ascii'en-us'My\";
4825\tname*1=\" Document\";
4826\tname*2*=\" For You\"
4827
4828"""
4829 msg = email.message_from_string(m)
4830 charset, language, s = msg.get_param('name')
4831 eq(charset, 'us-ascii')
4832 eq(language, 'en-us')
4833 eq(s, 'My Document For You')
4834
4835 def test_rfc2231_unencoded_then_encoded_segments(self):
4836 eq = self.assertEqual
4837 m = """\
4838Content-Type: application/x-foo;
4839\tname*0=\"us-ascii'en-us'My\";
4840\tname*1*=\" Document\";
4841\tname*2*=\" For You\"
4842
4843"""
4844 msg = email.message_from_string(m)
4845 charset, language, s = msg.get_param('name')
4846 eq(charset, 'us-ascii')
4847 eq(language, 'en-us')
4848 eq(s, 'My Document For You')
4849
4850
Ezio Melottib3aedd42010-11-20 19:04:17 +00004851
R. David Murraya8f480f2010-01-16 18:30:03 +00004852# Tests to ensure that signed parts of an email are completely preserved, as
4853# required by RFC1847 section 2.1. Note that these are incomplete, because the
4854# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):

    def _msg_and_obj(self, filename):
        # Return the raw text of the named test message together with the
        # Message object parsed from that text.
        with openfile(filename) as fp:
            original = fp.read()
            msg = email.message_from_string(original)
        return original, msg

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require the two
        # to be identical.  The pattern matches from one '--boundary' line
        # to the next occurrence of the same boundary.
        repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        inmatch = repart.search(original)
        outmatch = repart.search(result)
        # search() returns None when no delimited part is found; report
        # that as a regular test failure instead of letting .group() raise
        # an AttributeError.
        self.assertIsNotNone(inmatch, 'no MIME part found in original text')
        self.assertIsNotNone(outmatch, 'no MIME part found in generated text')
        self.assertEqual(outmatch.group(2), inmatch.group(2))

    def test_long_headers_as_string(self):
        # Flattening with as_string() must not alter the first part.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Same check with an explicit header wrapping width of 60.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Same check when flattening through a Generator directly.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4887
4888
Ezio Melottib3aedd42010-11-20 19:04:17 +00004889
# Run the whole test module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()