blob: 6fddd2b4fc547eef1b282d85a985f83859dbef1b [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
R. David Murray96fd54e2010-10-08 15:55:28 +000012import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000013
R. David Murray96fd54e2010-10-08 15:55:28 +000014from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000015from itertools import chain
16
17import email
18
19from email.charset import Charset
20from email.header import Header, decode_header, make_header
21from email.parser import Parser, HeaderParser
R David Murray638d40b2012-08-24 11:14:13 -040022from email.generator import Generator, DecodedGenerator, BytesGenerator
Guido van Rossum8b3febe2007-08-30 01:15:14 +000023from email.message import Message
24from email.mime.application import MIMEApplication
25from email.mime.audio import MIMEAudio
26from email.mime.text import MIMEText
27from email.mime.image import MIMEImage
28from email.mime.base import MIMEBase
29from email.mime.message import MIMEMessage
30from email.mime.multipart import MIMEMultipart
31from email import utils
32from email import errors
33from email import encoders
34from email import iterators
35from email import base64mime
36from email import quoprimime
37
R. David Murray96fd54e2010-10-08 15:55:28 +000038from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000039from email.test import __file__ as landmark
40
# These imports are documented to work, but we are testing them using a
# different path, so we import them here just to make sure they are importable.
43from email.parser import FeedParser, BytesFeedParser
44
Guido van Rossum8b3febe2007-08-30 01:15:14 +000045
# Small string constants used by the tests below (NL joins diff lines in
# TestEmailBase.ndiffAssertEqual and the lines of flattened messages).
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
49
50
Ezio Melottib3aedd42010-11-20 19:04:17 +000051
def openfile(filename, *args, **kws):
    """Open *filename* from the ``data`` directory of the email tests.

    The directory is resolved relative to the ``email.test`` package file
    (imported as ``landmark``).  Any extra positional or keyword arguments
    are passed straight through to the built-in ``open``.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
55
56
Ezio Melottib3aedd42010-11-20 19:04:17 +000057
# Base test class
class TestEmailBase(unittest.TestCase):
    """Shared helpers for the email test cases in this module."""

    # Show complete diffs when an assertEqual comparison fails.
    maxDiff = None

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output."""
        if first != second:
            # Compare the repr() of every line so that differences in
            # whitespace or escaped characters are visible in the report.
            left, right = (
                [repr(line) for line in str(text).splitlines()]
                for text in (first, second))
            report = NL.join(difflib.ndiff(left, right))
            raise self.failureException(NL + report)

    def _msgobj(self, filename):
        """Parse the test data file *filename* and return the message."""
        path = findfile(filename)
        with openfile(path) as fp:
            message = email.message_from_file(fp)
        return message
Guido van Rossum8b3febe2007-08-30 01:15:14 +000075
Ezio Melottib3aedd42010-11-20 19:04:17 +000076
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    # The tests below are annotated with comments rather than docstrings:
    # unittest reports a test's docstring in place of its name, which would
    # change the runner output.

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # The second argument is handed back when the lookup finds nothing.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # An explicit failobj replaces None for parts without a charset.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile(findfile('msg_46.txt')) as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        # maxheaderlen=0 disables header wrapping so the output can be
        # compared with the input text verbatim.
        gen = Generator(out, mangle_from_=True, maxheaderlen=0)
        gen.flatten(msg, unixfrom=False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Message.__setitem__ appends instead of replacing, and a header
            # lookup is not guaranteed to return the newest duplicate.  Drop
            # the previous value so that every alias is really exercised.
            # (Deleting a header that is absent is a no-op.)
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the envelope line comes first; everything after
        # it must equal the plain rendering.
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header is present, so the default lookup is empty.
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        # The semicolons inside the quoted value must not be taken for
        # parameter separators.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        # Same header text, spelled with a double-quoted Python literal.
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        # No assertion: the call simply must not raise.
        msg = Message()
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        # Removing a parameter that is not there must leave the header
        # exactly as it was before the call.
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With duplicate headers only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        # The payload is expected back undecoded, as raw bytes.
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))
638
# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    # Comments are used instead of docstrings so that unittest keeps
    # reporting the test method names.

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as image_file:
            raw_image = image_file.read()
        encoded_body = email.mime.image.MIMEImage(raw_image).get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        longest = max(len(line) for line in encoded_body.split('\n'))
        self.assertLessEqual(longest, 76)

    def test_encode_empty_payload(self):
        message = Message()
        message.set_charset('us-ascii')
        self.assertEqual(message['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        # 7bit data and the default us-ascii _charset
        plain = MIMEText('hello world')
        self.assertEqual(plain['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        eight_bit = MIMEText('hello \xf8 world')
        self.assertEqual(eight_bit['content-transfer-encoding'], '8bit')
        # And now with a different charset
        latin1 = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        self.assertEqual(latin1['content-transfer-encoding'],
                         'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        message = MIMEText('文', _charset='euc-jp')
        self.assertEqual(message['content-transfer-encoding'], '7bit')

    def test_qp_encode_latin1(self):
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            =E1=F6
            """)
        message = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
        self.assertEqual(str(message), expected)

    def test_qp_encode_non_latin1(self):
        # Issue 16948
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-2"
            Content-Transfer-Encoding: quoted-printable

            =BF
            """)
        message = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
        self.assertEqual(str(message), expected)
697
Ezio Melottib3aedd42010-11-20 19:04:17 +0000698
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000699# Test long header wrapping
700class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400701
702 maxDiff = None
703
def test_split_long_continuation(self):
    # A header with an over-long continuation line must come out of the
    # generator exactly as it went in.
    source = """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
"""
    parsed = email.message_from_string(source)
    out = StringIO()
    Generator(out).flatten(parsed)
    self.ndiffAssertEqual(out.getvalue(), source)
723
def test_another_long_almost_unsplittable_header(self):
    check = self.ndiffAssertEqual
    tabbed = """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text"""
    # With tab continuation whitespace the header encodes unchanged.
    tab_header = Header(tabbed, continuation_ws='\t')
    check(tab_header.encode(), """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text""")
    # Same text with the tabs swapped for spaces and default settings.
    space_header = Header(tabbed.replace('\t', ' '))
    check(space_header.encode(), """\
bug demonstration
 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
 more text""")
740
def test_long_nonstring(self):
    check = self.ndiffAssertEqual
    latin1 = Charset("iso-8859-1")
    latin2 = Charset("iso-8859-2")
    utf8 = Charset("utf-8")
    # Three chunks, each given in a different charset.
    latin1_chunk = (b'Die Mieter treten hier ein werden mit einem Foerderband '
                    b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
                    b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
                    b'bef\xf6rdert. ')
    latin2_chunk = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                    b'd\xf9vtipu.. ')
    utf8_chunk = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                  '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                  '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                  '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                  '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                  'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                  'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                  '\u3044\u307e\u3059\u3002')
    subject = Header(latin1_chunk, latin1, header_name='Subject')
    subject.append(latin2_chunk, latin2)
    subject.append(utf8_chunk, utf8)
    message = Message()
    message['Subject'] = subject
    out = StringIO()
    Generator(out).flatten(message)
    # Rendering as the Subject header of a flattened message.
    check(out.getvalue(), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
    # Rendering of the bare header value with an explicit line length.
    check(subject.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000794
795 def test_long_header_encode(self):
796 eq = self.ndiffAssertEqual
797 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
798 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
799 header_name='X-Foobar-Spoink-Defrobnit')
800 eq(h.encode(), '''\
801wasnipoop; giraffes="very-long-necked-animals";
802 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
803
804 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
805 eq = self.ndiffAssertEqual
806 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
807 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
808 header_name='X-Foobar-Spoink-Defrobnit',
809 continuation_ws='\t')
810 eq(h.encode(), '''\
811wasnipoop; giraffes="very-long-necked-animals";
812 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
813
814 def test_long_header_encode_with_tab_continuation(self):
815 eq = self.ndiffAssertEqual
816 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
817 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
818 header_name='X-Foobar-Spoink-Defrobnit',
819 continuation_ws='\t')
820 eq(h.encode(), '''\
821wasnipoop; giraffes="very-long-necked-animals";
822\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
823
R David Murray3a6152f2011-03-14 21:13:03 -0400824 def test_header_encode_with_different_output_charset(self):
825 h = Header('文', 'euc-jp')
826 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
827
828 def test_long_header_encode_with_different_output_charset(self):
829 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
830 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
831 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
832 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
833 res = """\
834=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
835 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
836 self.assertEqual(h.encode(), res)
837
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000838 def test_header_splitter(self):
839 eq = self.ndiffAssertEqual
840 msg = MIMEText('')
841 # It'd be great if we could use add_header() here, but that doesn't
842 # guarantee an order of the parameters.
843 msg['X-Foobar-Spoink-Defrobnit'] = (
844 'wasnipoop; giraffes="very-long-necked-animals"; '
845 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
846 sfp = StringIO()
847 g = Generator(sfp)
848 g.flatten(msg)
849 eq(sfp.getvalue(), '''\
850Content-Type: text/plain; charset="us-ascii"
851MIME-Version: 1.0
852Content-Transfer-Encoding: 7bit
853X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
854 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
855
856''')
857
858 def test_no_semis_header_splitter(self):
859 eq = self.ndiffAssertEqual
860 msg = Message()
861 msg['From'] = 'test@dom.ain'
862 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
863 msg.set_payload('Test')
864 sfp = StringIO()
865 g = Generator(sfp)
866 g.flatten(msg)
867 eq(sfp.getvalue(), """\
868From: test@dom.ain
869References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
870 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
871
872Test""")
873
R David Murray7da4db12011-04-07 20:37:17 -0400874 def test_last_split_chunk_does_not_fit(self):
875 eq = self.ndiffAssertEqual
876 h = Header('Subject: the first part of this is short, but_the_second'
877 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
878 '_all_by_itself')
879 eq(h.encode(), """\
880Subject: the first part of this is short,
881 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
882
883 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
884 eq = self.ndiffAssertEqual
885 h = Header(', but_the_second'
886 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
887 '_all_by_itself')
888 eq(h.encode(), """\
889,
890 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
891
892 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
893 eq = self.ndiffAssertEqual
894 h = Header(', , but_the_second'
895 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
896 '_all_by_itself')
897 eq(h.encode(), """\
898, ,
899 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
900
901 def test_trailing_splitable_on_overlong_unsplitable(self):
902 eq = self.ndiffAssertEqual
903 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
904 'be_on_a_line_all_by_itself;')
905 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
906 "be_on_a_line_all_by_itself;")
907
908 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
909 eq = self.ndiffAssertEqual
910 h = Header('; '
911 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400912 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400913 eq(h.encode(), """\
914;
R David Murray01581ee2011-04-18 10:04:34 -0400915 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400916
R David Murraye1292a22011-04-07 20:54:03 -0400917 def test_long_header_with_multiple_sequential_split_chars(self):
R David Murraye1292a22011-04-07 20:54:03 -0400918 eq = self.ndiffAssertEqual
919 h = Header('This is a long line that has two whitespaces in a row. '
920 'This used to cause truncation of the header when folded')
921 eq(h.encode(), """\
922This is a long line that has two whitespaces in a row. This used to cause
923 truncation of the header when folded""")
924
R David Murray01581ee2011-04-18 10:04:34 -0400925 def test_splitter_split_on_punctuation_only_if_fws(self):
926 eq = self.ndiffAssertEqual
927 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
928 'they;arenotlegal;fold,points')
929 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
930 "arenotlegal;fold,points")
931
932 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
933 eq = self.ndiffAssertEqual
934 h = Header('this is a test where we need to have more than one line '
935 'before; our final line that is just too big to fit;; '
936 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
937 'be_on_a_line_all_by_itself;')
938 eq(h.encode(), """\
939this is a test where we need to have more than one line before;
940 our final line that is just too big to fit;;
941 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
942
943 def test_overlong_last_part_followed_by_split_point(self):
944 eq = self.ndiffAssertEqual
945 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
946 'be_on_a_line_all_by_itself ')
947 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
948 "should_be_on_a_line_all_by_itself ")
949
950 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
951 eq = self.ndiffAssertEqual
952 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
953 'before_our_final_line_; ; '
954 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
955 'be_on_a_line_all_by_itself; ')
956 eq(h.encode(), """\
957this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
958 ;
959 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
960
961 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
962 eq = self.ndiffAssertEqual
963 h = Header('this is a test where we need to have more than one line '
964 'before our final line; ; '
965 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
966 'be_on_a_line_all_by_itself; ')
967 eq(h.encode(), """\
968this is a test where we need to have more than one line before our final line;
969 ;
970 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
971
972 def test_long_header_with_whitespace_runs(self):
973 eq = self.ndiffAssertEqual
974 msg = Message()
975 msg['From'] = 'test@dom.ain'
976 msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
977 msg.set_payload('Test')
978 sfp = StringIO()
979 g = Generator(sfp)
980 g.flatten(msg)
981 eq(sfp.getvalue(), """\
982From: test@dom.ain
983References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
984 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
985 <foo@dom.ain> <foo@dom.ain>\x20\x20
986
987Test""")
988
989 def test_long_run_with_semi_header_splitter(self):
990 eq = self.ndiffAssertEqual
991 msg = Message()
992 msg['From'] = 'test@dom.ain'
993 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
994 msg.set_payload('Test')
995 sfp = StringIO()
996 g = Generator(sfp)
997 g.flatten(msg)
998 eq(sfp.getvalue(), """\
999From: test@dom.ain
1000References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1001 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1002 <foo@dom.ain>; abc
1003
1004Test""")
1005
1006 def test_splitter_split_on_punctuation_only_if_fws(self):
1007 eq = self.ndiffAssertEqual
1008 msg = Message()
1009 msg['From'] = 'test@dom.ain'
1010 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
1011 'they;arenotlegal;fold,points')
1012 msg.set_payload('Test')
1013 sfp = StringIO()
1014 g = Generator(sfp)
1015 g.flatten(msg)
1016 # XXX the space after the header should not be there.
1017 eq(sfp.getvalue(), """\
1018From: test@dom.ain
1019References:\x20
1020 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
1021
1022Test""")
1023
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001024 def test_no_split_long_header(self):
1025 eq = self.ndiffAssertEqual
1026 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +00001027 h = Header(hstr)
1028 # These come on two lines because Headers are really field value
1029 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001030 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001031References:
1032 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1033 h = Header('x' * 80)
1034 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001035
1036 def test_splitting_multiple_long_lines(self):
1037 eq = self.ndiffAssertEqual
1038 hstr = """\
1039from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1040\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1041\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1042"""
1043 h = Header(hstr, continuation_ws='\t')
1044 eq(h.encode(), """\
1045from babylon.socal-raves.org (localhost [127.0.0.1]);
1046 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1047 for <mailman-admin@babylon.socal-raves.org>;
1048 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1049\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1050 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1051 for <mailman-admin@babylon.socal-raves.org>;
1052 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1053\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1054 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1055 for <mailman-admin@babylon.socal-raves.org>;
1056 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1057
1058 def test_splitting_first_line_only_is_long(self):
1059 eq = self.ndiffAssertEqual
1060 hstr = """\
1061from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1062\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1063\tid 17k4h5-00034i-00
1064\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1065 h = Header(hstr, maxlinelen=78, header_name='Received',
1066 continuation_ws='\t')
1067 eq(h.encode(), """\
1068from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1069 helo=cthulhu.gerg.ca)
1070\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1071\tid 17k4h5-00034i-00
1072\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1073
1074 def test_long_8bit_header(self):
1075 eq = self.ndiffAssertEqual
1076 msg = Message()
1077 h = Header('Britische Regierung gibt', 'iso-8859-1',
1078 header_name='Subject')
1079 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001080 eq(h.encode(maxlinelen=76), """\
1081=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1082 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001083 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001084 eq(msg.as_string(maxheaderlen=76), """\
1085Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1086 =?iso-8859-1?q?hore-Windkraftprojekte?=
1087
1088""")
1089 eq(msg.as_string(maxheaderlen=0), """\
1090Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001091
1092""")
1093
1094 def test_long_8bit_header_no_charset(self):
1095 eq = self.ndiffAssertEqual
1096 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001097 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1098 'f\xfcr Offshore-Windkraftprojekte '
1099 '<a-very-long-address@example.com>')
1100 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001101 eq(msg.as_string(maxheaderlen=78), """\
1102Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1103 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1104
1105""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001106 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001107 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001108 header_name='Reply-To')
1109 eq(msg.as_string(maxheaderlen=78), """\
1110Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1111 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001112
1113""")
1114
1115 def test_long_to_header(self):
1116 eq = self.ndiffAssertEqual
1117 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001118 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001119 '"Someone Test #B" <someone@umich.edu>, '
1120 '"Someone Test #C" <someone@eecs.umich.edu>, '
1121 '"Someone Test #D" <someone@eecs.umich.edu>')
1122 msg = Message()
1123 msg['To'] = to
1124 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001125To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001126 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001127 "Someone Test #C" <someone@eecs.umich.edu>,
1128 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001129
1130''')
1131
1132 def test_long_line_after_append(self):
1133 eq = self.ndiffAssertEqual
1134 s = 'This is an example of string which has almost the limit of header length.'
1135 h = Header(s)
1136 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001137 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001138This is an example of string which has almost the limit of header length.
1139 Add another line.""")
1140
1141 def test_shorter_line_with_append(self):
1142 eq = self.ndiffAssertEqual
1143 s = 'This is a shorter line.'
1144 h = Header(s)
1145 h.append('Add another sentence. (Surprise?)')
1146 eq(h.encode(),
1147 'This is a shorter line. Add another sentence. (Surprise?)')
1148
1149 def test_long_field_name(self):
1150 eq = self.ndiffAssertEqual
1151 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001152 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1153 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1154 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1155 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001156 h = Header(gs, 'iso-8859-1', header_name=fn)
1157 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001158 eq(h.encode(maxlinelen=76), """\
1159=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1160 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1161 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1162 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001163
1164 def test_long_received_header(self):
1165 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1166 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1167 'Wed, 05 Mar 2003 18:10:18 -0700')
1168 msg = Message()
1169 msg['Received-1'] = Header(h, continuation_ws='\t')
1170 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001171 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001172 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001173Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1174 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001175 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001176Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1177 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001178 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001179
1180""")
1181
1182 def test_string_headerinst_eq(self):
1183 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1184 'tu-muenchen.de> (David Bremner\'s message of '
1185 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1186 msg = Message()
1187 msg['Received-1'] = Header(h, header_name='Received-1',
1188 continuation_ws='\t')
1189 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001190 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001191 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001192Received-1:\x20
1193 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1194 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1195Received-2:\x20
1196 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1197 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001198
1199""")
1200
1201 def test_long_unbreakable_lines_with_continuation(self):
1202 eq = self.ndiffAssertEqual
1203 msg = Message()
1204 t = """\
1205iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1206 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1207 msg['Face-1'] = t
1208 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001209 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001210 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001211 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001212 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001213Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001214 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001215 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001216Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001217 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001218 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001219Face-3:\x20
1220 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1221 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001222
1223""")
1224
1225 def test_another_long_multiline_header(self):
1226 eq = self.ndiffAssertEqual
1227 m = ('Received: from siimage.com '
1228 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001229 'Microsoft SMTPSVC(5.0.2195.4905); '
1230 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001231 msg = email.message_from_string(m)
1232 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001233Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1234 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001235
1236''')
1237
1238 def test_long_lines_with_different_header(self):
1239 eq = self.ndiffAssertEqual
1240 h = ('List-Unsubscribe: '
1241 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1242 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1243 '?subject=unsubscribe>')
1244 msg = Message()
1245 msg['List'] = h
1246 msg['List'] = Header(h, header_name='List')
1247 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001248List: List-Unsubscribe:
1249 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001250 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001251List: List-Unsubscribe:
1252 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001253 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001254
1255""")
1256
R. David Murray6f0022d2011-01-07 21:57:25 +00001257 def test_long_rfc2047_header_with_embedded_fws(self):
1258 h = Header(textwrap.dedent("""\
1259 We're going to pretend this header is in a non-ascii character set
1260 \tto see if line wrapping with encoded words and embedded
1261 folding white space works"""),
1262 charset='utf-8',
1263 header_name='Test')
1264 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1265 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1266 =?utf-8?q?cter_set?=
1267 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1268 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1269
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001270
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001271# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # Checks the '>' escaping of body lines that start with "From " when a
    # generator is created with mangle_from_=True, and its absence otherwise.

    def setUp(self):
        # Every test gets a message whose body starts with a "From " line.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def test_mangled_from(self):
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(out.getvalue(), expected)

    def test_dont_mangle_from(self):
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(out.getvalue(), expected)

    def test_mangle_from_in_preamble_and_epilog(self):
        # One "From " line sits before the first boundary and one after the
        # closing boundary; exactly two mangled lines are expected.
        out = StringIO()
        gen = Generator(out, mangle_from_=True)
        text = textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """)
        gen.flatten(email.message_from_string(text))
        mangled = [line for line in out.getvalue().split('\n')
                   if line.startswith('>From ')]
        self.assertEqual(len(mangled), 2)

    def test_mangled_from_with_bad_bytes(self):
        # Same escaping through the bytes generator, with non-ASCII bytes on
        # the "From " body line.
        headers = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

            """).encode('utf-8')
        parsed = email.message_from_bytes(headers + b'From R\xc3\xb6lli\n')
        out = BytesIO()
        BytesGenerator(out, mangle_from_=True).flatten(parsed)
        self.assertEqual(out.getvalue(), headers + b'>From R\xc3\xb6lli\n')
1339
Ezio Melottib3aedd42010-11-20 19:04:17 +00001340
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001341# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        data_dir = os.path.join(os.path.dirname(landmark), 'data', '')
        audio_path = findfile('audiotest.au', data_dir)
        with open(audio_path, 'rb') as stream:
            self._audiodata = stream.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        content_type = self._au.get_content_type()
        self.assertEqual(content_type, 'audio/basic')

    def test_encoding(self):
        # The payload is base64 text that must decode back to the raw audio.
        encoded = bytes(self._au.get_payload(), 'ascii')
        self.assertEqual(base64.decodebytes(encoded), self._audiodata)

    def test_checkSetMinor(self):
        audio = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(audio.get_content_type(), 'audio/fish')

    def test_add_header(self):
        same = self.assertEqual
        audio = self._au
        disposition = 'content-disposition'
        # Unique object used to detect "parameter not found".
        missing = []
        audio.add_header('Content-Disposition', 'attachment',
                         filename='audiotest.au')
        same(audio[disposition], 'attachment; filename="audiotest.au"')
        same(audio.get_params(header=disposition),
             [('attachment', ''), ('filename', 'audiotest.au')])
        same(audio.get_param('filename', header=disposition), 'audiotest.au')
        same(audio.get_param('attachment', header=disposition), '')
        self.assertIs(
            audio.get_param('foo', failobj=missing, header=disposition),
            missing)
        # Try some missing stuff
        self.assertIs(audio.get_param('foobar', missing), missing)
        self.assertIs(
            audio.get_param('attachment', missing, header='foobar'),
            missing)
1385
1386
Ezio Melottib3aedd42010-11-20 19:04:17 +00001387
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001388# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Load the GIF fixture once per test and wrap it in a MIMEImage.
        with openfile('PyBanner048.gif', 'rb') as stream:
            self._imgdata = stream.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        content_type = self._im.get_content_type()
        self.assertEqual(content_type, 'image/gif')

    def test_encoding(self):
        # The payload is base64 text that must decode back to the raw image.
        encoded = bytes(self._im.get_payload(), 'ascii')
        self.assertEqual(base64.decodebytes(encoded), self._imgdata)

    def test_checkSetMinor(self):
        image = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(image.get_content_type(), 'image/fish')

    def test_add_header(self):
        same = self.assertEqual
        image = self._im
        disposition = 'content-disposition'
        # Unique object used to detect "parameter not found".
        missing = []
        image.add_header('Content-Disposition', 'attachment',
                         filename='dingusfish.gif')
        same(image[disposition], 'attachment; filename="dingusfish.gif"')
        same(image.get_params(header=disposition),
             [('attachment', ''), ('filename', 'dingusfish.gif')])
        same(image.get_param('filename', header=disposition),
             'dingusfish.gif')
        same(image.get_param('attachment', header=disposition), '')
        self.assertIs(
            image.get_param('foo', failobj=missing, header=disposition),
            missing)
        # Try some missing stuff
        self.assertIs(image.get_param('foobar', missing), missing)
        self.assertIs(
            image.get_param('attachment', missing, header='foobar'),
            missing)
1426
1427
Ezio Melottib3aedd42010-11-20 19:04:17 +00001428
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001429# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    # Checks MIMEApplication with its default encoder and with the
    # encode_7or8bit / encode_noop encoders on a non-ASCII binary payload.

    def test_headers(self):
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        # Flattening must leave the original message's payload as it was...
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        # ...and the re-parsed copy must look the same.  Previously only
        # `msg` was re-checked here, so the string view of `msg2` was never
        # actually verified.
        self.assertEqual(msg2.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        # Flattening must leave the original message's payload as it was...
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        # ...and the re-parsed copy must look the same (this check on `msg2`
        # was missing; only `msg` was re-checked).
        self.assertEqual(msg2.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001478
Ezio Melottib3aedd42010-11-20 19:04:17 +00001479
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001480# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list is a unique sentinel: assertIs proves the failobj
        # itself is returned, and reports both operands if it is not.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(
            self._msg.get_param('charset', missing, header='foobar'),
            missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn shows the flattened message when the text is missing.
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1531
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001532
Ezio Melottib3aedd42010-11-20 19:04:17 +00001533
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001534# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    # Flattened headers of a container built by _make_outer() once its
    # boundary has been set to BOUNDARY.
    _OUTER_HEADERS = '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain
'''

    # Flattened form of the single 'hello world' text part followed by the
    # closing boundary, with no trailing newline.
    _HELLO_PART = '''\
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--'''

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # time.timezone/time.altzone count seconds *west* of UTC, so a
        # positive value is written with a '-' sign.
        if timetuple.tm_isdst == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        sign = '-' if tzsecs > 0 else '+'
        # Format the magnitude as HHMM.  Dividing the signed value by 36
        # doubled the sign for zones east of UTC and gave wrong minutes for
        # offsets that are not a whole number of hours.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def _make_outer(self):
        # Bare multipart/mixed container with the three headers the
        # flattening tests expect; the caller sets the boundary.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        return outer

    def _flatten_hello(self, **attrs):
        # Build the standard container, apply the given attributes
        # (preamble/epilogue), attach one 'hello world' text part and
        # return the flattened message.
        outer = self._make_outer()
        for name, value in attrs.items():
            setattr(outer, name, value)
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        return outer.as_string()

    def test_hierarchy(self):
        eq = self.assertEqual
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The payload must hold the very objects that were attached.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._make_outer()
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(
            outer.as_string(),
            self._OUTER_HEADERS + '\n--BOUNDARY\n\n--BOUNDARY--')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._make_outer()
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        # The empty preamble adds a blank line before the first boundary
        # and the empty epilogue adds a newline after the last one.
        self.ndiffAssertEqual(
            outer.as_string(),
            self._OUTER_HEADERS + '\n\n--BOUNDARY\n\n--BOUNDARY--\n')

    def test_one_part_in_a_multipart(self):
        self.ndiffAssertEqual(
            self._flatten_hello(),
            self._OUTER_HEADERS + '\n' + self._HELLO_PART)

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        # An empty preamble costs one extra blank line after the headers.
        self.ndiffAssertEqual(
            self._flatten_hello(preamble=''),
            self._OUTER_HEADERS + '\n\n' + self._HELLO_PART)

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        self.ndiffAssertEqual(
            self._flatten_hello(preamble=None),
            self._OUTER_HEADERS + '\n' + self._HELLO_PART)

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        self.ndiffAssertEqual(
            self._flatten_hello(epilogue=None),
            self._OUTER_HEADERS + '\n' + self._HELLO_PART)

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        # An empty epilogue terminates the closing boundary with a newline.
        self.ndiffAssertEqual(
            self._flatten_hello(epilogue=''),
            self._OUTER_HEADERS + '\n' + self._HELLO_PART + '\n')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        # The epilogue's own newline follows the one ending the boundary.
        self.ndiffAssertEqual(
            self._flatten_hello(epilogue='\n'),
            self._OUTER_HEADERS + '\n' + self._HELLO_PART + '\n\n')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): _structure() indents four spaces per nesting level;
        # the depths below were restored on that basis - confirm against
        # msg_38.txt.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): _structure() indents four spaces per nesting level;
        # the depths below were restored on that basis - confirm against
        # msg_39.txt.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001906
1907
Ezio Melottib3aedd42010-11-20 19:04:17 +00001908
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001909# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    def _assert_no_boundary_defects(self, msg):
        # msg must carry exactly the two defects recorded for a multipart
        # message that has no boundary parameter, in this order.
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        # Without a boundary the body is kept as one string.
        self.assertIsInstance(msg.get_payload(), str)
        self._assert_no_boundary_defects(msg)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self._assert_no_boundary_defects(msg)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        eq(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        eq(msg.defects[0].line, ' Line 1\n')
2020
2021
Ezio Melottib3aedd42010-11-20 19:04:17 +00002022
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002023# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    def test_rfc2047_multiline(self):
        # A folded header: the second physical line starts with one space.
        folded = ('Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz\n'
                  ' foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=')
        chunks = decode_header(folded)
        self.assertEqual(chunks, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        rebuilt = make_header(chunks)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        # Re-encoding has to split the last encoded word to fit 76 columns.
        self.ndiffAssertEqual(
            rebuilt.encode(maxlinelen=76),
            'Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar '
            '=?mac-iceland?q?r=8Aksm?=\n'
            ' =?mac-iceland?q?=9Arg=8Cs?=')

    def test_whitespace_eater_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Andr\xe9', 'iso-8859-1'),
                                  (b'Pirard <pirard@dom.ain>', None)])
        self.assertEqual(str(make_header(chunks)),
                         'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        raw = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
               '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'The', None),
                                  (b'quick brown fox', 'iso-8859-1'),
                                  (b'jumped over the', None),
                                  (b'lazy dog', 'iso-8859-1')])
        self.assertEqual(str(make_header(chunks)),
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued to their neighbours are returned undecoded.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        expected = [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                    (b'rg', None), (b'\xe5', 'iso-8859-1'),
                    (b'sbord', None)]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_B_bad_padding(self):
        # only test complete bytes
        cases = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        ]
        for encoded, decoded in cases:
            chunks = decode_header('=?iso-8859-1?B?%s?=' % encoded)
            self.assertEqual(chunks, [(decoded, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        expected = [(b'andr\xe9=zz', 'iso-8659-1')]
        self.assertEqual(decode_header(raw), expected)
2087
Ezio Melottib3aedd42010-11-20 19:04:17 +00002088
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002089# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    def setUp(self):
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def _check_rfc822_containers(self, filename):
        # Parse the named data file and check that its first two parts are
        # message/rfc822 containers (by default and in fact), each wrapping
        # a text/plain message.
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        containers = [msg.get_payload(0), msg.get_payload(1)]
        for container in containers:
            eq(container.get_default_type(), 'message/rfc822')
            eq(container.get_content_type(), 'message/rfc822')
        for container in containers:
            inner = container.get_payload(0)
            eq(inner.get_default_type(), 'text/plain')
            eq(inner.get_content_type(), 'text/plain')

    def test_type_error(self):
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapped message is stored as-is, not copied.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        # A message/rfc822 part already holds its one message.
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the indentation of the field lines below was
        # restored as two spaces - confirm against msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        self._check_rfc822_containers('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_rfc822_containers('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With their own headers removed, the subparts must still report
        # message/rfc822 and flatten with an empty header block.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002388
Ezio Melottib3aedd42010-11-20 19:04:17 +00002389
# A general test of parser->model->generator idempotency.  In other words:
# read a message in, parse it into a message object tree, then, without
# touching the tree, regenerate the plain text.  The original text and the
# transformed text should be identical.  Note that we ignore the Unix-From
# line, since it may contain a changed date.
class TestIdempotent(TestEmailBase):
    """Round-trip sample messages through the parser and the generator.

    Each test reads a fixture file, parses it with
    ``email.message_from_string`` and checks that flattening the message
    tree again (with header wrapping disabled) reproduces the text that
    was read.  ``linesep``, ``_msgobj`` and ``_idempotent`` are always
    reached through ``self``.
    """

    # Appended to the expected preamble/epilogue/payload text below.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return (parsed message, raw text) for the named fixture.
        with openfile(filename) as source:
            raw = source.read()
        return email.message_from_string(raw), raw

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg without header wrapping and diff it against text.
        out = StringIO()
        Generator(out, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, out.getvalue())

    def test_parse_text_message(self):
        msg, text = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')
        self.assertEqual(msg.get_params()[1], ('charset', 'us-ascii'))
        self.assertEqual(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        msg, text = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_message_delivery_status(self):
        # The only round trip here that also emits the Unix-From line.
        self._idempotent(*self._msgobj('msg_43.txt'), unixfrom=True)

    def test_message_signed_idempotent(self):
        self._idempotent(*self._msgobj('msg_45.txt'))

    def test_content_type(self):
        msg, text = self._msgobj('msg_05.txt')
        self.assertEqual(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        self.assertEqual(params['report-type'], 'delivery-status')
        self.assertEqual(params['boundary'],
                         'D1690A7AC1.996856090/mail.example.com')
        self.assertEqual(
            msg.preamble,
            'This is a MIME-encapsulated message.' + self.linesep)
        self.assertEqual(msg.epilogue, self.linesep)
        self.assertEqual(len(msg.get_payload()), 3)
        # The first two subparts are identical text/plain parts.
        yadda = 'Yadda yadda yadda' + self.linesep
        for index in (0, 1):
            text_part = msg.get_payload(index)
            self.assertEqual(text_part.get_content_type(), 'text/plain')
            self.assertEqual(text_part.get_payload(), yadda)
        # The third one wraps exactly one nested message.
        wrapper = msg.get_payload(2)
        self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
        self.assertIsInstance(wrapper, Message)
        nested = wrapper.get_payload()
        self.assertIsInstance(nested, list)
        self.assertEqual(len(nested), 1)
        inner = nested[0]
        self.assertIsInstance(inner, Message)
        self.assertEqual(inner.get_payload(), yadda)

    def test_parser(self):
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        self.assertEqual(msg.get_content_type(), 'message/rfc822')
        # The payload must be a list holding exactly one sub-Message, and
        # that sub-message must be text/plain with a string payload.
        nested = msg.get_payload()
        self.assertIsInstance(nested, list)
        self.assertEqual(len(nested), 1)
        inner = nested[0]
        self.assertIsInstance(inner, Message)
        self.assertEqual(inner.get_content_type(), 'text/plain')
        self.assertIsInstance(inner.get_payload(), str)
        self.assertEqual(inner.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002552
2553
Ezio Melottib3aedd42010-11-20 19:04:17 +00002554
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002555# Test various other bits of the package's functionality
class TestMiscellaneous(TestEmailBase):
    """Assorted checks of package-level helpers.

    Covers the ``email.message_from_*`` factories, ``email.__all__``, the
    date and address helpers in ``email.utils``, ``Charset`` comparison and
    body-encoding selection, and line-ending handling in the generators.
    """

    def test_message_from_string(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        # Don't wrap/continue long headers since we're trying to test
        # idempotency.
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        self.assertEqual(text, s.getvalue())

    def test_message_from_file(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()
            # Rewind so the parser sees the same content we just read.
            fp.seek(0)
            msg = email.message_from_file(fp)
        s = StringIO()
        # Don't wrap/continue long headers since we're trying to test
        # idempotency.
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        self.assertEqual(text, s.getvalue())

    def test_message_from_string_with_class(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()

        # Create a subclass
        class MyMessage(Message):
            pass

        msg = email.message_from_string(text, MyMessage)
        self.assertIsInstance(msg, MyMessage)
        # Try something more complicated: every part of a multipart tree
        # must be built from the supplied class too.
        with openfile('msg_02.txt') as fp:
            text = fp.read()
        msg = email.message_from_string(text, MyMessage)
        for subpart in msg.walk():
            self.assertIsInstance(subpart, MyMessage)

    def test_message_from_file_with_class(self):
        # Create a subclass
        class MyMessage(Message):
            pass

        with openfile('msg_01.txt') as fp:
            msg = email.message_from_file(fp, MyMessage)
        self.assertIsInstance(msg, MyMessage)
        # Try something more complicated
        with openfile('msg_02.txt') as fp:
            msg = email.message_from_file(fp, MyMessage)
        for subpart in msg.walk():
            self.assertIsInstance(subpart, MyMessage)

    def test__all__(self):
        # Sorted so the comparison does not depend on declaration order.
        self.assertEqual(sorted(email.__all__), [
            'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
            'generator', 'header', 'iterators', 'message',
            'message_from_binary_file', 'message_from_bytes',
            'message_from_file', 'message_from_string', 'mime', 'parser',
            'quoprimime', 'utils',
            ])

    def test_formatdate(self):
        now = time.time()
        self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
                         time.gmtime(now)[:6])

    def test_formatdate_localtime(self):
        now = time.time()
        self.assertEqual(
            utils.parsedate(utils.formatdate(now, localtime=True))[:6],
            time.localtime(now)[:6])

    def test_formatdate_usegmt(self):
        now = time.time()
        self.assertEqual(
            utils.formatdate(now, localtime=False),
            time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
        self.assertEqual(
            utils.formatdate(now, localtime=False, usegmt=True),
            time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))

    def test_parsedate_none(self):
        self.assertIsNone(utils.parsedate(''))

    def test_parsedate_compact(self):
        # The FWS after the comma is optional
        self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
                         utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))

    def test_parsedate_no_dayofweek(self):
        self.assertEqual(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
                         (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))

    def test_parsedate_compact_no_dayofweek(self):
        self.assertEqual(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
                         (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))

    def test_parsedate_no_space_before_positive_offset(self):
        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))

    def test_parsedate_no_space_before_negative_offset(self):
        # Issue 1155362: we already handled '+' for this case.
        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))

    def test_parsedate_acceptable_to_time_functions(self):
        eq = self.assertEqual
        timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
        t = int(time.mktime(timetup))
        eq(time.localtime(t)[:6], timetup[:6])
        eq(int(time.strftime('%Y', timetup)), 2003)
        # parsedate_tz() returns a tenth item (the offset); the time
        # functions only take the first nine.
        timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
        t = int(time.mktime(timetup[:9]))
        eq(time.localtime(t)[:6], timetup[:6])
        eq(int(time.strftime('%Y', timetup[:9])), 2003)

    def test_mktime_tz(self):
        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
                                          -1, -1, -1, 0)), 0)
        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
                                          -1, -1, -1, 1234)), -1234)

    def test_parsedate_y2k(self):
        """Test for parsing a date with a two-digit year.

        Parsing a date with a two-digit year should return the correct
        four-digit year. RFC822 allows two-digit years, but RFC2822 (which
        obsoletes RFC822) requires four-digit years.

        """
        self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
                         utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
        self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
                         utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))

    def test_parseaddr_empty(self):
        self.assertEqual(utils.parseaddr('<>'), ('', ''))
        self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')

    def test_noquote_dump(self):
        self.assertEqual(
            utils.formataddr(('A Silly Person', 'person@dom.ain')),
            'A Silly Person <person@dom.ain>')

    def test_escape_dump(self):
        self.assertEqual(
            utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
            r'"A \(Very\) Silly Person" <person@dom.ain>')
        a = r'A \(Special\) Person'
        b = 'person@dom.ain'
        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))

    def test_escape_backslashes(self):
        # Raw strings throughout: '\B' and '\ ' are not valid escape
        # sequences, so a plain literal only yields the intended
        # backslashes by accident and warns on newer interpreters.
        self.assertEqual(
            utils.formataddr((r'Arthur \Backslash\ Foobar', 'person@dom.ain')),
            r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
        a = r'Arthur \Backslash\ Foobar'
        b = 'person@dom.ain'
        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))

    def test_name_with_dot(self):
        x = 'John X. Doe <jxd@example.com>'
        y = '"John X. Doe" <jxd@example.com>'
        a, b = ('John X. Doe', 'jxd@example.com')
        self.assertEqual(utils.parseaddr(x), (a, b))
        self.assertEqual(utils.parseaddr(y), (a, b))
        # formataddr() quotes the name if there's a dot in it
        self.assertEqual(utils.formataddr((a, b)), y)

    def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
        # issue 10005.  Note that in the third test the second pair of
        # backslashes is not actually a quoted pair because it is not inside a
        # comment or quoted string: the address being parsed has a quoted
        # string containing a quoted backslash, followed by 'example' and two
        # backslashes, followed by another quoted string containing a space and
        # the word 'example'.  parseaddr copies those two backslashes
        # literally.  Per rfc5322 this is not technically correct since a \ may
        # not appear in an address outside of a quoted string.  It is probably
        # a sensible Postel interpretation, though.
        eq = self.assertEqual
        eq(utils.parseaddr('""example" example"@example.com'),
           ('', '""example" example"@example.com'))
        eq(utils.parseaddr('"\\"example\\" example"@example.com'),
           ('', '"\\"example\\" example"@example.com'))
        eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
           ('', '"\\\\"example\\\\" example"@example.com'))

    def test_parseaddr_preserves_spaces_in_local_part(self):
        # issue 9286.  A normal RFC5322 local part should not contain any
        # folding white space, but legacy local parts can (they are a sequence
        # of atoms, not dotatoms).  On the other hand we strip whitespace from
        # before the @ and around dots, on the assumption that the whitespace
        # around the punctuation is a mistake in what would otherwise be
        # an RFC5322 local part.  Leading whitespace is, as usual, stripped
        # as well.
        #
        # NOTE(review): the first two assertions are textually identical in
        # the copy under review; the second presumably differed in its
        # internal whitespace originally -- confirm against the repository.
        self.assertEqual(('', "merwok wok@xample.com"),
                         utils.parseaddr("merwok wok@xample.com"))
        self.assertEqual(('', "merwok wok@xample.com"),
                         utils.parseaddr("merwok wok@xample.com"))
        self.assertEqual(('', "merwok wok@xample.com"),
                         utils.parseaddr(" merwok wok @xample.com"))
        self.assertEqual(('', 'merwok"wok" wok@xample.com'),
                         utils.parseaddr('merwok"wok" wok@xample.com'))
        self.assertEqual(('', 'merwok.wok.wok@xample.com'),
                         utils.parseaddr('merwok. wok . wok@xample.com'))

    def test_multiline_from_comment(self):
        x = """\
Foo
\tBar <foo@example.com>"""
        self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))

    def test_quote_dump(self):
        self.assertEqual(
            utils.formataddr(('A Silly; Person', 'person@dom.ain')),
            r'"A Silly; Person" <person@dom.ain>')

    def test_charset_richcomparisons(self):
        eq = self.assertEqual
        ne = self.assertNotEqual
        cset1 = Charset()
        cset2 = Charset()
        # Comparison with a string ignores case, on either side...
        eq(cset1, 'us-ascii')
        eq(cset1, 'US-ASCII')
        eq(cset1, 'Us-AsCiI')
        eq('us-ascii', cset1)
        eq('US-ASCII', cset1)
        eq('Us-AsCiI', cset1)
        # ...but the hyphen is significant.
        ne(cset1, 'usascii')
        ne(cset1, 'USASCII')
        ne(cset1, 'UsAsCiI')
        ne('usascii', cset1)
        ne('USASCII', cset1)
        ne('UsAsCiI', cset1)
        eq(cset1, cset2)
        eq(cset2, cset1)

    def test_getaddresses(self):
        eq = self.assertEqual
        eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
                               'Bud Person <bperson@dom.ain>']),
           [('Al Person', 'aperson@dom.ain'),
            ('Bud Person', 'bperson@dom.ain')])

    def test_getaddresses_nasty(self):
        eq = self.assertEqual
        eq(utils.getaddresses(['foo: ;']), [('', '')])
        eq(utils.getaddresses(
           ['[]*-- =~$']),
           [('', ''), ('', ''), ('', '*--')])
        eq(utils.getaddresses(
           ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
           [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])

    def test_getaddresses_embedded_comment(self):
        """Test proper handling of a nested comment"""
        eq = self.assertEqual
        addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
        eq(addrs[0][1], 'foo@bar.com')

    def test_utils_quote_unquote(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('content-disposition', 'attachment',
                       filename='foo\\wacky"name')
        eq(msg.get_filename(), 'foo\\wacky"name')

    def test_get_body_encoding_with_bogus_charset(self):
        charset = Charset('not a charset')
        self.assertEqual(charset.get_body_encoding(), 'base64')

    def test_get_body_encoding_with_uppercase_charset(self):
        eq = self.assertEqual
        msg = Message()
        msg['Content-Type'] = 'text/plain; charset=UTF-8'
        eq(msg['content-type'], 'text/plain; charset=UTF-8')
        charsets = msg.get_charsets()
        eq(len(charsets), 1)
        eq(charsets[0], 'utf-8')
        charset = Charset(charsets[0])
        eq(charset.get_body_encoding(), 'base64')
        msg.set_payload(b'hello world', charset=charset)
        eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
        eq(msg.get_payload(decode=True), b'hello world')
        eq(msg['content-transfer-encoding'], 'base64')
        # Try another one
        msg = Message()
        msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
        charsets = msg.get_charsets()
        eq(len(charsets), 1)
        eq(charsets[0], 'us-ascii')
        charset = Charset(charsets[0])
        eq(charset.get_body_encoding(), encoders.encode_7or8bit)
        msg.set_payload('hello world', charset=charset)
        eq(msg.get_payload(), 'hello world')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_charsets_case_insensitive(self):
        lc = Charset('us-ascii')
        uc = Charset('US-ASCII')
        self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())

    def test_partial_falls_inside_message_delivery_status(self):
        eq = self.ndiffAssertEqual
        # The Parser interface provides chunks of data to FeedParser in 8192
        # byte gulps.  SF bug #1076485 found one of those chunks inside
        # message/delivery-status header block, which triggered an
        # unreadline() of NeedMoreData.
        msg = self._msgobj('msg_43.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the nesting indentation was collapsed in the copy
        # under review and is rebuilt here at 4 spaces per level, which is
        # what iterators._structure is understood to emit -- confirm against
        # the repository.  The 26 is the count of status entries that
        # followed message/delivery-status in that copy.
        expected = (
            'multipart/report\n'
            '    text/plain\n'
            '    message/delivery-status\n'
            + '        text/plain\n' * 26 +
            '    text/rfc822-headers\n'
        )
        eq(sfp.getvalue(), expected)

    def test_make_msgid_domain(self):
        # '@testdomain-string>' is 19 characters long.
        self.assertEqual(
            email.utils.make_msgid(domain='testdomain-string')[-19:],
            '@testdomain-string>')

    def test_Generator_linend(self):
        # Issue 14645.
        # newline='\n' keeps the CRLFs stored in the fixture.
        with openfile('msg_26.txt', newline='\n') as f:
            msgtxt = f.read()
        msgtxt_nl = msgtxt.replace('\r\n', '\n')
        msg = email.message_from_string(msgtxt)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), msgtxt_nl)

    def test_BytesGenerator_linend(self):
        # Issue 14645.
        with openfile('msg_26.txt', newline='\n') as f:
            msgtxt = f.read()
        msgtxt_nl = msgtxt.replace('\r\n', '\n')
        msg = email.message_from_string(msgtxt_nl)
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue().decode('ascii'), msgtxt)

    def test_BytesGenerator_linend_with_non_ascii(self):
        # Issue 14645.
        with openfile('msg_26.txt', 'rb') as f:
            msgtxt = f.read()
        # Inject a non-ASCII byte into the body before round-tripping.
        msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6')
        msgtxt_nl = msgtxt.replace(b'\r\n', b'\n')
        msg = email.message_from_bytes(msgtxt_nl)
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), msgtxt)
2947
Ezio Melottib3aedd42010-11-20 19:04:17 +00002948
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002949# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Checks for the helpers in ``email.iterators`` and for line
    reassembly in the feed parser's ``BufferedSubFile``."""

    def test_body_line_iterator(self):
        # First a simple non-multipart message
        simple = self._msgobj('msg_01.txt')
        body_lines = list(iterators.body_line_iterator(simple))
        self.assertEqual(len(body_lines), 6)
        self.ndiffAssertEqual(EMPTYSTRING.join(body_lines),
                              simple.get_payload())
        # Now a more complicated multipart
        multipart = self._msgobj('msg_02.txt')
        body_lines = list(iterators.body_line_iterator(multipart))
        self.assertEqual(len(body_lines), 43)
        with openfile('msg_19.txt') as fp:
            self.ndiffAssertEqual(EMPTYSTRING.join(body_lines), fp.read())

    def test_typed_subpart_iterator(self):
        msg = self._msgobj('msg_04.txt')
        payloads = [part.get_payload()
                    for part in iterators.typed_subpart_iterator(msg, 'text')]
        self.assertEqual(len(payloads), 2)
        self.assertEqual(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        msg = self._msgobj('msg_03.txt')
        matching = iterators.typed_subpart_iterator(msg, 'text', 'plain')
        payloads = [part.get_payload() for part in matching]
        self.assertEqual(len(payloads), 1)
        self.assertEqual(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
        line endings.  A CR ending one push() followed by a LF starting
        the next push() added an empty line.
        '''
        from email.feedparser import BufferedSubFile, NeedMoreData
        # (chunk pushed, number of complete lines readable afterwards)
        chunks = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        bsf = BufferedSubFile()
        collected = []
        expected_total = 0
        for chunk, expected in chunks:
            bsf.push(chunk)
            expected_total += expected
            # Drain every line that is complete at this point.
            produced = list(iter(bsf.readline, NeedMoreData))
            collected.extend(produced)
            self.assertEqual(expected, len(produced))
        self.assertEqual(len(collected), expected_total)
        self.assertEqual(''.join(chunk for chunk, _ in chunks),
                         ''.join(collected))
3036
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003037
Ezio Melottib3aedd42010-11-20 19:04:17 +00003038
class TestParsers(TestEmailBase):
    """Parser behaviour: header-only parsing, whitespace continuation
    lines, CRLF handling, digests and RFC 2822 header-name syntax."""

    # Always show full diffs when an assertion in this class fails.
    maxDiff = None

    def test_header_parser(self):
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        self.assertEqual(msg['from'], 'ppp-request@zzz.org')
        self.assertEqual(msg['to'], 'ppp@zzz.org')
        self.assertEqual(msg.get_content_type(), 'multipart/mixed')
        # The body is left unparsed, as one string.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        source = """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
"""
        msg = email.message_from_string(source)
        self.assertEqual(msg['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(msg['message-id'], 'spam')
        self.assertEqual(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        # Like the previous test, but the subject line is the last
        # header.
        source = """\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
"""
        msg = email.message_from_string(source)
        self.assertEqual(msg['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(msg['message-id'], 'spam')
        self.assertEqual(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        self.assertEqual(len(msg.get_payload()), 2)
        first, second = msg.get_payload(0), msg.get_payload(1)
        self.assertEqual(first.get_content_type(), 'text/plain')
        self.assertEqual(first.get_payload(),
                         'Simple email with attachment.\r\n\r\n')
        self.assertEqual(second.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        out = StringIO()
        Generator(out).flatten(email.message_from_string(text),
                               linesep='\r\n')
        self.assertEqual(out.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertEqual(msg.is_multipart(), 1)
        self.assertEqual(len(msg.get_payload()), 2)
        for index, body in enumerate(('message 1\n', 'message 2\n')):
            wrapper = msg.get_payload(index)
            self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
            self.assertEqual(wrapper.is_multipart(), 1)
            self.assertEqual(len(wrapper.get_payload()), 1)
            inner = wrapper.get_payload(0)
            self.assertEqual(inner.is_multipart(), 0)
            self.assertEqual(inner.get_content_type(), 'text/plain')
            self.ndiffAssertEqual(inner.get_payload(), body)

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        source = NL.join(['From: Andrew Person <aperson@dom.ain',
                          'Subject: Test',
                          'Date: Tue, 20 Aug 2002 16:43:45 +1000'])
        msg = email.message_from_string(source)
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        first_value, second_value = 'text', 'more text'
        source = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            first_value, second_value)
        msg = email.message_from_string(source)
        self.assertEqual(msg.get('Header'), first_value)
        self.assertEqual(msg.get('Next-Header'), second_value)

    def test_rfc2822_header_syntax(self):
        source = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(source)
        self.assertEqual(len(msg), 3)
        self.assertEqual(sorted(msg.keys()), ['!"#QUX;~', '>From', 'From'])
        self.assertEqual(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        source = ('>From foo@example.com 11:25:53\n'
                  'From: bar\n!"#QUX;~: zoo\n\nbody')
        msg = email.message_from_string(source)
        self.assertEqual(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        source = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(source)
        self.assertEqual(sorted(msg.keys()), ['A', 'B', 'CC'])
        self.assertEqual(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        source = ''.join([
            "From: foo@bar.com\n",
            "To: baz\n",
            "Mime-Version: 1.0\n",
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n",
            "\n",
            "--BOUNDARY\n",
            "Content-Type: text/plain\n",
            "\n",
            "body ending with CRLF newline\r\n",
            "\n",
            "--BOUNDARY--\n",
            ])
        msg = email.message_from_string(source)
        body = msg.get_payload(0).get_payload()
        self.assertTrue(body.endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003201
Ezio Melottib3aedd42010-11-20 19:04:17 +00003202
R. David Murray96fd54e2010-10-08 15:55:28 +00003203class Test8BitBytesHandling(unittest.TestCase):
3204 # In Python3 all input is string, but that doesn't work if the actual input
3205 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3206 # decode byte streams using the surrogateescape error handler, and
3207 # reconvert to binary at appropriate places if we detect surrogates. This
3208 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3209 # but it does allow us to parse and preserve them, and to decode body
3210 # parts that use an 8bit CTE.
3211
3212 bodytest_msg = textwrap.dedent("""\
3213 From: foo@bar.com
3214 To: baz
3215 Mime-Version: 1.0
3216 Content-Type: text/plain; charset={charset}
3217 Content-Transfer-Encoding: {cte}
3218
3219 {bodyline}
3220 """)
3221
3222 def test_known_8bit_CTE(self):
3223 m = self.bodytest_msg.format(charset='utf-8',
3224 cte='8bit',
3225 bodyline='pöstal').encode('utf-8')
3226 msg = email.message_from_bytes(m)
3227 self.assertEqual(msg.get_payload(), "pöstal\n")
3228 self.assertEqual(msg.get_payload(decode=True),
3229 "pöstal\n".encode('utf-8'))
3230
3231 def test_unknown_8bit_CTE(self):
3232 m = self.bodytest_msg.format(charset='notavalidcharset',
3233 cte='8bit',
3234 bodyline='pöstal').encode('utf-8')
3235 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003236 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003237 self.assertEqual(msg.get_payload(decode=True),
3238 "pöstal\n".encode('utf-8'))
3239
3240 def test_8bit_in_quopri_body(self):
3241 # This is non-RFC compliant data...without 'decode' the library code
3242 # decodes the body using the charset from the headers, and because the
3243 # source byte really is utf-8 this works. This is likely to fail
3244 # against real dirty data (ie: produce mojibake), but the data is
3245 # invalid anyway so it is as good a guess as any. But this means that
3246 # this test just confirms the current behavior; that behavior is not
3247 # necessarily the best possible behavior. With 'decode' it is
3248 # returning the raw bytes, so that test should be of correct behavior,
3249 # or at least produce the same result that email4 did.
3250 m = self.bodytest_msg.format(charset='utf-8',
3251 cte='quoted-printable',
3252 bodyline='p=C3=B6stál').encode('utf-8')
3253 msg = email.message_from_bytes(m)
3254 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3255 self.assertEqual(msg.get_payload(decode=True),
3256 'pöstál\n'.encode('utf-8'))
3257
3258 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3259 # This is similar to the previous test, but proves that if the 8bit
3260 # byte is undecodeable in the specified charset, it gets replaced
3261 # by the unicode 'unknown' character. Again, this may or may not
3262 # be the ideal behavior. Note that if decode=False none of the
3263 # decoders will get involved, so this is the only test we need
3264 # for this behavior.
3265 m = self.bodytest_msg.format(charset='ascii',
3266 cte='quoted-printable',
3267 bodyline='p=C3=B6stál').encode('utf-8')
3268 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003269 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003270 self.assertEqual(msg.get_payload(decode=True),
3271 'pöstál\n'.encode('utf-8'))
3272
3273 def test_8bit_in_base64_body(self):
3274 # Sticking an 8bit byte in a base64 block makes it undecodable by
3275 # normal means, so the block is returned undecoded, but as bytes.
3276 m = self.bodytest_msg.format(charset='utf-8',
3277 cte='base64',
3278 bodyline='cMO2c3RhbAá=').encode('utf-8')
3279 msg = email.message_from_bytes(m)
3280 self.assertEqual(msg.get_payload(decode=True),
3281 'cMO2c3RhbAá=\n'.encode('utf-8'))
3282
3283 def test_8bit_in_uuencode_body(self):
3284 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3285 # normal means, so the block is returned undecoded, but as bytes.
3286 m = self.bodytest_msg.format(charset='utf-8',
3287 cte='uuencode',
3288 bodyline='<,.V<W1A; á ').encode('utf-8')
3289 msg = email.message_from_bytes(m)
3290 self.assertEqual(msg.get_payload(decode=True),
3291 '<,.V<W1A; á \n'.encode('utf-8'))
3292
3293
R. David Murray92532142011-01-07 23:25:30 +00003294 headertest_headers = (
3295 ('From: foo@bar.com', ('From', 'foo@bar.com')),
3296 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3297 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3298 '\tJean de Baddie',
3299 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3300 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3301 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3302 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3303 )
3304 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3305 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003306
3307 def test_get_8bit_header(self):
3308 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003309 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3310 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003311
3312 def test_print_8bit_headers(self):
3313 msg = email.message_from_bytes(self.headertest_msg)
3314 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003315 textwrap.dedent("""\
3316 From: {}
3317 To: {}
3318 Subject: {}
3319 From: {}
3320
3321 Yes, they are flying.
3322 """).format(*[expected[1] for (_, expected) in
3323 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003324
3325 def test_values_with_8bit_headers(self):
3326 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003327 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003328 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003329 'b\uFFFD\uFFFDz',
3330 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3331 'coll\uFFFD\uFFFDgue, le pouf '
3332 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003333 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003334 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003335
3336 def test_items_with_8bit_headers(self):
3337 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003338 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003339 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003340 ('To', 'b\uFFFD\uFFFDz'),
3341 ('Subject', 'Maintenant je vous '
3342 'pr\uFFFD\uFFFDsente '
3343 'mon coll\uFFFD\uFFFDgue, le pouf '
3344 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3345 '\tJean de Baddie'),
3346 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003347
3348 def test_get_all_with_8bit_headers(self):
3349 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003350 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003351 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003352 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003353
R David Murraya2150232011-03-16 21:11:23 -04003354 def test_get_content_type_with_8bit(self):
3355 msg = email.message_from_bytes(textwrap.dedent("""\
3356 Content-Type: text/pl\xA7in; charset=utf-8
3357 """).encode('latin-1'))
3358 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3359 self.assertEqual(msg.get_content_maintype(), "text")
3360 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3361
3362 def test_get_params_with_8bit(self):
3363 msg = email.message_from_bytes(
3364 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3365 self.assertEqual(msg.get_params(header='x-header'),
3366 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3367 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3368 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3369 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3370
3371 def test_get_rfc2231_params_with_8bit(self):
3372 msg = email.message_from_bytes(textwrap.dedent("""\
3373 Content-Type: text/plain; charset=us-ascii;
3374 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3375 ).encode('latin-1'))
3376 self.assertEqual(msg.get_param('title'),
3377 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3378
3379 def test_set_rfc2231_params_with_8bit(self):
3380 msg = email.message_from_bytes(textwrap.dedent("""\
3381 Content-Type: text/plain; charset=us-ascii;
3382 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3383 ).encode('latin-1'))
3384 msg.set_param('title', 'test')
3385 self.assertEqual(msg.get_param('title'), 'test')
3386
3387 def test_del_rfc2231_params_with_8bit(self):
3388 msg = email.message_from_bytes(textwrap.dedent("""\
3389 Content-Type: text/plain; charset=us-ascii;
3390 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3391 ).encode('latin-1'))
3392 msg.del_param('title')
3393 self.assertEqual(msg.get_param('title'), None)
3394 self.assertEqual(msg.get_content_maintype(), 'text')
3395
3396 def test_get_payload_with_8bit_cte_header(self):
3397 msg = email.message_from_bytes(textwrap.dedent("""\
3398 Content-Transfer-Encoding: b\xa7se64
3399 Content-Type: text/plain; charset=latin-1
3400
3401 payload
3402 """).encode('latin-1'))
3403 self.assertEqual(msg.get_payload(), 'payload\n')
3404 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3405
R. David Murray96fd54e2010-10-08 15:55:28 +00003406 non_latin_bin_msg = textwrap.dedent("""\
3407 From: foo@bar.com
3408 To: báz
3409 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3410 \tJean de Baddie
3411 Mime-Version: 1.0
3412 Content-Type: text/plain; charset="utf-8"
3413 Content-Transfer-Encoding: 8bit
3414
3415 Да, они летят.
3416 """).encode('utf-8')
3417
3418 def test_bytes_generator(self):
3419 msg = email.message_from_bytes(self.non_latin_bin_msg)
3420 out = BytesIO()
3421 email.generator.BytesGenerator(out).flatten(msg)
3422 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3423
R. David Murray7372a072011-01-26 21:21:32 +00003424 def test_bytes_generator_handles_None_body(self):
3425 #Issue 11019
3426 msg = email.message.Message()
3427 out = BytesIO()
3428 email.generator.BytesGenerator(out).flatten(msg)
3429 self.assertEqual(out.getvalue(), b"\n")
3430
R. David Murray92532142011-01-07 23:25:30 +00003431 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003432 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003433 To: =?unknown-8bit?q?b=C3=A1z?=
3434 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3435 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3436 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003437 Mime-Version: 1.0
3438 Content-Type: text/plain; charset="utf-8"
3439 Content-Transfer-Encoding: base64
3440
3441 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3442 """)
3443
3444 def test_generator_handles_8bit(self):
3445 msg = email.message_from_bytes(self.non_latin_bin_msg)
3446 out = StringIO()
3447 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003448 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003449
3450 def test_bytes_generator_with_unix_from(self):
3451 # The unixfrom contains a current date, so we can't check it
3452 # literally. Just make sure the first word is 'From' and the
3453 # rest of the message matches the input.
3454 msg = email.message_from_bytes(self.non_latin_bin_msg)
3455 out = BytesIO()
3456 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3457 lines = out.getvalue().split(b'\n')
3458 self.assertEqual(lines[0].split()[0], b'From')
3459 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3460
R. David Murray92532142011-01-07 23:25:30 +00003461 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3462 non_latin_bin_msg_as7bit[2:4] = [
3463 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3464 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3465 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3466
R. David Murray96fd54e2010-10-08 15:55:28 +00003467 def test_message_from_binary_file(self):
3468 fn = 'test.msg'
3469 self.addCleanup(unlink, fn)
3470 with open(fn, 'wb') as testfile:
3471 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00003472 with open(fn, 'rb') as testfile:
3473 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00003474 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
3475
3476 latin_bin_msg = textwrap.dedent("""\
3477 From: foo@bar.com
3478 To: Dinsdale
3479 Subject: Nudge nudge, wink, wink
3480 Mime-Version: 1.0
3481 Content-Type: text/plain; charset="latin-1"
3482 Content-Transfer-Encoding: 8bit
3483
3484 oh là là, know what I mean, know what I mean?
3485 """).encode('latin-1')
3486
3487 latin_bin_msg_as7bit = textwrap.dedent("""\
3488 From: foo@bar.com
3489 To: Dinsdale
3490 Subject: Nudge nudge, wink, wink
3491 Mime-Version: 1.0
3492 Content-Type: text/plain; charset="iso-8859-1"
3493 Content-Transfer-Encoding: quoted-printable
3494
3495 oh l=E0 l=E0, know what I mean, know what I mean?
3496 """)
3497
3498 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3499 m = email.message_from_bytes(self.latin_bin_msg)
3500 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3501
3502 def test_decoded_generator_emits_unicode_body(self):
3503 m = email.message_from_bytes(self.latin_bin_msg)
3504 out = StringIO()
3505 email.generator.DecodedGenerator(out).flatten(m)
3506 #DecodedHeader output contains an extra blank line compared
3507 #to the input message. RDM: not sure if this is a bug or not,
3508 #but it is not specific to the 8bit->7bit conversion.
3509 self.assertEqual(out.getvalue(),
3510 self.latin_bin_msg.decode('latin-1')+'\n')
3511
3512 def test_bytes_feedparser(self):
3513 bfp = email.feedparser.BytesFeedParser()
3514 for i in range(0, len(self.latin_bin_msg), 10):
3515 bfp.feed(self.latin_bin_msg[i:i+10])
3516 m = bfp.close()
3517 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3518
R. David Murray8451c4b2010-10-23 22:19:56 +00003519 def test_crlf_flatten(self):
3520 with openfile('msg_26.txt', 'rb') as fp:
3521 text = fp.read()
3522 msg = email.message_from_bytes(text)
3523 s = BytesIO()
3524 g = email.generator.BytesGenerator(s)
3525 g.flatten(msg, linesep='\r\n')
3526 self.assertEqual(s.getvalue(), text)
R David Murrayc5c14722011-04-06 08:13:02 -04003527
3528 def test_8bit_multipart(self):
3529 # Issue 11605
3530 source = textwrap.dedent("""\
3531 Date: Fri, 18 Mar 2011 17:15:43 +0100
3532 To: foo@example.com
3533 From: foodwatch-Newsletter <bar@example.com>
3534 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3535 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3536 MIME-Version: 1.0
3537 Content-Type: multipart/alternative;
3538 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3539
3540 --b1_76a486bee62b0d200f33dc2ca08220ad
3541 Content-Type: text/plain; charset="utf-8"
3542 Content-Transfer-Encoding: 8bit
3543
3544 Guten Tag, ,
3545
3546 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3547 Nachrichten aus Japan.
3548
3549
3550 --b1_76a486bee62b0d200f33dc2ca08220ad
3551 Content-Type: text/html; charset="utf-8"
3552 Content-Transfer-Encoding: 8bit
3553
3554 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3555 "http://www.w3.org/TR/html4/loose.dtd">
3556 <html lang="de">
3557 <head>
3558 <title>foodwatch - Newsletter</title>
3559 </head>
3560 <body>
3561 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3562 die Nachrichten aus Japan.</p>
3563 </body>
3564 </html>
3565 --b1_76a486bee62b0d200f33dc2ca08220ad--
3566
3567 """).encode('utf-8')
3568 msg = email.message_from_bytes(source)
3569 s = BytesIO()
3570 g = email.generator.BytesGenerator(s)
3571 g.flatten(msg)
3572 self.assertEqual(s.getvalue(), source)
3573
R David Murray9fd170e2012-03-14 14:05:03 -04003574 def test_bytes_generator_b_encoding_linesep(self):
3575 # Issue 14062: b encoding was tacking on an extra \n.
3576 m = Message()
3577 # This has enough non-ascii that it should always end up b encoded.
3578 m['Subject'] = Header('žluťoučký kůň')
3579 s = BytesIO()
3580 g = email.generator.BytesGenerator(s)
3581 g.flatten(m, linesep='\r\n')
3582 self.assertEqual(
3583 s.getvalue(),
3584 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3585
3586 def test_generator_b_encoding_linesep(self):
3587 # Since this broke in ByteGenerator, test Generator for completeness.
3588 m = Message()
3589 # This has enough non-ascii that it should always end up b encoded.
3590 m['Subject'] = Header('žluťoučký kůň')
3591 s = StringIO()
3592 g = email.generator.Generator(s)
3593 g.flatten(m, linesep='\r\n')
3594 self.assertEqual(
3595 s.getvalue(),
3596 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3597
R. David Murray8451c4b2010-10-23 22:19:56 +00003598 maxDiff = None
3599
Ezio Melottib3aedd42010-11-20 19:04:17 +00003600
class BaseTestBytesGeneratorIdempotent:
    # Mixin that parses a data file as bytes and checks that BytesGenerator
    # reproduces it.  The class using it must supply 'linesep', 'blinesep'
    # and 'normalize_linesep_regex', plus the unittest assertion methods.

    maxDiff = None

    def _msgobj(self, filename):
        # Return (message, data): the file's bytes with every match of
        # normalize_linesep_regex replaced by blinesep, and the message
        # parsed from those normalized bytes.
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg without header wrapping (maxheaderlen=0) using this
        # class's linesep, and compare the output with data.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, sink.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        expected_lines = str1.split(b'\n')
        actual_lines = str2.split(b'\n')
        self.assertListEqual(expected_lines, actual_lines)
3622
3623
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # LF flavour of the bytes idempotency tests: CRLF pairs in the input
    # data are rewritten to LF, and LF is the line separator handed to the
    # generator.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3629
3630
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # CRLF flavour of the bytes idempotency tests: every LF not already
    # preceded by CR is rewritten to CRLF, and CRLF is the line separator
    # handed to the generator.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3636
Ezio Melottib3aedd42010-11-20 19:04:17 +00003637
class TestBase64(unittest.TestCase):
    def test_len(self):
        check = self.assertEqual
        check(base64mime.header_length('hello'),
              len(base64mime.body_encode(b'hello', eol='')))
        # Every started group of three input characters is expected to cost
        # four output characters: 0 -> 0, 1..3 -> 4, 4..6 -> 8, and so on.
        for size in range(15):
            expected = 4 * ((size + 2) // 3)
            check(base64mime.header_length('x' * size), expected)

    def test_decode(self):
        for encoded, expected in (('', b''), ('aGVsbG8=', b'hello')):
            self.assertEqual(base64mime.decode(encoded), expected)

    def test_encode(self):
        check = self.assertEqual
        encode = base64mime.body_encode
        check(encode(b''), b'')
        check(encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        check(encode(b'hello\n'), 'aGVsbG8K\n')
        payload = b'xxxx ' * 20
        wrapped = ['eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'] * 3
        wrapped.append('eHh4eCB4eHh4IA==')
        # Test the maxlinelen arg
        check(encode(payload, maxlinelen=40),
              ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        check(encode(payload, maxlinelen=40, eol='\r\n'),
              ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        encode = base64mime.header_encode
        # (text, keyword arguments, expected encoded word), checked in order.
        cases = (
            ('hello', {}, '=?iso-8859-1?b?aGVsbG8=?='),
            ('hello\r\nworld', {}, '=?iso-8859-1?b?aGVsbG8NCndvcmxk?='),
            ('hello\nworld', {}, '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?='),
            # Test the charset option
            ('hello', {'charset': 'iso-8859-2'}, '=?iso-8859-2?b?aGVsbG8=?='),
            ('hello\nworld', {}, '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?='),
        )
        for text, options, expected in cases:
            self.assertEqual(encode(text, **options), expected)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003688
3689
Ezio Melottib3aedd42010-11-20 19:04:17 +00003690
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003691class TestQuopri(unittest.TestCase):
def setUp(self):
    # Build the four tables of byte values (as integers) that the quopri
    # tests iterate over.  Each pair of tables must partition range(256).

    # Set of characters (as byte integers) that don't need to be encoded
    # in headers.
    self.hlit = list(chain(
        range(ord('a'), ord('z') + 1),
        range(ord('A'), ord('Z') + 1),
        range(ord('0'), ord('9') + 1),
        b'!*+-/'))
    # Set of characters (as byte integers) that do need to be encoded in
    # headers.  A frozenset keeps each membership test constant-time.
    header_literals = frozenset(self.hlit)
    self.hnon = [c for c in range(256) if c not in header_literals]
    # Use a unittest assertion rather than a bare assert statement so the
    # sanity check is not stripped when running under "python -O".
    self.assertEqual(len(self.hlit) + len(self.hnon), 256)
    # Set of characters (as byte integers) that don't need to be encoded
    # in bodies.
    self.blit = list(range(ord(' '), ord('~') + 1))
    self.blit.append(ord('\t'))
    self.blit.remove(ord('='))
    # Set of characters (as byte integers) that do need to be encoded in
    # bodies.
    body_literals = frozenset(self.blit)
    self.bnon = [c for c in range(256) if c not in body_literals]
    self.assertEqual(len(self.blit) + len(self.bnon), 256)
3713
Guido van Rossum9604e662007-08-30 03:46:43 +00003714 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003715 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003716 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003717 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003718 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003719 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003720 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003721
Guido van Rossum9604e662007-08-30 03:46:43 +00003722 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003723 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003724 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003725 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003726 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003727 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003728 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003729
3730 def test_header_quopri_len(self):
3731 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003732 eq(quoprimime.header_length(b'hello'), 5)
3733 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003734 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003735 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003736 # =?xxx?q?...?= means 10 extra characters
3737 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003738 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3739 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003740 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003741 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003742 # =?xxx?q?...?= means 10 extra characters
3743 10)
3744 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003745 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003746 'expected length 1 for %r' % chr(c))
3747 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003748 # Space is special; it's encoded to _
3749 if c == ord(' '):
3750 continue
3751 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003752 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003753 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003754
3755 def test_body_quopri_len(self):
3756 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003757 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003758 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003759 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003760 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003761
3762 def test_quote_unquote_idempotent(self):
3763 for x in range(256):
3764 c = chr(x)
3765 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3766
def _test_header_encode(self, header, expected_encoded_header, charset=None):
    # Encode header with quoprimime.header_encode() and compare the result
    # with expected_encoded_header.  The charset is only passed on when the
    # caller supplied one, so header_encode's own default is used otherwise.
    extra_args = () if charset is None else (charset,)
    actual = quoprimime.header_encode(header, *extra_args)
    self.assertEqual(actual, expected_encoded_header)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003773
R David Murraycafd79d2011-03-23 15:25:55 -04003774 def test_header_encode_null(self):
3775 self._test_header_encode(b'', '')
3776
R David Murrayec1b5b82011-03-23 14:19:05 -04003777 def test_header_encode_one_word(self):
3778 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
3779
3780 def test_header_encode_two_lines(self):
3781 self._test_header_encode(b'hello\nworld',
3782 '=?iso-8859-1?q?hello=0Aworld?=')
3783
3784 def test_header_encode_non_ascii(self):
3785 self._test_header_encode(b'hello\xc7there',
3786 '=?iso-8859-1?q?hello=C7there?=')
3787
3788 def test_header_encode_alt_charset(self):
3789 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
3790 charset='iso-8859-2')
3791
def _test_header_decode(self, encoded_header, expected_decoded_header):
    # Decode encoded_header with quoprimime.header_decode() and compare the
    # result with expected_decoded_header.
    self.assertEqual(quoprimime.header_decode(encoded_header),
                     expected_decoded_header)
3795
3796 def test_header_decode_null(self):
3797 self._test_header_decode('', '')
3798
3799 def test_header_decode_one_word(self):
3800 self._test_header_decode('hello', 'hello')
3801
3802 def test_header_decode_two_lines(self):
3803 self._test_header_decode('hello=0Aworld', 'hello\nworld')
3804
3805 def test_header_decode_non_ascii(self):
3806 self._test_header_decode('hello=C7there', 'hello\xc7there')
3807
def _test_decode(self, encoded, expected_decoded, eol=None):
    # Decode encoded with quoprimime.decode() and compare the result with
    # expected_decoded.  eol is only passed on when the caller supplied it,
    # so decode's own default is used otherwise.
    options = {} if eol is None else {'eol': eol}
    actual = quoprimime.decode(encoded, **options)
    self.assertEqual(actual, expected_decoded)
3814
3815 def test_decode_null_word(self):
3816 self._test_decode('', '')
3817
3818 def test_decode_null_line_null_word(self):
3819 self._test_decode('\r\n', '\n')
3820
3821 def test_decode_one_word(self):
3822 self._test_decode('hello', 'hello')
3823
3824 def test_decode_one_word_eol(self):
3825 self._test_decode('hello', 'hello', eol='X')
3826
3827 def test_decode_one_line(self):
3828 self._test_decode('hello\r\n', 'hello\n')
3829
3830 def test_decode_one_line_lf(self):
3831 self._test_decode('hello\n', 'hello\n')
3832
R David Murraycafd79d2011-03-23 15:25:55 -04003833 def test_decode_one_line_cr(self):
3834 self._test_decode('hello\r', 'hello\n')
3835
3836 def test_decode_one_line_nl(self):
3837 self._test_decode('hello\n', 'helloX', eol='X')
3838
3839 def test_decode_one_line_crnl(self):
3840 self._test_decode('hello\r\n', 'helloX', eol='X')
3841
R David Murrayec1b5b82011-03-23 14:19:05 -04003842 def test_decode_one_line_one_word(self):
3843 self._test_decode('hello\r\nworld', 'hello\nworld')
3844
3845 def test_decode_one_line_one_word_eol(self):
3846 self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
3847
3848 def test_decode_two_lines(self):
3849 self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
3850
R David Murraycafd79d2011-03-23 15:25:55 -04003851 def test_decode_two_lines_eol(self):
3852 self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
3853
R David Murrayec1b5b82011-03-23 14:19:05 -04003854 def test_decode_one_long_line(self):
3855 self._test_decode('Spam' * 250, 'Spam' * 250)
3856
3857 def test_decode_one_space(self):
3858 self._test_decode(' ', '')
3859
3860 def test_decode_multiple_spaces(self):
3861 self._test_decode(' ' * 5, '')
3862
3863 def test_decode_one_line_trailing_spaces(self):
3864 self._test_decode('hello \r\n', 'hello\n')
3865
3866 def test_decode_two_lines_trailing_spaces(self):
3867 self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')
3868
def test_decode_quoted_word(self):
    """=22 and =20 escapes are expected to decode to '"' and ' '."""
    encoded, expected = '=22quoted=20words=22', '"quoted words"'
    self._test_decode(encoded, expected)
3871
def test_decode_uppercase_quoting(self):
    """Escapes written with upper-case hex digits."""
    encoded, expected = 'ab=CD=EF', 'ab\xcd\xef'
    self._test_decode(encoded, expected)
3874
def test_decode_lowercase_quoting(self):
    """Escapes written with lower-case hex digits."""
    encoded, expected = 'ab=cd=ef', 'ab\xcd\xef'
    self._test_decode(encoded, expected)
3877
def test_decode_soft_line_break(self):
    """A '=' directly before the line ending is expected to join the lines."""
    encoded, expected = 'soft line=\r\nbreak', 'soft linebreak'
    self._test_decode(encoded, expected)
3880
def test_decode_false_quoting(self):
    """'=' signs not followed by two hex digits are expected to pass through."""
    text = 'A=1,B=A ==> A+B==2'
    self._test_decode(text, text)
3883
3884 def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
3885 kwargs = {}
3886 if maxlinelen is None:
3887 # Use body_encode's default.
3888 maxlinelen = 76
3889 else:
3890 kwargs['maxlinelen'] = maxlinelen
3891 if eol is None:
3892 # Use body_encode's default.
3893 eol = '\n'
3894 else:
3895 kwargs['eol'] = eol
3896 encoded_body = quoprimime.body_encode(body, **kwargs)
3897 self.assertEqual(encoded_body, expected_encoded_body)
3898 if eol == '\n' or eol == '\r\n':
3899 # We know how to split the result back into lines, so maxlinelen
3900 # can be checked.
3901 for line in encoded_body.splitlines():
3902 self.assertLessEqual(len(line), maxlinelen)
3903
def test_encode_null(self):
    """An empty body encodes to an empty string."""
    nothing = ''
    self._test_encode(nothing, nothing)
3906
def test_encode_null_lines(self):
    """Two empty lines encode to themselves."""
    blank_lines = '\n\n'
    self._test_encode(blank_lines, blank_lines)
3909
def test_encode_one_line(self):
    """A plain LF-terminated line encodes to itself."""
    line = 'hello\n'
    self._test_encode(line, line)
3912
def test_encode_one_line_crlf(self):
    """A CRLF-terminated line is expected to be emitted with the default eol."""
    body, expected = 'hello\r\n', 'hello\n'
    self._test_encode(body, expected)
3915
def test_encode_one_line_eol(self):
    """An LF-terminated line encoded with eol set to CRLF."""
    body, expected = 'hello\n', 'hello\r\n'
    self._test_encode(body, expected, eol='\r\n')
3918
def test_encode_one_space(self):
    """A body consisting of one space is expected to become '=20'."""
    body, expected = ' ', '=20'
    self._test_encode(body, expected)
3921
def test_encode_one_line_one_space(self):
    """A line holding only a space is expected to become '=20' plus newline."""
    body, expected = ' \n', '=20\n'
    self._test_encode(body, expected)
3924
# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
3929
def test_encode_two_lines_one_space(self):
    """Two lines each holding only a space."""
    body, expected = ' \n \n', '=20\n=20\n'
    self._test_encode(body, expected)
3932
def test_encode_one_word_trailing_spaces(self):
    """Only the last of several trailing spaces is quoted.

    The input must end in more than one space: with a single trailing
    space body_encode() yields 'hello=20', which cannot match an expected
    value that keeps a literal space in front of the '=20'.
    """
    body = 'hello' + ' ' * 2
    self._test_encode(body, 'hello =20')
3935
def test_encode_one_line_trailing_spaces(self):
    """Only the last of several end-of-line spaces is quoted.

    The input line must end in more than one space: with a single trailing
    space body_encode() yields 'hello=20\\n', which cannot match an
    expected value that keeps a literal space in front of the '=20'.
    """
    body = 'hello' + ' ' * 2 + '\n'
    self._test_encode(body, 'hello =20\n')
3938
def test_encode_one_word_trailing_tab(self):
    """A trailing tab is expected to be quoted as '=09'."""
    body, expected = 'hello \t', 'hello =09'
    self._test_encode(body, expected)
3941
def test_encode_one_line_trailing_tab(self):
    """A tab right before the newline is expected to be quoted as '=09'."""
    body, expected = 'hello \t\n', 'hello =09\n'
    self._test_encode(body, expected)
3944
def test_encode_trailing_space_before_maxlinelen(self):
    """Trailing space on a line one character shorter than maxlinelen."""
    body, expected = 'abcd \n1234', 'abcd =\n\n1234'
    self._test_encode(body, expected, maxlinelen=6)
3947
def test_encode_trailing_space_at_maxlinelen(self):
    """Trailing space on a line exactly maxlinelen characters long."""
    body, expected = 'abcd \n1234', 'abcd=\n=20\n1234'
    self._test_encode(body, expected, maxlinelen=5)
3950
def test_encode_trailing_space_beyond_maxlinelen(self):
    """Trailing space on a line one character longer than maxlinelen."""
    body, expected = 'abcd \n1234', 'abc=\nd=20\n1234'
    self._test_encode(body, expected, maxlinelen=4)
3953
def test_encode_whitespace_lines(self):
    """Five lines that each hold only a space."""
    repeat = 5
    self._test_encode(' \n' * repeat, '=20\n' * repeat)
R David Murrayec1b5b82011-03-23 14:19:05 -04003956
def test_encode_quoted_equals(self):
    """A literal '=' is expected to be quoted as '=3D'."""
    body, expected = 'a = b', 'a =3D b'
    self._test_encode(body, expected)
3959
def test_encode_one_long_string(self):
    """100 unterminated characters get one soft break after 75 of them."""
    body = 'x' * 100
    expected = 'x' * 75 + '=\n' + 'x' * 25
    self._test_encode(body, expected)
3962
def test_encode_one_long_line(self):
    """A 100-character LF-terminated line gets one soft break after 75."""
    body = 'x' * 100 + '\n'
    expected = 'x' * 75 + '=\n' + 'x' * 25 + '\n'
    self._test_encode(body, expected)
3965
def test_encode_one_very_long_line(self):
    """A 200-character line gets a soft break after each 75 characters."""
    body = 'x' * 200 + '\n'
    wrapped_chunk = 'x' * 75 + '=\n'
    expected = 2 * wrapped_chunk + 'x' * 50 + '\n'
    self._test_encode(body, expected)
3969
def test_encode_one_long_line_eol(self):
    """A 100-character line soft-wrapped with eol set to CRLF.

    This method used to be a second, byte-identical definition of
    test_encode_one_long_line, which silently replaced the first one in
    the class namespace.  It now has its own name and covers the CRLF
    variant, so both the soft break and the final line ending must use
    the requested eol.
    """
    body = 'x' * 100 + '\n'
    expected = 'x' * 75 + '=\r\n' + 'x' * 25 + '\r\n'
    self._test_encode(body, expected, eol='\r\n')
3972
def test_encode_shortest_maxlinelen(self):
    """With maxlinelen=4 each quoted '=' sits on its own soft-wrapped line."""
    body = '=' * 5
    expected = '=3D=\n' * 4 + '=3D'
    self._test_encode(body, expected, maxlinelen=4)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003975
def test_encode_maxlinelen_too_small(self):
    """A maxlinelen of 3 is expected to be rejected with ValueError."""
    with self.assertRaises(ValueError):
        self._test_encode('', '', maxlinelen=3)
3978
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003979 def test_encode(self):
3980 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003981 eq(quoprimime.body_encode(''), '')
3982 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003983 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00003984 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003985 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00003986 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003987xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
3988 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
3989x xxxx xxxx xxxx xxxx=20""")
3990 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00003991 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
3992 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003993xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
3994 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
3995x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00003996 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003997one line
3998
3999two line"""), """\
4000one line
4001
4002two line""")
4003
4004
Ezio Melottib3aedd42010-11-20 19:04:17 +00004005
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004006# Test the Charset class
class TestCharset(unittest.TestCase):
    """Tests for header and body encoding through email.charset.Charset."""

    def tearDown(self):
        # test_body_encode registers a throwaway 'fake' charset; remove it
        # (when present) so it does not leak into other tests.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        check = self.assertEqual
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        check(ascii_charset.header_encode('Hello World!'), 'Hello World!')
        # Test 8-bit idempotency with us-ascii
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_charset.header_encode, eight_bit)
        utf8_charset = Charset('utf-8')
        check(utf8_charset.header_encode(eight_bit),
              '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # Try a charset with QP body encoding
        latin1 = Charset('iso-8859-1')
        check('hello w=F6rld', latin1.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        utf8_charset = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', utf8_charset.body_encode(b'hello world'))
        # Try a charset with None body encoding
        ascii_charset = Charset('us-ascii')
        check('hello world', ascii_charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake_charset = Charset('fake')
        check('hello world', fake_charset.body_encode('hello world'))

    def test_unicode_charset_name(self):
        ascii_charset = Charset('us-ascii')
        self.assertEqual(str(ascii_charset), 'us-ascii')
        # A charset name containing a non-ASCII character must be rejected.
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
4061
4062
Ezio Melottib3aedd42010-11-20 19:04:17 +00004063
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004064# Test multilingual MIME headers.
4065class TestHeader(TestEmailBase):
def test_simple(self):
    """Appending a chunk that starts with a space keeps that space.

    Header separates chunks with one space of its own, so the leading
    space of the appended chunk shows up as a second space in the encoded
    value.  (test_simple_surprise covers the chunk without a leading
    space, which yields a single space.)
    """
    eq = self.ndiffAssertEqual
    h = Header('Hello World!')
    eq(h.encode(), 'Hello World!')
    h.append(' Goodbye World!')
    eq(h.encode(), 'Hello World!' + ' ' * 2 + 'Goodbye World!')
4072
def test_simple_surprise(self):
    """A chunk appended without leading whitespace still gets a separator."""
    check = self.ndiffAssertEqual
    hdr = Header('Hello World!')
    check(hdr.encode(), 'Hello World!')
    hdr.append('Goodbye World!')
    check(hdr.encode(), 'Hello World! Goodbye World!')
4079
4080 def test_header_needs_no_decoding(self):
4081 h = 'no decoding needed'
4082 self.assertEqual(decode_header(h), [(h, None)])
4083
4084 def test_long(self):
4085 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
4086 maxlinelen=76)
4087 for l in h.encode(splitchars=' ').split('\n '):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004088 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004089
def test_multilingual(self):
    """Encode, decode and rebuild a header mixing three charsets."""
    check = self.ndiffAssertEqual
    latin1 = Charset("iso-8859-1")
    latin2 = Charset("iso-8859-2")
    utf8_charset = Charset("utf-8")
    g_head = (b'Die Mieter treten hier ein werden mit einem '
              b'Foerderband komfortabel den Korridor entlang, '
              b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
              b'gegen die rotierenden Klingen bef\xf6rdert. ')
    cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
               b'd\xf9vtipu.. ')
    utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                 '\u3044\u307e\u3059\u3002')
    # One chunk per charset, in this order.
    hdr = Header(g_head, latin1)
    hdr.append(cz_head, latin2)
    hdr.append(utf8_head, utf8_charset)
    enc = hdr.encode(maxlinelen=76)
    check(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
    # Decoding must give back the three original chunks.
    decoded = decode_header(enc)
    check(len(decoded), 3)
    check(decoded[0], (g_head, 'iso-8859-1'))
    check(decoded[1], (cz_head, 'iso-8859-2'))
    check(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
    ustr = str(hdr)
    expected_text = (
        b'Die Mieter treten hier ein werden mit einem Foerderband '
        b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
        b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
        b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
        b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
        b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
        b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
        b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
        b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
        b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
        b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
        b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
        b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
        b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
        b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
        b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
        ).decode('utf-8')
    check(ustr, expected_text)
    # Test make_header()
    rebuilt = make_header(decode_header(enc))
    check(rebuilt, hdr)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004152
4153 def test_empty_header_encode(self):
4154 h = Header()
4155 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00004156
def test_header_ctor_default_args(self):
    """A default-constructed Header compares equal to its text content."""
    check = self.ndiffAssertEqual
    hdr = Header()
    check(hdr, '')
    latin1 = Charset('iso-8859-1')
    hdr.append('foo', latin1)
    check(hdr, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004163
def test_explicit_maxlinelen(self):
    """Folding depends on header_name and on an explicit maxlinelen."""
    check = self.ndiffAssertEqual
    hstr = ('A very long line that must get split to something other '
            'than at the 76th character boundary to test the non-default '
            'behavior')
    # Defaults only.
    plain = Header(hstr)
    check(plain.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
    check(str(plain), hstr)
    # With a header name the first line is expected to fold earlier.
    named = Header(hstr, header_name='Subject')
    check(named.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
    check(str(named), hstr)
    # A maxlinelen longer than the text means no folding at all.
    roomy = Header(hstr, maxlinelen=1024, header_name='Subject')
    check(roomy.encode(), hstr)
    check(str(roomy), hstr)
4182
def test_quopri_splittable(self):
    """Q-encoded headers are split into encoded words that fit maxlinelen."""
    check = self.ndiffAssertEqual
    x = 'xxxx ' * 20
    narrow = Header(charset='iso-8859-1', maxlinelen=20)
    narrow.append(x)
    s = narrow.encode()
    check(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
    # The split header must decode back to the original text.
    check(x, str(make_header(decode_header(s))))
    wide = Header(charset='iso-8859-1', maxlinelen=40)
    wide.append('xxxx ' * 20)
    s = wide.encode()
    check(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
    check(x, str(make_header(decode_header(s))))
4251
def test_base64_splittable(self):
    """B-encoded headers are split into encoded words that fit maxlinelen."""
    check = self.ndiffAssertEqual
    x = 'xxxx ' * 20
    narrow = Header(charset='koi8-r', maxlinelen=20)
    narrow.append(x)
    s = narrow.encode()
    check(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
    # The split header must decode back to the original text.
    check(x, str(make_header(decode_header(s))))
    wide = Header(charset='koi8-r', maxlinelen=40)
    wide.append(x)
    s = wide.encode()
    check(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
    check(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004305
4306 def test_us_ascii_header(self):
4307 eq = self.assertEqual
4308 s = 'hello'
4309 x = decode_header(s)
4310 eq(x, [('hello', None)])
4311 h = make_header(x)
4312 eq(s, h.encode())
4313
4314 def test_string_charset(self):
4315 eq = self.assertEqual
4316 h = Header()
4317 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00004318 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004319
4320## def test_unicode_error(self):
4321## raises = self.assertRaises
4322## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
4323## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
4324## h = Header()
4325## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
4326## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
4327## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
4328
4329 def test_utf8_shortest(self):
4330 eq = self.assertEqual
4331 h = Header('p\xf6stal', 'utf-8')
4332 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
4333 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
4334 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
4335
4336 def test_bad_8bit_header(self):
4337 raises = self.assertRaises
4338 eq = self.assertEqual
4339 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4340 raises(UnicodeError, Header, x)
4341 h = Header()
4342 raises(UnicodeError, h.append, x)
4343 e = x.decode('utf-8', 'replace')
4344 eq(str(Header(x, errors='replace')), e)
4345 h.append(x, errors='replace')
4346 eq(str(h), e)
4347
R David Murray041015c2011-03-25 15:10:55 -04004348 def test_escaped_8bit_header(self):
4349 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
R David Murray6bdb1762011-06-18 12:30:55 -04004350 e = x.decode('ascii', 'surrogateescape')
4351 h = Header(e, charset=email.charset.UNKNOWN8BIT)
R David Murray041015c2011-03-25 15:10:55 -04004352 self.assertEqual(str(h),
4353 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4354 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4355
R David Murraye5e366c2011-06-18 12:57:28 -04004356 def test_header_handles_binary_unknown8bit(self):
4357 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4358 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4359 self.assertEqual(str(h),
4360 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4361 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4362
4363 def test_make_header_handles_binary_unknown8bit(self):
4364 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4365 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4366 h2 = email.header.make_header(email.header.decode_header(h))
4367 self.assertEqual(str(h2),
4368 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4369 self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])
4370
R David Murray041015c2011-03-25 15:10:55 -04004371 def test_modify_returned_list_does_not_change_header(self):
4372 h = Header('test')
4373 chunks = email.header.decode_header(h)
4374 chunks.append(('ascii', 'test2'))
4375 self.assertEqual(str(h), 'test')
4376
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004377 def test_encoded_adjacent_nonencoded(self):
4378 eq = self.assertEqual
4379 h = Header()
4380 h.append('hello', 'iso-8859-1')
4381 h.append('world')
4382 s = h.encode()
4383 eq(s, '=?iso-8859-1?q?hello?= world')
4384 h = make_header(decode_header(s))
4385 eq(h.encode(), s)
4386
4387 def test_whitespace_eater(self):
4388 eq = self.assertEqual
4389 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
4390 parts = decode_header(s)
4391 eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
4392 hdr = make_header(parts)
4393 eq(hdr.encode(),
4394 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
4395
4396 def test_broken_base64_header(self):
4397 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00004398 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004399 raises(errors.HeaderParseError, decode_header, s)
4400
R. David Murray477efb32011-01-05 01:39:32 +00004401 def test_shift_jis_charset(self):
4402 h = Header('文', charset='shift_jis')
4403 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
4404
R David Murrayde912762011-03-16 18:26:23 -04004405 def test_flatten_header_with_no_value(self):
4406 # Issue 11401 (regression from email 4.x) Note that the space after
4407 # the header doesn't reflect the input, but this is also the way
4408 # email 4.x behaved. At some point it would be nice to fix that.
4409 msg = email.message_from_string("EmptyHeader:")
4410 self.assertEqual(str(msg), "EmptyHeader: \n\n")
4411
R David Murray01581ee2011-04-18 10:04:34 -04004412 def test_encode_preserves_leading_ws_on_value(self):
4413 msg = Message()
4414 msg['SomeHeader'] = ' value with leading ws'
4415 self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")
4416
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004417
Ezio Melottib3aedd42010-11-20 19:04:17 +00004418
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004419# Test RFC 2231 header parameters (en/de)coding
4420class TestRFC2231(TestEmailBase):
def test_get_param(self):
    """get_param() returns a (charset, language, value) triple for 'title'."""
    check = self.assertEqual
    msg = self._msgobj('msg_29.txt')
    unquoted = msg.get_param('title')
    check(unquoted,
          ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
    # With unquote=False the surrounding double quotes are kept.
    still_quoted = msg.get_param('title', unquote=False)
    check(still_quoted,
          ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
4428
def test_set_param(self):
    """set_param() with charset/language stores an RFC 2231 parameter."""
    check = self.ndiffAssertEqual
    title = 'This is even more ***fun*** isn\'t it!'
    msg = Message()
    # Charset only: the language slot comes back empty.
    msg.set_param('title', title, charset='us-ascii')
    check(msg.get_param('title'), ('us-ascii', '', title))
    # Charset and language.
    msg.set_param('title', title, charset='us-ascii', language='en')
    check(msg.get_param('title'), ('us-ascii', 'en', title))
    # The same parameter on a parsed message, checked in flattened form.
    msg = self._msgobj('msg_01.txt')
    msg.set_param('title', title, charset='us-ascii', language='en')
    check(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4465
R David Murraya2860e82011-04-16 09:20:30 -04004466 def test_set_param_requote(self):
4467 msg = Message()
4468 msg.set_param('title', 'foo')
4469 self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
4470 msg.set_param('title', 'bar', requote=False)
4471 self.assertEqual(msg['content-type'], 'text/plain; title=bar')
4472 # tspecial is still quoted.
4473 msg.set_param('title', "(bar)bell", requote=False)
4474 self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
4475
def test_del_param(self):
    """del_param() removes one parameter and leaves the others in place."""
    check = self.ndiffAssertEqual
    msg = self._msgobj('msg_01.txt')
    rfc2231_opts = {'charset': 'us-ascii', 'language': 'en'}
    msg.set_param('foo', 'bar', **rfc2231_opts)
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  **rfc2231_opts)
    msg.del_param('foo', header='Content-Type')
    check(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4505
def test_rfc2231_get_content_charset(self):
    """get_content_charset() on msg_32.txt is expected to give 'us-ascii'."""
    msg = self._msgobj('msg_32.txt')
    charset = msg.get_content_charset()
    self.assertEqual(charset, 'us-ascii')
4510
R. David Murraydfd7eb02010-12-24 22:36:49 +00004511 def test_rfc2231_parse_rfc_quoting(self):
4512 m = textwrap.dedent('''\
4513 Content-Disposition: inline;
4514 \tfilename*0*=''This%20is%20even%20more%20;
4515 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
4516 \tfilename*2="is it not.pdf"
4517
4518 ''')
4519 msg = email.message_from_string(m)
4520 self.assertEqual(msg.get_filename(),
4521 'This is even more ***fun*** is it not.pdf')
4522 self.assertEqual(m, msg.as_string())
4523
4524 def test_rfc2231_parse_extra_quoting(self):
4525 m = textwrap.dedent('''\
4526 Content-Disposition: inline;
4527 \tfilename*0*="''This%20is%20even%20more%20";
4528 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4529 \tfilename*2="is it not.pdf"
4530
4531 ''')
4532 msg = email.message_from_string(m)
4533 self.assertEqual(msg.get_filename(),
4534 'This is even more ***fun*** is it not.pdf')
4535 self.assertEqual(m, msg.as_string())
4536
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004537 def test_rfc2231_no_language_or_charset(self):
4538 m = '''\
4539Content-Transfer-Encoding: 8bit
4540Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
4541Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
4542
4543'''
4544 msg = email.message_from_string(m)
4545 param = msg.get_param('NAME')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004546 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004547 self.assertEqual(
4548 param,
4549 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
4550
4551 def test_rfc2231_no_language_or_charset_in_filename(self):
4552 m = '''\
4553Content-Disposition: inline;
4554\tfilename*0*="''This%20is%20even%20more%20";
4555\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4556\tfilename*2="is it not.pdf"
4557
4558'''
4559 msg = email.message_from_string(m)
4560 self.assertEqual(msg.get_filename(),
4561 'This is even more ***fun*** is it not.pdf')
4562
4563 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
4564 m = '''\
4565Content-Disposition: inline;
4566\tfilename*0*="''This%20is%20even%20more%20";
4567\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4568\tfilename*2="is it not.pdf"
4569
4570'''
4571 msg = email.message_from_string(m)
4572 self.assertEqual(msg.get_filename(),
4573 'This is even more ***fun*** is it not.pdf')
4574
4575 def test_rfc2231_partly_encoded(self):
4576 m = '''\
4577Content-Disposition: inline;
4578\tfilename*0="''This%20is%20even%20more%20";
4579\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4580\tfilename*2="is it not.pdf"
4581
4582'''
4583 msg = email.message_from_string(m)
4584 self.assertEqual(
4585 msg.get_filename(),
4586 'This%20is%20even%20more%20***fun*** is it not.pdf')
4587
4588 def test_rfc2231_partly_nonencoded(self):
4589 m = '''\
4590Content-Disposition: inline;
4591\tfilename*0="This%20is%20even%20more%20";
4592\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
4593\tfilename*2="is it not.pdf"
4594
4595'''
4596 msg = email.message_from_string(m)
4597 self.assertEqual(
4598 msg.get_filename(),
4599 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
4600
4601 def test_rfc2231_no_language_or_charset_in_boundary(self):
4602 m = '''\
4603Content-Type: multipart/alternative;
4604\tboundary*0*="''This%20is%20even%20more%20";
4605\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
4606\tboundary*2="is it not.pdf"
4607
4608'''
4609 msg = email.message_from_string(m)
4610 self.assertEqual(msg.get_boundary(),
4611 'This is even more ***fun*** is it not.pdf')
4612
4613 def test_rfc2231_no_language_or_charset_in_charset(self):
4614 # This is a nonsensical charset value, but tests the code anyway
4615 m = '''\
4616Content-Type: text/plain;
4617\tcharset*0*="This%20is%20even%20more%20";
4618\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
4619\tcharset*2="is it not.pdf"
4620
4621'''
4622 msg = email.message_from_string(m)
4623 self.assertEqual(msg.get_content_charset(),
4624 'this is even more ***fun*** is it not.pdf')
4625
4626 def test_rfc2231_bad_encoding_in_filename(self):
4627 m = '''\
4628Content-Disposition: inline;
4629\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
4630\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4631\tfilename*2="is it not.pdf"
4632
4633'''
4634 msg = email.message_from_string(m)
4635 self.assertEqual(msg.get_filename(),
4636 'This is even more ***fun*** is it not.pdf')
4637
4638 def test_rfc2231_bad_encoding_in_charset(self):
4639 m = """\
4640Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
4641
4642"""
4643 msg = email.message_from_string(m)
4644 # This should return None because non-ascii characters in the charset
4645 # are not allowed.
4646 self.assertEqual(msg.get_content_charset(), None)
4647
4648 def test_rfc2231_bad_character_in_charset(self):
4649 m = """\
4650Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
4651
4652"""
4653 msg = email.message_from_string(m)
4654 # This should return None because non-ascii characters in the charset
4655 # are not allowed.
4656 self.assertEqual(msg.get_content_charset(), None)
4657
4658 def test_rfc2231_bad_character_in_filename(self):
4659 m = '''\
4660Content-Disposition: inline;
4661\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
4662\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4663\tfilename*2*="is it not.pdf%E2"
4664
4665'''
4666 msg = email.message_from_string(m)
4667 self.assertEqual(msg.get_filename(),
4668 'This is even more ***fun*** is it not.pdf\ufffd')
4669
4670 def test_rfc2231_unknown_encoding(self):
4671 m = """\
4672Content-Transfer-Encoding: 8bit
4673Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
4674
4675"""
4676 msg = email.message_from_string(m)
4677 self.assertEqual(msg.get_filename(), 'myfile.txt')
4678
4679 def test_rfc2231_single_tick_in_filename_extended(self):
4680 eq = self.assertEqual
4681 m = """\
4682Content-Type: application/x-foo;
4683\tname*0*=\"Frank's\"; name*1*=\" Document\"
4684
4685"""
4686 msg = email.message_from_string(m)
4687 charset, language, s = msg.get_param('name')
4688 eq(charset, None)
4689 eq(language, None)
4690 eq(s, "Frank's Document")
4691
4692 def test_rfc2231_single_tick_in_filename(self):
4693 m = """\
4694Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
4695
4696"""
4697 msg = email.message_from_string(m)
4698 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004699 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004700 self.assertEqual(param, "Frank's Document")
4701
4702 def test_rfc2231_tick_attack_extended(self):
4703 eq = self.assertEqual
4704 m = """\
4705Content-Type: application/x-foo;
4706\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
4707
4708"""
4709 msg = email.message_from_string(m)
4710 charset, language, s = msg.get_param('name')
4711 eq(charset, 'us-ascii')
4712 eq(language, 'en-us')
4713 eq(s, "Frank's Document")
4714
4715 def test_rfc2231_tick_attack(self):
4716 m = """\
4717Content-Type: application/x-foo;
4718\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
4719
4720"""
4721 msg = email.message_from_string(m)
4722 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004723 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004724 self.assertEqual(param, "us-ascii'en-us'Frank's Document")
4725
4726 def test_rfc2231_no_extended_values(self):
4727 eq = self.assertEqual
4728 m = """\
4729Content-Type: application/x-foo; name=\"Frank's Document\"
4730
4731"""
4732 msg = email.message_from_string(m)
4733 eq(msg.get_param('name'), "Frank's Document")
4734
4735 def test_rfc2231_encoded_then_unencoded_segments(self):
4736 eq = self.assertEqual
4737 m = """\
4738Content-Type: application/x-foo;
4739\tname*0*=\"us-ascii'en-us'My\";
4740\tname*1=\" Document\";
4741\tname*2*=\" For You\"
4742
4743"""
4744 msg = email.message_from_string(m)
4745 charset, language, s = msg.get_param('name')
4746 eq(charset, 'us-ascii')
4747 eq(language, 'en-us')
4748 eq(s, 'My Document For You')
4749
4750 def test_rfc2231_unencoded_then_encoded_segments(self):
4751 eq = self.assertEqual
4752 m = """\
4753Content-Type: application/x-foo;
4754\tname*0=\"us-ascii'en-us'My\";
4755\tname*1*=\" Document\";
4756\tname*2*=\" For You\"
4757
4758"""
4759 msg = email.message_from_string(m)
4760 charset, language, s = msg.get_param('name')
4761 eq(charset, 'us-ascii')
4762 eq(language, 'en-us')
4763 eq(s, 'My Document For You')
4764
4765
Ezio Melottib3aedd42010-11-20 19:04:17 +00004766
R. David Murraya8f480f2010-01-16 18:30:03 +00004767# Tests to ensure that signed parts of an email are completely preserved, as
4768# required by RFC1847 section 2.1. Note that these are incomplete, because the
4769# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    """Check that the first MIME part of a message is regenerated verbatim.

    Each test parses the 'msg_45.txt' data file, turns the resulting
    message back into text and compares the first MIME part of the
    output with the first MIME part of the input.
    """

    # Matches a '--boundary' line, lazily captures everything up to the
    # next line consisting of the same '--boundary' text, and exposes the
    # text in between as group 2.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        """Return ``(text, message)`` for the test data file *filename*.

        *text* is the file content as read; *message* is that text parsed
        with email.message_from_string().
        """
        with openfile(findfile(filename)) as fp:
            original = fp.read()
        return original, email.message_from_string(original)

    def _first_part(self, text, label):
        """Return the body of the first MIME part found in *text*.

        Fails the test with a message mentioning *label* when *text* does
        not contain a delimited part, instead of erroring out with an
        AttributeError on a None match object.
        """
        match = self._first_part_re.search(text)
        self.assertIsNotNone(
            match, 'no MIME part found in the %s message' % label)
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        """Assert that *original* and *result* share the same first part."""
        # Extract the first mime part of each message
        inpart = self._first_part(original, 'original')
        outpart = self._first_part(result, 'regenerated')
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        # Round trip through Message.as_string() with default settings.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Round trip through Message.as_string() with maxheaderlen=60.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Round trip through an explicit Generator writing to a StringIO.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4802
4803
Ezio Melottib3aedd42010-11-20 19:04:17 +00004804
def _testclasses():
    """Return the attributes of this module whose names start with 'Test'.

    The objects are returned in the order produced by dir() on the module.
    """
    this_module = sys.modules[__name__]
    found = []
    for attr_name in dir(this_module):
        if attr_name.startswith('Test'):
            found.append(getattr(this_module, attr_name))
    return found
4808
4809
def suite():
    """Return a TestSuite holding the tests of every class from _testclasses().

    unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13,
    so the tests are collected through the default loader's
    loadTestsFromTestCase() instead.
    """
    load = unittest.defaultTestLoader.loadTestsFromTestCase
    all_tests = unittest.TestSuite()
    for testclass in _testclasses():
        all_tests.addTest(load(testclass))
    return all_tests
4815
4816
def test_main():
    """Run each class returned by _testclasses() through run_unittest().

    run_unittest() is called once per class, in the order _testclasses()
    yields them.
    """
    classes = _testclasses()
    for cls in classes:
        run_unittest(cls)
4820
4821
Ezio Melottib3aedd42010-11-20 19:04:17 +00004822
# When executed as a script, hand control to unittest and let it build the
# tests by calling the module-level suite() function.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')