blob: a1ceb7f1b54cd1e0c4f3bb9ae5c512522a1dacbd [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
R. David Murray96fd54e2010-10-08 15:55:28 +000012import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000013
R. David Murray96fd54e2010-10-08 15:55:28 +000014from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000015from itertools import chain
16
17import email
18
19from email.charset import Charset
20from email.header import Header, decode_header, make_header
21from email.parser import Parser, HeaderParser
R David Murray638d40b2012-08-24 11:14:13 -040022from email.generator import Generator, DecodedGenerator, BytesGenerator
Guido van Rossum8b3febe2007-08-30 01:15:14 +000023from email.message import Message
24from email.mime.application import MIMEApplication
25from email.mime.audio import MIMEAudio
26from email.mime.text import MIMEText
27from email.mime.image import MIMEImage
28from email.mime.base import MIMEBase
29from email.mime.message import MIMEMessage
30from email.mime.multipart import MIMEMultipart
31from email import utils
32from email import errors
33from email import encoders
34from email import iterators
35from email import base64mime
36from email import quoprimime
37
R. David Murray96fd54e2010-10-08 15:55:28 +000038from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000039from email.test import __file__ as landmark
40
41
42NL = '\n'
43EMPTYSTRING = ''
44SPACE = ' '
45
46
Ezio Melottib3aedd42010-11-20 19:04:17 +000047
def openfile(filename, *args, **kws):
    """Open *filename* from the 'data' directory beside the email tests.

    Any extra positional or keyword arguments are handed straight to the
    built-in open(), so callers choose the mode (e.g. 'rb') themselves.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
51
52
Ezio Melottib3aedd42010-11-20 19:04:17 +000053
Guido van Rossum8b3febe2007-08-30 01:15:14 +000054# Base test class
class TestEmailBase(unittest.TestCase):
    """Common base class providing helpers shared by the email tests."""

    # None tells unittest not to truncate the diffs it reports on failure.
    maxDiff = None

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output."""
        if first != second:
            # Compare the repr() of every line so that whitespace and
            # escape differences show up in the reported delta.
            quoted = [
                [repr(line) for line in str(text).splitlines()]
                for text in (first, second)
            ]
            delta = difflib.ndiff(*quoted)
            raise self.failureException(NL + NL.join(delta))

    def _msgobj(self, filename):
        """Parse the named sample message file and return the result."""
        with openfile(findfile(filename)) as stream:
            return email.message_from_file(stream)
Guido van Rossum8b3febe2007-08-30 01:15:14 +000071
Ezio Melottib3aedd42010-11-20 19:04:17 +000072
Guido van Rossum8b3febe2007-08-30 01:15:14 +000073# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the public API of email.message.Message.

    Tests that call self._msgobj() parse one of the sample messages
    shipped in the test data directory; the rest build messages in memory.
    """

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile(findfile('msg_46.txt')) as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # No assertion on purpose: the test passes as long as this call
        # does not raise.
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        self.assertEqual(msg['Content-Type'], 'text/plain; charset="utf-8"')
        # Deleting a parameter that is not present must leave the header
        # exactly as it was before the call.
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer.ppt",
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            "attachment; filename*=iso-8859-1''Fu%DFballer.ppt",
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))
634
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000635# Test the email.encoders module
636class TestEncoders(unittest.TestCase):
R David Murray6d94bd42011-03-16 15:52:22 -0400637
638 def test_EncodersEncode_base64(self):
639 with openfile('PyBanner048.gif', 'rb') as fp:
640 bindata = fp.read()
641 mimed = email.mime.image.MIMEImage(bindata)
642 base64ed = mimed.get_payload()
643 # the transfer-encoded body lines should all be <=76 characters
644 lines = base64ed.split('\n')
645 self.assertLessEqual(max([ len(x) for x in lines ]), 76)
646
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000647 def test_encode_empty_payload(self):
648 eq = self.assertEqual
649 msg = Message()
650 msg.set_charset('us-ascii')
651 eq(msg['content-transfer-encoding'], '7bit')
652
653 def test_default_cte(self):
654 eq = self.assertEqual
Ezio Melottic303c122010-04-22 11:57:12 +0000655 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000656 msg = MIMEText('hello world')
657 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic303c122010-04-22 11:57:12 +0000658 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000659 msg = MIMEText('hello \xf8 world')
660 eq(msg['content-transfer-encoding'], '8bit')
661 # And now with a different charset
662 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
663 eq(msg['content-transfer-encoding'], 'quoted-printable')
664
R. David Murraye85200d2010-05-06 01:41:14 +0000665 def test_encode7or8bit(self):
666 # Make sure a charset whose input character set is 8bit but
667 # whose output character set is 7bit gets a transfer-encoding
668 # of 7bit.
669 eq = self.assertEqual
R. David Murray850fc852010-06-03 01:58:28 +0000670 msg = MIMEText('文', _charset='euc-jp')
R. David Murraye85200d2010-05-06 01:41:14 +0000671 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000672
R David Murrayf581b372013-02-05 10:49:49 -0500673 def test_qp_encode_latin1(self):
674 msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
675 self.assertEqual(str(msg), textwrap.dedent("""\
676 MIME-Version: 1.0
677 Content-Type: text/text; charset="iso-8859-1"
678 Content-Transfer-Encoding: quoted-printable
679
680 =E1=F6
681 """))
682
683 def test_qp_encode_non_latin1(self):
684 # Issue 16948
685 msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
686 self.assertEqual(str(msg), textwrap.dedent("""\
687 MIME-Version: 1.0
688 Content-Type: text/text; charset="iso-8859-2"
689 Content-Transfer-Encoding: quoted-printable
690
691 =BF
692 """))
693
Ezio Melottib3aedd42010-11-20 19:04:17 +0000694
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000695# Test long header wrapping
696class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400697
698 maxDiff = None
699
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000700 def test_split_long_continuation(self):
701 eq = self.ndiffAssertEqual
702 msg = email.message_from_string("""\
703Subject: bug demonstration
704\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
705\tmore text
706
707test
708""")
709 sfp = StringIO()
710 g = Generator(sfp)
711 g.flatten(msg)
712 eq(sfp.getvalue(), """\
713Subject: bug demonstration
714\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
715\tmore text
716
717test
718""")
719
720 def test_another_long_almost_unsplittable_header(self):
721 eq = self.ndiffAssertEqual
722 hstr = """\
723bug demonstration
724\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
725\tmore text"""
726 h = Header(hstr, continuation_ws='\t')
727 eq(h.encode(), """\
728bug demonstration
729\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
730\tmore text""")
731 h = Header(hstr.replace('\t', ' '))
732 eq(h.encode(), """\
733bug demonstration
734 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
735 more text""")
736
737 def test_long_nonstring(self):
738 eq = self.ndiffAssertEqual
739 g = Charset("iso-8859-1")
740 cz = Charset("iso-8859-2")
741 utf8 = Charset("utf-8")
742 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
743 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
744 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
745 b'bef\xf6rdert. ')
746 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
747 b'd\xf9vtipu.. ')
748 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
749 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
750 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
751 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
752 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
753 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
754 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
755 '\u3044\u307e\u3059\u3002')
756 h = Header(g_head, g, header_name='Subject')
757 h.append(cz_head, cz)
758 h.append(utf8_head, utf8)
759 msg = Message()
760 msg['Subject'] = h
761 sfp = StringIO()
762 g = Generator(sfp)
763 g.flatten(msg)
764 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000765Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
766 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
767 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
768 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
769 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
770 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
771 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
772 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
773 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
774 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
775 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000776
777""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000778 eq(h.encode(maxlinelen=76), """\
779=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
780 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
781 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
782 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
783 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
784 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
785 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
786 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
787 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
788 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
789 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000790
791 def test_long_header_encode(self):
792 eq = self.ndiffAssertEqual
793 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
794 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
795 header_name='X-Foobar-Spoink-Defrobnit')
796 eq(h.encode(), '''\
797wasnipoop; giraffes="very-long-necked-animals";
798 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
799
800 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
801 eq = self.ndiffAssertEqual
802 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
803 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
804 header_name='X-Foobar-Spoink-Defrobnit',
805 continuation_ws='\t')
806 eq(h.encode(), '''\
807wasnipoop; giraffes="very-long-necked-animals";
808 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
809
810 def test_long_header_encode_with_tab_continuation(self):
811 eq = self.ndiffAssertEqual
812 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
813 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
814 header_name='X-Foobar-Spoink-Defrobnit',
815 continuation_ws='\t')
816 eq(h.encode(), '''\
817wasnipoop; giraffes="very-long-necked-animals";
818\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
819
R David Murray3a6152f2011-03-14 21:13:03 -0400820 def test_header_encode_with_different_output_charset(self):
821 h = Header('文', 'euc-jp')
822 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
823
824 def test_long_header_encode_with_different_output_charset(self):
825 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
826 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
827 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
828 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
829 res = """\
830=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
831 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
832 self.assertEqual(h.encode(), res)
833
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000834 def test_header_splitter(self):
835 eq = self.ndiffAssertEqual
836 msg = MIMEText('')
837 # It'd be great if we could use add_header() here, but that doesn't
838 # guarantee an order of the parameters.
839 msg['X-Foobar-Spoink-Defrobnit'] = (
840 'wasnipoop; giraffes="very-long-necked-animals"; '
841 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
842 sfp = StringIO()
843 g = Generator(sfp)
844 g.flatten(msg)
845 eq(sfp.getvalue(), '''\
846Content-Type: text/plain; charset="us-ascii"
847MIME-Version: 1.0
848Content-Transfer-Encoding: 7bit
849X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
850 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
851
852''')
853
854 def test_no_semis_header_splitter(self):
855 eq = self.ndiffAssertEqual
856 msg = Message()
857 msg['From'] = 'test@dom.ain'
858 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
859 msg.set_payload('Test')
860 sfp = StringIO()
861 g = Generator(sfp)
862 g.flatten(msg)
863 eq(sfp.getvalue(), """\
864From: test@dom.ain
865References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
866 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
867
868Test""")
869
R David Murray7da4db12011-04-07 20:37:17 -0400870 def test_last_split_chunk_does_not_fit(self):
871 eq = self.ndiffAssertEqual
872 h = Header('Subject: the first part of this is short, but_the_second'
873 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
874 '_all_by_itself')
875 eq(h.encode(), """\
876Subject: the first part of this is short,
877 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
878
879 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
880 eq = self.ndiffAssertEqual
881 h = Header(', but_the_second'
882 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
883 '_all_by_itself')
884 eq(h.encode(), """\
885,
886 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
887
888 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
889 eq = self.ndiffAssertEqual
890 h = Header(', , but_the_second'
891 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
892 '_all_by_itself')
893 eq(h.encode(), """\
894, ,
895 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
896
897 def test_trailing_splitable_on_overlong_unsplitable(self):
898 eq = self.ndiffAssertEqual
899 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
900 'be_on_a_line_all_by_itself;')
901 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
902 "be_on_a_line_all_by_itself;")
903
904 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
905 eq = self.ndiffAssertEqual
906 h = Header('; '
907 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400908 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400909 eq(h.encode(), """\
910;
R David Murray01581ee2011-04-18 10:04:34 -0400911 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400912
R David Murraye1292a22011-04-07 20:54:03 -0400913 def test_long_header_with_multiple_sequential_split_chars(self):
R David Murraye1292a22011-04-07 20:54:03 -0400914 eq = self.ndiffAssertEqual
915 h = Header('This is a long line that has two whitespaces in a row. '
916 'This used to cause truncation of the header when folded')
917 eq(h.encode(), """\
918This is a long line that has two whitespaces in a row. This used to cause
919 truncation of the header when folded""")
920
R David Murray01581ee2011-04-18 10:04:34 -0400921 def test_splitter_split_on_punctuation_only_if_fws(self):
922 eq = self.ndiffAssertEqual
923 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
924 'they;arenotlegal;fold,points')
925 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
926 "arenotlegal;fold,points")
927
928 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
929 eq = self.ndiffAssertEqual
930 h = Header('this is a test where we need to have more than one line '
931 'before; our final line that is just too big to fit;; '
932 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
933 'be_on_a_line_all_by_itself;')
934 eq(h.encode(), """\
935this is a test where we need to have more than one line before;
936 our final line that is just too big to fit;;
937 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
938
939 def test_overlong_last_part_followed_by_split_point(self):
940 eq = self.ndiffAssertEqual
941 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
942 'be_on_a_line_all_by_itself ')
943 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
944 "should_be_on_a_line_all_by_itself ")
945
946 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
947 eq = self.ndiffAssertEqual
948 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
949 'before_our_final_line_; ; '
950 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
951 'be_on_a_line_all_by_itself; ')
952 eq(h.encode(), """\
953this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
954 ;
955 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
956
957 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
958 eq = self.ndiffAssertEqual
959 h = Header('this is a test where we need to have more than one line '
960 'before our final line; ; '
961 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
962 'be_on_a_line_all_by_itself; ')
963 eq(h.encode(), """\
964this is a test where we need to have more than one line before our final line;
965 ;
966 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
967
968 def test_long_header_with_whitespace_runs(self):
969 eq = self.ndiffAssertEqual
970 msg = Message()
971 msg['From'] = 'test@dom.ain'
972 msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
973 msg.set_payload('Test')
974 sfp = StringIO()
975 g = Generator(sfp)
976 g.flatten(msg)
977 eq(sfp.getvalue(), """\
978From: test@dom.ain
979References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
980 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
981 <foo@dom.ain> <foo@dom.ain>\x20\x20
982
983Test""")
984
985 def test_long_run_with_semi_header_splitter(self):
986 eq = self.ndiffAssertEqual
987 msg = Message()
988 msg['From'] = 'test@dom.ain'
989 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
990 msg.set_payload('Test')
991 sfp = StringIO()
992 g = Generator(sfp)
993 g.flatten(msg)
994 eq(sfp.getvalue(), """\
995From: test@dom.ain
996References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
997 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
998 <foo@dom.ain>; abc
999
1000Test""")
1001
1002 def test_splitter_split_on_punctuation_only_if_fws(self):
1003 eq = self.ndiffAssertEqual
1004 msg = Message()
1005 msg['From'] = 'test@dom.ain'
1006 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
1007 'they;arenotlegal;fold,points')
1008 msg.set_payload('Test')
1009 sfp = StringIO()
1010 g = Generator(sfp)
1011 g.flatten(msg)
1012 # XXX the space after the header should not be there.
1013 eq(sfp.getvalue(), """\
1014From: test@dom.ain
1015References:\x20
1016 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
1017
1018Test""")
1019
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001020 def test_no_split_long_header(self):
1021 eq = self.ndiffAssertEqual
1022 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +00001023 h = Header(hstr)
1024 # These come on two lines because Headers are really field value
1025 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001026 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001027References:
1028 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1029 h = Header('x' * 80)
1030 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001031
1032 def test_splitting_multiple_long_lines(self):
1033 eq = self.ndiffAssertEqual
1034 hstr = """\
1035from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1036\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1037\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1038"""
1039 h = Header(hstr, continuation_ws='\t')
1040 eq(h.encode(), """\
1041from babylon.socal-raves.org (localhost [127.0.0.1]);
1042 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1043 for <mailman-admin@babylon.socal-raves.org>;
1044 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1045\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1046 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1047 for <mailman-admin@babylon.socal-raves.org>;
1048 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1049\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1050 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1051 for <mailman-admin@babylon.socal-raves.org>;
1052 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1053
1054 def test_splitting_first_line_only_is_long(self):
1055 eq = self.ndiffAssertEqual
1056 hstr = """\
1057from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1058\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1059\tid 17k4h5-00034i-00
1060\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1061 h = Header(hstr, maxlinelen=78, header_name='Received',
1062 continuation_ws='\t')
1063 eq(h.encode(), """\
1064from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1065 helo=cthulhu.gerg.ca)
1066\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1067\tid 17k4h5-00034i-00
1068\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1069
1070 def test_long_8bit_header(self):
1071 eq = self.ndiffAssertEqual
1072 msg = Message()
1073 h = Header('Britische Regierung gibt', 'iso-8859-1',
1074 header_name='Subject')
1075 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001076 eq(h.encode(maxlinelen=76), """\
1077=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1078 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001079 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001080 eq(msg.as_string(maxheaderlen=76), """\
1081Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1082 =?iso-8859-1?q?hore-Windkraftprojekte?=
1083
1084""")
1085 eq(msg.as_string(maxheaderlen=0), """\
1086Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001087
1088""")
1089
1090 def test_long_8bit_header_no_charset(self):
1091 eq = self.ndiffAssertEqual
1092 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001093 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1094 'f\xfcr Offshore-Windkraftprojekte '
1095 '<a-very-long-address@example.com>')
1096 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001097 eq(msg.as_string(maxheaderlen=78), """\
1098Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1099 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1100
1101""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001102 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001103 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001104 header_name='Reply-To')
1105 eq(msg.as_string(maxheaderlen=78), """\
1106Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1107 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001108
1109""")
1110
1111 def test_long_to_header(self):
1112 eq = self.ndiffAssertEqual
1113 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001114 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001115 '"Someone Test #B" <someone@umich.edu>, '
1116 '"Someone Test #C" <someone@eecs.umich.edu>, '
1117 '"Someone Test #D" <someone@eecs.umich.edu>')
1118 msg = Message()
1119 msg['To'] = to
1120 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001121To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001122 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001123 "Someone Test #C" <someone@eecs.umich.edu>,
1124 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001125
1126''')
1127
1128 def test_long_line_after_append(self):
1129 eq = self.ndiffAssertEqual
1130 s = 'This is an example of string which has almost the limit of header length.'
1131 h = Header(s)
1132 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001133 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001134This is an example of string which has almost the limit of header length.
1135 Add another line.""")
1136
1137 def test_shorter_line_with_append(self):
1138 eq = self.ndiffAssertEqual
1139 s = 'This is a shorter line.'
1140 h = Header(s)
1141 h.append('Add another sentence. (Surprise?)')
1142 eq(h.encode(),
1143 'This is a shorter line. Add another sentence. (Surprise?)')
1144
1145 def test_long_field_name(self):
1146 eq = self.ndiffAssertEqual
1147 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001148 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1149 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1150 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1151 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001152 h = Header(gs, 'iso-8859-1', header_name=fn)
1153 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001154 eq(h.encode(maxlinelen=76), """\
1155=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1156 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1157 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1158 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001159
1160 def test_long_received_header(self):
1161 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1162 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1163 'Wed, 05 Mar 2003 18:10:18 -0700')
1164 msg = Message()
1165 msg['Received-1'] = Header(h, continuation_ws='\t')
1166 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001167 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001168 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001169Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1170 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001171 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001172Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1173 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001174 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001175
1176""")
1177
1178 def test_string_headerinst_eq(self):
1179 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1180 'tu-muenchen.de> (David Bremner\'s message of '
1181 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1182 msg = Message()
1183 msg['Received-1'] = Header(h, header_name='Received-1',
1184 continuation_ws='\t')
1185 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001186 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001187 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001188Received-1:\x20
1189 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1190 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1191Received-2:\x20
1192 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1193 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001194
1195""")
1196
1197 def test_long_unbreakable_lines_with_continuation(self):
1198 eq = self.ndiffAssertEqual
1199 msg = Message()
1200 t = """\
1201iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1202 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1203 msg['Face-1'] = t
1204 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001205 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001206 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001207 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001208 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001209Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001210 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001211 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001212Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001213 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001214 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001215Face-3:\x20
1216 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1217 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001218
1219""")
1220
1221 def test_another_long_multiline_header(self):
1222 eq = self.ndiffAssertEqual
1223 m = ('Received: from siimage.com '
1224 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001225 'Microsoft SMTPSVC(5.0.2195.4905); '
1226 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001227 msg = email.message_from_string(m)
1228 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001229Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1230 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001231
1232''')
1233
1234 def test_long_lines_with_different_header(self):
1235 eq = self.ndiffAssertEqual
1236 h = ('List-Unsubscribe: '
1237 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1238 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1239 '?subject=unsubscribe>')
1240 msg = Message()
1241 msg['List'] = h
1242 msg['List'] = Header(h, header_name='List')
1243 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001244List: List-Unsubscribe:
1245 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001246 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001247List: List-Unsubscribe:
1248 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001249 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001250
1251""")
1252
R. David Murray6f0022d2011-01-07 21:57:25 +00001253 def test_long_rfc2047_header_with_embedded_fws(self):
1254 h = Header(textwrap.dedent("""\
1255 We're going to pretend this header is in a non-ascii character set
1256 \tto see if line wrapping with encoded words and embedded
1257 folding white space works"""),
1258 charset='utf-8',
1259 header_name='Test')
1260 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1261 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1262 =?utf-8?q?cter_set?=
1263 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1264 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1265
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001266
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001267# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # Body lines beginning with "From " are escaped as ">From " when the
    # generator is created with mangle_from_=True.

    def setUp(self):
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload('From the desk of A.A.A.:\nBlah blah blah\n')
        self.msg = message

    def _flatten_text(self, msg, mangle):
        # Serialize *msg* with a text Generator and return the result.
        buffer = StringIO()
        Generator(buffer, mangle_from_=mangle).flatten(msg)
        return buffer.getvalue()

    def test_mangled_from(self):
        self.assertEqual(self._flatten_text(self.msg, True), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        self.assertEqual(self._flatten_text(self.msg, False), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")

    def test_mangle_from_in_preamble_and_epilog(self):
        # One "From " line before the first boundary and one after the
        # closing boundary; exactly two output lines must be mangled.
        multipart = email.message_from_string(textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """))
        flattened = self._flatten_text(multipart, True)
        mangled_count = sum(1 for line in flattened.split('\n')
                            if line.startswith('>From '))
        self.assertEqual(mangled_count, 2)

    def test_mangled_from_with_bad_bytes(self):
        # The body line has non-ASCII bytes; mangling must still work on
        # the bytes path and leave those bytes intact.
        head = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

            """).encode('utf-8')
        parsed = email.message_from_bytes(head + b'From R\xc3\xb6lli\n')
        buffer = BytesIO()
        BytesGenerator(buffer, mangle_from_=True).flatten(parsed)
        self.assertEqual(buffer.getvalue(), head + b'>From R\xc3\xb6lli\n')
1335
Ezio Melottib3aedd42010-11-20 19:04:17 +00001336
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001337# Test the basic MIMEAudio class
1338class TestMIMEAudio(unittest.TestCase):
1339 def setUp(self):
1340 # Make sure we pick up the audiotest.au that lives in email/test/data.
1341 # In Python, there's an audiotest.au living in Lib/test but that isn't
1342 # included in some binary distros that don't include the test
1343 # package. The trailing empty string on the .join() is significant
1344 # since findfile() will do a dirname().
1345 datadir = os.path.join(os.path.dirname(landmark), 'data', '')
1346 with open(findfile('audiotest.au', datadir), 'rb') as fp:
1347 self._audiodata = fp.read()
1348 self._au = MIMEAudio(self._audiodata)
1349
1350 def test_guess_minor_type(self):
1351 self.assertEqual(self._au.get_content_type(), 'audio/basic')
1352
1353 def test_encoding(self):
1354 payload = self._au.get_payload()
R. David Murray7da8f062010-06-04 16:11:08 +00001355 self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
1356 self._audiodata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001357
1358 def test_checkSetMinor(self):
1359 au = MIMEAudio(self._audiodata, 'fish')
1360 self.assertEqual(au.get_content_type(), 'audio/fish')
1361
1362 def test_add_header(self):
1363 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001364 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001365 self._au.add_header('Content-Disposition', 'attachment',
1366 filename='audiotest.au')
1367 eq(self._au['content-disposition'],
1368 'attachment; filename="audiotest.au"')
1369 eq(self._au.get_params(header='content-disposition'),
1370 [('attachment', ''), ('filename', 'audiotest.au')])
1371 eq(self._au.get_param('filename', header='content-disposition'),
1372 'audiotest.au')
1373 missing = []
1374 eq(self._au.get_param('attachment', header='content-disposition'), '')
1375 unless(self._au.get_param('foo', failobj=missing,
1376 header='content-disposition') is missing)
1377 # Try some missing stuff
1378 unless(self._au.get_param('foobar', missing) is missing)
1379 unless(self._au.get_param('attachment', missing,
1380 header='foobar') is missing)
1381
1382
Ezio Melottib3aedd42010-11-20 19:04:17 +00001383
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001384# Test the basic MIMEImage class
1385class TestMIMEImage(unittest.TestCase):
1386 def setUp(self):
1387 with openfile('PyBanner048.gif', 'rb') as fp:
1388 self._imgdata = fp.read()
1389 self._im = MIMEImage(self._imgdata)
1390
1391 def test_guess_minor_type(self):
1392 self.assertEqual(self._im.get_content_type(), 'image/gif')
1393
1394 def test_encoding(self):
1395 payload = self._im.get_payload()
R. David Murray7da8f062010-06-04 16:11:08 +00001396 self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
1397 self._imgdata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001398
1399 def test_checkSetMinor(self):
1400 im = MIMEImage(self._imgdata, 'fish')
1401 self.assertEqual(im.get_content_type(), 'image/fish')
1402
1403 def test_add_header(self):
1404 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001405 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001406 self._im.add_header('Content-Disposition', 'attachment',
1407 filename='dingusfish.gif')
1408 eq(self._im['content-disposition'],
1409 'attachment; filename="dingusfish.gif"')
1410 eq(self._im.get_params(header='content-disposition'),
1411 [('attachment', ''), ('filename', 'dingusfish.gif')])
1412 eq(self._im.get_param('filename', header='content-disposition'),
1413 'dingusfish.gif')
1414 missing = []
1415 eq(self._im.get_param('attachment', header='content-disposition'), '')
1416 unless(self._im.get_param('foo', failobj=missing,
1417 header='content-disposition') is missing)
1418 # Try some missing stuff
1419 unless(self._im.get_param('foobar', missing) is missing)
1420 unless(self._im.get_param('attachment', missing,
1421 header='foobar') is missing)
1422
1423
Ezio Melottib3aedd42010-11-20 19:04:17 +00001424
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001425# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        app = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(app.get_content_type(), 'application/octet-stream')
        self.assertEqual(app['content-transfer-encoding'], 'base64')

    def test_body(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        app = MIMEApplication(raw)
        # whitespace in the cte encoded block is RFC-irrelevant.
        self.assertEqual(app.get_payload().strip(), '+vv8/f7/')
        self.assertEqual(app.get_payload(decode=True), raw)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        replaced = '\uFFFD' * len(raw)
        app = MIMEApplication(raw, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(app.get_payload(), replaced)
        self.assertEqual(app.get_payload(decode=True), raw)
        self.assertEqual(app['Content-Transfer-Encoding'], '8bit')
        wire = BytesIO()
        BytesGenerator(wire).flatten(app)
        reparsed = email.message_from_bytes(wire.getvalue())
        # NOTE(review): this re-checks the original message, not the
        # re-parsed one -- possibly `reparsed` was intended; confirm.
        self.assertEqual(app.get_payload(), replaced)
        self.assertEqual(reparsed.get_payload(decode=True), raw)
        self.assertEqual(reparsed['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        replaced = '\uFFFD' * len(raw)
        app = MIMEApplication(raw, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(app.get_payload(), replaced)
        self.assertEqual(app.get_payload(decode=True), raw)
        wire = BytesIO()
        BytesGenerator(wire).flatten(app)
        reparsed = email.message_from_bytes(wire.getvalue())
        # NOTE(review): this re-checks the original message, not the
        # re-parsed one -- possibly `reparsed` was intended; confirm.
        self.assertEqual(app.get_payload(), replaced)
        self.assertEqual(reparsed.get_payload(decode=True), raw)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001474
Ezio Melottib3aedd42010-11-20 19:04:17 +00001475
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001476# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    """Tests for the basic MIMEText class."""

    def setUp(self):
        self._msg = MIMEText('hello there')

    def _check_ascii_charset(self, msg):
        # Shared checks for a message built with an explicit us-ascii charset.
        self.assertEqual(msg.get_charset().input_charset, 'us-ascii')
        self.assertEqual(msg['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_types(self):
        msg = self._msg
        sentinel = []
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_param('charset'), 'us-ascii')
        # Unknown parameters and unknown headers both yield the failobj.
        self.assertIs(msg.get_param('foobar', sentinel), sentinel)
        self.assertIs(msg.get_param('charset', sentinel, header='foobar'),
                      sentinel)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        self._check_ascii_charset(MIMEText('hello there', _charset='us-ascii'))

    def test_7bit_input(self):
        self._check_ascii_charset(MIMEText('hello there', _charset='us-ascii'))

    def test_7bit_input_no_charset(self):
        msg = MIMEText('hello there')
        self.assertEqual(msg.get_charset(), 'us-ascii')
        self.assertEqual(msg['content-type'],
                         'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        msg = MIMEText(cyrillic, _charset='utf-8')
        self.assertEqual(msg.get_charset().output_charset, 'utf-8')
        self.assertEqual(msg['content-type'], 'text/plain; charset="utf-8"')
        self.assertEqual(msg.get_payload(decode=True),
                         cyrillic.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        with self.assertRaises(UnicodeEncodeError):
            MIMEText(cyrillic)
1527
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001528
Ezio Melottib3aedd42010-11-20 19:04:17 +00001529
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001530# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    """Tests for complicated multipart/* messages.

    setUp builds a two-part multipart/mixed message (text intro plus GIF
    attachment); the remaining tests cover generation of multiparts with
    various preamble/epilogue settings and parsing of unusual boundaries.
    """

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last field is tm_isdst: pick the non-DST or DST offset.
        tzsecs = time.timezone if timetuple[-1] == 0 else time.altzone
        # time.timezone/altzone count seconds *west* of UTC, so a positive
        # value is written with a '-' sign.
        sign = '-' if tzsecs > 0 else '+'
        # Format the magnitude as HHMM.  Using abs() keeps a second minus
        # sign out of the digits for zones east of UTC, and divmod keeps
        # offsets that are not whole hours (e.g. +0530) correct.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def _make_outer(self):
        # A bare multipart/mixed container carrying the headers shared by
        # the generation tests below.  No boundary is set here because the
        # tests set it at different points.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        return outer

    def test_hierarchy(self):
        m = self._msg
        self.assertTrue(m.is_multipart())
        self.assertEqual(m.get_content_type(), 'multipart/mixed')
        self.assertEqual(len(m.get_payload()), 2)
        with self.assertRaises(IndexError):
            m.get_payload(2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        self.assertEqual(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._make_outer()
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._make_outer()
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        outer = self._make_outer()
        outer.set_boundary('BOUNDARY')
        outer.attach(MIMEText('hello world'))
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        outer = self._make_outer()
        outer.preamble = ''
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        outer = self._make_outer()
        outer.preamble = None
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._make_outer()
        outer.epilogue = None
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._make_outer()
        outer.epilogue = ''
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        outer = self._make_outer()
        outer.epilogue = '\n'
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        alternative = msg.get_payload(1)
        eq(alternative.get_content_type(), 'multipart/alternative')
        eq(len(alternative.get_payload()), 2)
        for subpart in alternative.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            eq(subpart.get_payload(0).get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the leading whitespace below is reproduced exactly as
        # it appeared in the reviewed copy, where runs of spaces look to have
        # been collapsed.  _structure indents nested parts by more than one
        # space per level, so confirm the indentation against the real file.
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
 multipart/mixed
 multipart/alternative
 text/plain
 text/plain
 text/plain
 text/plain
""")

    def test_nested_with_same_boundary(self):
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): leading whitespace reproduced as shown in the
        # reviewed copy; it was probably collapsed -- confirm the per-level
        # indentation against the real file.
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
 multipart/mixed
 multipart/alternative
 application/octet-stream
 application/octet-stream
 text/plain
""")

    def test_boundary_in_non_multipart(self):
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        self.assertEqual(msg.get_boundary(), ' XXXX')
        self.assertEqual(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001902
1903
Ezio Melottib3aedd42010-11-20 19:04:17 +00001904
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001905# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Tests for parsing badly formatted messages and the defects recorded."""

    def _check_missing_boundary_defects(self, msg):
        # A multipart without a usable boundary records exactly these two
        # defects, in this order.
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_parse_missing_minor_type(self):
        msg = self._msgobj('msg_14.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self._check_missing_boundary_defects(msg)

    def test_invalid_content_type(self):
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        out = StringIO()
        Generator(out).flatten(msg)
        self.ndiffAssertEqual(out.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        msg = self._msgobj('msg_31.txt')
        self.ndiffAssertEqual(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        msg = self._msgobj('msg_35.txt')
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self._check_missing_boundary_defects(msg)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        source = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(source)
        self.assertEqual(msg.keys(), [])
        self.assertEqual(msg.get_payload(), 'Line 2\nLine 3')
        self.assertEqual(len(msg.defects), 1)
        defect = msg.defects[0]
        self.assertIsInstance(defect,
                              errors.FirstHeaderLineIsContinuationDefect)
        self.assertEqual(defect.line, ' Line 1\n')
2016
2017
Ezio Melottib3aedd42010-11-20 19:04:17 +00002018
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002019# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """Tests for RFC 2047 header encoding and decoding."""

    def test_rfc2047_multiline(self):
        source = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        chunks = decode_header(source)
        self.assertEqual(chunks, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(chunks)
        self.assertEqual(
            str(header),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        source = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(source)
        self.assertEqual(chunks, [(b'Andr\xe9', 'iso-8859-1'),
                                  (b'Pirard <pirard@dom.ain>', None)])
        self.assertEqual(str(make_header(chunks)),
                         'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        source = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        chunks = decode_header(source)
        self.assertEqual(chunks, [
            (b'The', None), (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
        self.assertEqual(str(make_header(chunks)),
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words that are not set off by whitespace are left alone.
        source = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(source), [(source, None)])

    def test_rfc2047_with_whitespace(self):
        source = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(source), [
            (b'Sm', None), (b'\xf6', 'iso-8859-1'),
            (b'rg', None), (b'\xe5', 'iso-8859-1'),
            (b'sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # only test complete bytes
        cases = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        ]
        for encoded, expected in cases:
            decoded = decode_header(template % encoded)
            self.assertEqual(decoded, [(expected, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        source = '=?iso-8659-1?Q?andr=e9=zz?='
        expected = [(b'andr\xe9=zz', 'iso-8659-1')]
        self.assertEqual(decode_header(source), expected)
2083
Ezio Melottib3aedd42010-11-20 19:04:17 +00002084
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002085# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for the MIMEMessage class and message/* content types."""

    def setUp(self):
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def _check_default_types(self, filename):
        # Both top-level containers in the file must report message/rfc822
        # and each one's first subpart text/plain, for the default type and
        # the content type alike.
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        self.assertEqual(container1.get_default_type(), 'message/rfc822')
        self.assertEqual(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        self.assertEqual(container2.get_default_type(), 'message/rfc822')
        self.assertEqual(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        self.assertEqual(container1a.get_default_type(), 'text/plain')
        self.assertEqual(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        self.assertEqual(container2a.get_default_type(), 'text/plain')
        self.assertEqual(container2a.get_content_type(), 'text/plain')

    def _two_part_mixed(self, epilogue):
        # Build a hand-assembled multipart/mixed Message with two text parts
        # and the given epilogue, as used by the preamble/epilogue tests.
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = epilogue
        part_one = MIMEText('One')
        part_two = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(part_one)
        msg.attach(part_two)
        return msg

    def test_type_error(self):
        with self.assertRaises(TypeError):
            MIMEMessage('a plain string')

    def test_valid_argument(self):
        subject = 'A sub-message'
        inner = Message()
        inner['Subject'] = subject
        wrapper = MIMEMessage(inner)
        self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
        payload = wrapper.get_payload()
        self.assertIsInstance(payload, list)
        self.assertEqual(len(payload), 1)
        subpart = payload[0]
        self.assertIs(subpart, inner)
        self.assertEqual(subpart['subject'], subject)

    def test_bad_multipart(self):
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        wrapper = MIMEMessage(msg1)
        # A message/rfc822 wrapper cannot take a second attached message.
        with self.assertRaises(errors.MultipartConversionError):
            wrapper.attach(msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        inner = Message()
        inner['Subject'] = 'An enclosed message'
        inner.set_payload('Here is the body of the message.\n')
        wrapper = MIMEMessage(inner)
        wrapper['Subject'] = 'The enclosing message'
        out = StringIO()
        Generator(out).flatten(wrapper)
        self.assertEqual(out.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        msg = self._msgobj('msg_11.txt')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        self.assertEqual(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        self.assertEqual(submsg['subject'], 'An enclosed message')
        self.assertEqual(submsg.get_payload(),
                         'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): whitespace inside this literal is reproduced exactly
        # as it appeared in the reviewed copy, where runs of spaces look to
        # have been collapsed -- confirm the indentation of the quoted header
        # and recipient lines against msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = self._two_part_mixed('End of MIME message\n')
        out = StringIO()
        Generator(out).flatten(msg)
        self.ndiffAssertEqual(out.getvalue(), text)

    def test_no_nl_preamble(self):
        msg = self._two_part_mixed('')
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        self._check_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1 = MIMEMessage(MIMEText('message 1\n'))
        subpart2 = MIMEMessage(MIMEText('message 2\n'))
        container.attach(subpart1)
        container.attach(subpart2)
        for wrapped in (subpart1, subpart2):
            eq(wrapped.get_content_type(), 'message/rfc822')
            eq(wrapped.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # Strip the explicit type headers; the types must not change.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        for wrapped in (subpart1, subpart2):
            eq(wrapped.get_content_type(), 'message/rfc822')
            eq(wrapped.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        self.assertEqual(len(msg.get_payload()), 2)
        self.assertEqual(msg.get_payload(0), text1)
        self.assertEqual(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        self.assertTrue(MIMEMultipart().is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002384
Ezio Melottib3aedd42010-11-20 19:04:17 +00002385
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):
    # Each test parses a sample message and regenerates its text without
    # touching the object tree; the regenerated text must equal the input.

    # Line ending appended to the expected preamble/epilogue/payload values
    # checked in test_content_type() and test_parser().
    linesep = '\n'

    def _msgobj(self, filename):
        # Return (message, text): the parsed message object together with
        # the raw text it was parsed from.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) and
        # require the result to be identical to text.
        out = StringIO()
        Generator(out, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, out.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        msg, text = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        # Get a message object and reset the seek pointer for other tests
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002548
2549
Ezio Melottib3aedd42010-11-20 19:04:17 +00002550
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002551# Test various other bits of the package's functionality
2552class TestMiscellaneous(TestEmailBase):
2553 def test_message_from_string(self):
2554 with openfile('msg_01.txt') as fp:
2555 text = fp.read()
2556 msg = email.message_from_string(text)
2557 s = StringIO()
2558 # Don't wrap/continue long headers since we're trying to test
2559 # idempotency.
2560 g = Generator(s, maxheaderlen=0)
2561 g.flatten(msg)
2562 self.assertEqual(text, s.getvalue())
2563
2564 def test_message_from_file(self):
2565 with openfile('msg_01.txt') as fp:
2566 text = fp.read()
2567 fp.seek(0)
2568 msg = email.message_from_file(fp)
2569 s = StringIO()
2570 # Don't wrap/continue long headers since we're trying to test
2571 # idempotency.
2572 g = Generator(s, maxheaderlen=0)
2573 g.flatten(msg)
2574 self.assertEqual(text, s.getvalue())
2575
2576 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002577 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002578 with openfile('msg_01.txt') as fp:
2579 text = fp.read()
2580
2581 # Create a subclass
2582 class MyMessage(Message):
2583 pass
2584
2585 msg = email.message_from_string(text, MyMessage)
2586 unless(isinstance(msg, MyMessage))
2587 # Try something more complicated
2588 with openfile('msg_02.txt') as fp:
2589 text = fp.read()
2590 msg = email.message_from_string(text, MyMessage)
2591 for subpart in msg.walk():
2592 unless(isinstance(subpart, MyMessage))
2593
2594 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002595 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002596 # Create a subclass
2597 class MyMessage(Message):
2598 pass
2599
2600 with openfile('msg_01.txt') as fp:
2601 msg = email.message_from_file(fp, MyMessage)
2602 unless(isinstance(msg, MyMessage))
2603 # Try something more complicated
2604 with openfile('msg_02.txt') as fp:
2605 msg = email.message_from_file(fp, MyMessage)
2606 for subpart in msg.walk():
2607 unless(isinstance(subpart, MyMessage))
2608
2609 def test__all__(self):
2610 module = __import__('email')
R David Murray1b6c7242012-03-16 22:43:05 -04002611 self.assertEqual(sorted(module.__all__), [
2612 'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2613 'generator', 'header', 'iterators', 'message',
2614 'message_from_binary_file', 'message_from_bytes',
2615 'message_from_file', 'message_from_string', 'mime', 'parser',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002616 'quoprimime', 'utils',
2617 ])
2618
2619 def test_formatdate(self):
2620 now = time.time()
2621 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2622 time.gmtime(now)[:6])
2623
2624 def test_formatdate_localtime(self):
2625 now = time.time()
2626 self.assertEqual(
2627 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2628 time.localtime(now)[:6])
2629
2630 def test_formatdate_usegmt(self):
2631 now = time.time()
2632 self.assertEqual(
2633 utils.formatdate(now, localtime=False),
2634 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2635 self.assertEqual(
2636 utils.formatdate(now, localtime=False, usegmt=True),
2637 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2638
2639 def test_parsedate_none(self):
2640 self.assertEqual(utils.parsedate(''), None)
2641
2642 def test_parsedate_compact(self):
2643 # The FWS after the comma is optional
2644 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2645 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2646
2647 def test_parsedate_no_dayofweek(self):
2648 eq = self.assertEqual
2649 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2650 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2651
2652 def test_parsedate_compact_no_dayofweek(self):
2653 eq = self.assertEqual
2654 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2655 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2656
R. David Murray4a62e892010-12-23 20:35:46 +00002657 def test_parsedate_no_space_before_positive_offset(self):
2658 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2659 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2660
2661 def test_parsedate_no_space_before_negative_offset(self):
2662 # Issue 1155362: we already handled '+' for this case.
2663 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2664 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2665
2666
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002667 def test_parsedate_acceptable_to_time_functions(self):
2668 eq = self.assertEqual
2669 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2670 t = int(time.mktime(timetup))
2671 eq(time.localtime(t)[:6], timetup[:6])
2672 eq(int(time.strftime('%Y', timetup)), 2003)
2673 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2674 t = int(time.mktime(timetup[:9]))
2675 eq(time.localtime(t)[:6], timetup[:6])
2676 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2677
Alexander Belopolskya07548e2012-06-21 20:34:09 -04002678 def test_mktime_tz(self):
2679 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2680 -1, -1, -1, 0)), 0)
2681 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2682 -1, -1, -1, 1234)), -1234)
2683
R. David Murray219d1c82010-08-25 00:45:55 +00002684 def test_parsedate_y2k(self):
2685 """Test for parsing a date with a two-digit year.
2686
2687 Parsing a date with a two-digit year should return the correct
2688 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2689 obsoletes RFC822) requires four-digit years.
2690
2691 """
2692 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2693 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2694 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2695 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2696
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002697 def test_parseaddr_empty(self):
2698 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2699 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2700
2701 def test_noquote_dump(self):
2702 self.assertEqual(
2703 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2704 'A Silly Person <person@dom.ain>')
2705
2706 def test_escape_dump(self):
2707 self.assertEqual(
2708 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2709 r'"A \(Very\) Silly Person" <person@dom.ain>')
2710 a = r'A \(Special\) Person'
2711 b = 'person@dom.ain'
2712 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2713
2714 def test_escape_backslashes(self):
2715 self.assertEqual(
2716 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2717 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2718 a = r'Arthur \Backslash\ Foobar'
2719 b = 'person@dom.ain'
2720 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2721
2722 def test_name_with_dot(self):
2723 x = 'John X. Doe <jxd@example.com>'
2724 y = '"John X. Doe" <jxd@example.com>'
2725 a, b = ('John X. Doe', 'jxd@example.com')
2726 self.assertEqual(utils.parseaddr(x), (a, b))
2727 self.assertEqual(utils.parseaddr(y), (a, b))
2728 # formataddr() quotes the name if there's a dot in it
2729 self.assertEqual(utils.formataddr((a, b)), y)
2730
R. David Murray5397e862010-10-02 15:58:26 +00002731 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2732 # issue 10005. Note that in the third test the second pair of
2733 # backslashes is not actually a quoted pair because it is not inside a
2734 # comment or quoted string: the address being parsed has a quoted
2735 # string containing a quoted backslash, followed by 'example' and two
2736 # backslashes, followed by another quoted string containing a space and
2737 # the word 'example'. parseaddr copies those two backslashes
2738 # literally. Per rfc5322 this is not technically correct since a \ may
2739 # not appear in an address outside of a quoted string. It is probably
2740 # a sensible Postel interpretation, though.
2741 eq = self.assertEqual
2742 eq(utils.parseaddr('""example" example"@example.com'),
2743 ('', '""example" example"@example.com'))
2744 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2745 ('', '"\\"example\\" example"@example.com'))
2746 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2747 ('', '"\\\\"example\\\\" example"@example.com'))
2748
R. David Murray63563cd2010-12-18 18:25:38 +00002749 def test_parseaddr_preserves_spaces_in_local_part(self):
2750 # issue 9286. A normal RFC5322 local part should not contain any
2751 # folding white space, but legacy local parts can (they are a sequence
2752 # of atoms, not dotatoms). On the other hand we strip whitespace from
2753 # before the @ and around dots, on the assumption that the whitespace
2754 # around the punctuation is a mistake in what would otherwise be
2755 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2756 self.assertEqual(('', "merwok wok@xample.com"),
2757 utils.parseaddr("merwok wok@xample.com"))
2758 self.assertEqual(('', "merwok wok@xample.com"),
2759 utils.parseaddr("merwok wok@xample.com"))
2760 self.assertEqual(('', "merwok wok@xample.com"),
2761 utils.parseaddr(" merwok wok @xample.com"))
2762 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2763 utils.parseaddr('merwok"wok" wok@xample.com'))
2764 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2765 utils.parseaddr('merwok. wok . wok@xample.com'))
2766
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002767 def test_multiline_from_comment(self):
2768 x = """\
2769Foo
2770\tBar <foo@example.com>"""
2771 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2772
2773 def test_quote_dump(self):
2774 self.assertEqual(
2775 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2776 r'"A Silly; Person" <person@dom.ain>')
2777
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002778 def test_charset_richcomparisons(self):
2779 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002780 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002781 cset1 = Charset()
2782 cset2 = Charset()
2783 eq(cset1, 'us-ascii')
2784 eq(cset1, 'US-ASCII')
2785 eq(cset1, 'Us-AsCiI')
2786 eq('us-ascii', cset1)
2787 eq('US-ASCII', cset1)
2788 eq('Us-AsCiI', cset1)
2789 ne(cset1, 'usascii')
2790 ne(cset1, 'USASCII')
2791 ne(cset1, 'UsAsCiI')
2792 ne('usascii', cset1)
2793 ne('USASCII', cset1)
2794 ne('UsAsCiI', cset1)
2795 eq(cset1, cset2)
2796 eq(cset2, cset1)
2797
2798 def test_getaddresses(self):
2799 eq = self.assertEqual
2800 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2801 'Bud Person <bperson@dom.ain>']),
2802 [('Al Person', 'aperson@dom.ain'),
2803 ('Bud Person', 'bperson@dom.ain')])
2804
2805 def test_getaddresses_nasty(self):
2806 eq = self.assertEqual
2807 eq(utils.getaddresses(['foo: ;']), [('', '')])
2808 eq(utils.getaddresses(
2809 ['[]*-- =~$']),
2810 [('', ''), ('', ''), ('', '*--')])
2811 eq(utils.getaddresses(
2812 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2813 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2814
2815 def test_getaddresses_embedded_comment(self):
2816 """Test proper handling of a nested comment"""
2817 eq = self.assertEqual
2818 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2819 eq(addrs[0][1], 'foo@bar.com')
2820
2821 def test_utils_quote_unquote(self):
2822 eq = self.assertEqual
2823 msg = Message()
2824 msg.add_header('content-disposition', 'attachment',
2825 filename='foo\\wacky"name')
2826 eq(msg.get_filename(), 'foo\\wacky"name')
2827
2828 def test_get_body_encoding_with_bogus_charset(self):
2829 charset = Charset('not a charset')
2830 self.assertEqual(charset.get_body_encoding(), 'base64')
2831
2832 def test_get_body_encoding_with_uppercase_charset(self):
2833 eq = self.assertEqual
2834 msg = Message()
2835 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2836 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2837 charsets = msg.get_charsets()
2838 eq(len(charsets), 1)
2839 eq(charsets[0], 'utf-8')
2840 charset = Charset(charsets[0])
2841 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002842 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002843 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2844 eq(msg.get_payload(decode=True), b'hello world')
2845 eq(msg['content-transfer-encoding'], 'base64')
2846 # Try another one
2847 msg = Message()
2848 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2849 charsets = msg.get_charsets()
2850 eq(len(charsets), 1)
2851 eq(charsets[0], 'us-ascii')
2852 charset = Charset(charsets[0])
2853 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2854 msg.set_payload('hello world', charset=charset)
2855 eq(msg.get_payload(), 'hello world')
2856 eq(msg['content-transfer-encoding'], '7bit')
2857
2858 def test_charsets_case_insensitive(self):
2859 lc = Charset('us-ascii')
2860 uc = Charset('US-ASCII')
2861 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2862
2863 def test_partial_falls_inside_message_delivery_status(self):
2864 eq = self.ndiffAssertEqual
2865 # The Parser interface provides chunks of data to FeedParser in 8192
2866 # byte gulps. SF bug #1076485 found one of those chunks inside
2867 # message/delivery-status header block, which triggered an
2868 # unreadline() of NeedMoreData.
2869 msg = self._msgobj('msg_43.txt')
2870 sfp = StringIO()
2871 iterators._structure(msg, sfp)
2872 eq(sfp.getvalue(), """\
2873multipart/report
2874 text/plain
2875 message/delivery-status
2876 text/plain
2877 text/plain
2878 text/plain
2879 text/plain
2880 text/plain
2881 text/plain
2882 text/plain
2883 text/plain
2884 text/plain
2885 text/plain
2886 text/plain
2887 text/plain
2888 text/plain
2889 text/plain
2890 text/plain
2891 text/plain
2892 text/plain
2893 text/plain
2894 text/plain
2895 text/plain
2896 text/plain
2897 text/plain
2898 text/plain
2899 text/plain
2900 text/plain
2901 text/plain
2902 text/rfc822-headers
2903""")
2904
R. David Murraya0b44b52010-12-02 21:47:19 +00002905 def test_make_msgid_domain(self):
2906 self.assertEqual(
2907 email.utils.make_msgid(domain='testdomain-string')[-19:],
2908 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002909
R David Murraye67c6c52013-03-07 16:38:03 -05002910 def test_Generator_linend(self):
2911 # Issue 14645.
2912 with openfile('msg_26.txt', newline='\n') as f:
2913 msgtxt = f.read()
2914 msgtxt_nl = msgtxt.replace('\r\n', '\n')
2915 msg = email.message_from_string(msgtxt)
2916 s = StringIO()
2917 g = email.generator.Generator(s)
2918 g.flatten(msg)
2919 self.assertEqual(s.getvalue(), msgtxt_nl)
2920
2921 def test_BytesGenerator_linend(self):
2922 # Issue 14645.
2923 with openfile('msg_26.txt', newline='\n') as f:
2924 msgtxt = f.read()
2925 msgtxt_nl = msgtxt.replace('\r\n', '\n')
2926 msg = email.message_from_string(msgtxt_nl)
2927 s = BytesIO()
2928 g = email.generator.BytesGenerator(s)
2929 g.flatten(msg, linesep='\r\n')
2930 self.assertEqual(s.getvalue().decode('ascii'), msgtxt)
2931
2932 def test_BytesGenerator_linend_with_non_ascii(self):
2933 # Issue 14645.
2934 with openfile('msg_26.txt', 'rb') as f:
2935 msgtxt = f.read()
2936 msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6')
2937 msgtxt_nl = msgtxt.replace(b'\r\n', b'\n')
2938 msg = email.message_from_bytes(msgtxt_nl)
2939 s = BytesIO()
2940 g = email.generator.BytesGenerator(s)
2941 g.flatten(msg, linesep='\r\n')
2942 self.assertEqual(s.getvalue(), msgtxt)
2943
Ezio Melottib3aedd42010-11-20 19:04:17 +00002944
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002945# Test the iterator/generators
class TestIterators(TestEmailBase):
    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        it = iterators.typed_subpart_iterator(msg, 'text')
        # Collect the payload of every subpart the iterator yields.
        lines = [subpart.get_payload() for subpart in it]
        eq(len(lines), 2)
        eq(EMPTYSTRING.join(lines), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        it = iterators.typed_subpart_iterator(msg, 'text', 'plain')
        # Collect the payload of every subpart the iterator yields.
        lines = [subpart.get_payload() for subpart in it]
        eq(len(lines), 1)
        eq(EMPTYSTRING.join(lines), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry is (chunk to push, number of complete lines that become
        # readable right after that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            # Drain every line that is available after this push.
            while True:
                ol = bsf.readline()
                # NeedMoreData is a sentinel object, so compare by identity.
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: the lines read back must
        # concatenate to exactly what was pushed.
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
3032
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003033
Ezio Melottib3aedd42010-11-20 19:04:17 +00003034
class TestParsers(TestEmailBase):
    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body was not parsed, so the payload is one string rather than
        # a list of subparts.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    # Show complete diffs when an assertion in this class fails.
    maxDiff = None

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        self.assertTrue(part1.is_multipart())
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        self.assertFalse(part1a.is_multipart())
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        self.assertTrue(part2.is_multipart())
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        self.assertFalse(part2a.is_multipart())
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() accepts any iterable, so this does not depend on keys()
        # returning a list with an in-place sort() method.
        headers = sorted(msg.keys())
        eq(headers, ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003197
Ezio Melottib3aedd42010-11-20 19:04:17 +00003198
R. David Murray96fd54e2010-10-08 15:55:28 +00003199class Test8BitBytesHandling(unittest.TestCase):
3200 # In Python3 all input is string, but that doesn't work if the actual input
3201 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3202 # decode byte streams using the surrogateescape error handler, and
3203 # reconvert to binary at appropriate places if we detect surrogates. This
3204 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3205 # but it does allow us to parse and preserve them, and to decode body
3206 # parts that use an 8bit CTE.
3207
3208 bodytest_msg = textwrap.dedent("""\
3209 From: foo@bar.com
3210 To: baz
3211 Mime-Version: 1.0
3212 Content-Type: text/plain; charset={charset}
3213 Content-Transfer-Encoding: {cte}
3214
3215 {bodyline}
3216 """)
3217
3218 def test_known_8bit_CTE(self):
3219 m = self.bodytest_msg.format(charset='utf-8',
3220 cte='8bit',
3221 bodyline='pöstal').encode('utf-8')
3222 msg = email.message_from_bytes(m)
3223 self.assertEqual(msg.get_payload(), "pöstal\n")
3224 self.assertEqual(msg.get_payload(decode=True),
3225 "pöstal\n".encode('utf-8'))
3226
3227 def test_unknown_8bit_CTE(self):
3228 m = self.bodytest_msg.format(charset='notavalidcharset',
3229 cte='8bit',
3230 bodyline='pöstal').encode('utf-8')
3231 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003232 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003233 self.assertEqual(msg.get_payload(decode=True),
3234 "pöstal\n".encode('utf-8'))
3235
3236 def test_8bit_in_quopri_body(self):
3237 # This is non-RFC compliant data...without 'decode' the library code
3238 # decodes the body using the charset from the headers, and because the
3239 # source byte really is utf-8 this works. This is likely to fail
3240 # against real dirty data (ie: produce mojibake), but the data is
3241 # invalid anyway so it is as good a guess as any. But this means that
3242 # this test just confirms the current behavior; that behavior is not
3243 # necessarily the best possible behavior. With 'decode' it is
3244 # returning the raw bytes, so that test should be of correct behavior,
3245 # or at least produce the same result that email4 did.
3246 m = self.bodytest_msg.format(charset='utf-8',
3247 cte='quoted-printable',
3248 bodyline='p=C3=B6stál').encode('utf-8')
3249 msg = email.message_from_bytes(m)
3250 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3251 self.assertEqual(msg.get_payload(decode=True),
3252 'pöstál\n'.encode('utf-8'))
3253
3254 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3255 # This is similar to the previous test, but proves that if the 8bit
3256 # byte is undecodeable in the specified charset, it gets replaced
3257 # by the unicode 'unknown' character. Again, this may or may not
3258 # be the ideal behavior. Note that if decode=False none of the
3259 # decoders will get involved, so this is the only test we need
3260 # for this behavior.
3261 m = self.bodytest_msg.format(charset='ascii',
3262 cte='quoted-printable',
3263 bodyline='p=C3=B6stál').encode('utf-8')
3264 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003265 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003266 self.assertEqual(msg.get_payload(decode=True),
3267 'pöstál\n'.encode('utf-8'))
3268
3269 def test_8bit_in_base64_body(self):
3270 # Sticking an 8bit byte in a base64 block makes it undecodable by
3271 # normal means, so the block is returned undecoded, but as bytes.
3272 m = self.bodytest_msg.format(charset='utf-8',
3273 cte='base64',
3274 bodyline='cMO2c3RhbAá=').encode('utf-8')
3275 msg = email.message_from_bytes(m)
3276 self.assertEqual(msg.get_payload(decode=True),
3277 'cMO2c3RhbAá=\n'.encode('utf-8'))
3278
3279 def test_8bit_in_uuencode_body(self):
3280 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3281 # normal means, so the block is returned undecoded, but as bytes.
3282 m = self.bodytest_msg.format(charset='utf-8',
3283 cte='uuencode',
3284 bodyline='<,.V<W1A; á ').encode('utf-8')
3285 msg = email.message_from_bytes(m)
3286 self.assertEqual(msg.get_payload(decode=True),
3287 '<,.V<W1A; á \n'.encode('utf-8'))
3288
3289
R. David Murray92532142011-01-07 23:25:30 +00003290 headertest_headers = (
3291 ('From: foo@bar.com', ('From', 'foo@bar.com')),
3292 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3293 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3294 '\tJean de Baddie',
3295 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3296 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3297 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3298 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3299 )
3300 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3301 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003302
3303 def test_get_8bit_header(self):
3304 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003305 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3306 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003307
3308 def test_print_8bit_headers(self):
3309 msg = email.message_from_bytes(self.headertest_msg)
3310 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003311 textwrap.dedent("""\
3312 From: {}
3313 To: {}
3314 Subject: {}
3315 From: {}
3316
3317 Yes, they are flying.
3318 """).format(*[expected[1] for (_, expected) in
3319 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003320
3321 def test_values_with_8bit_headers(self):
3322 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003323 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003324 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003325 'b\uFFFD\uFFFDz',
3326 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3327 'coll\uFFFD\uFFFDgue, le pouf '
3328 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003329 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003330 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003331
3332 def test_items_with_8bit_headers(self):
3333 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003334 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003335 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003336 ('To', 'b\uFFFD\uFFFDz'),
3337 ('Subject', 'Maintenant je vous '
3338 'pr\uFFFD\uFFFDsente '
3339 'mon coll\uFFFD\uFFFDgue, le pouf '
3340 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3341 '\tJean de Baddie'),
3342 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003343
3344 def test_get_all_with_8bit_headers(self):
3345 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003346 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003347 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003348 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003349
R David Murraya2150232011-03-16 21:11:23 -04003350 def test_get_content_type_with_8bit(self):
3351 msg = email.message_from_bytes(textwrap.dedent("""\
3352 Content-Type: text/pl\xA7in; charset=utf-8
3353 """).encode('latin-1'))
3354 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3355 self.assertEqual(msg.get_content_maintype(), "text")
3356 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3357
3358 def test_get_params_with_8bit(self):
3359 msg = email.message_from_bytes(
3360 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3361 self.assertEqual(msg.get_params(header='x-header'),
3362 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3363 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3364 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3365 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3366
3367 def test_get_rfc2231_params_with_8bit(self):
3368 msg = email.message_from_bytes(textwrap.dedent("""\
3369 Content-Type: text/plain; charset=us-ascii;
3370 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3371 ).encode('latin-1'))
3372 self.assertEqual(msg.get_param('title'),
3373 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3374
3375 def test_set_rfc2231_params_with_8bit(self):
3376 msg = email.message_from_bytes(textwrap.dedent("""\
3377 Content-Type: text/plain; charset=us-ascii;
3378 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3379 ).encode('latin-1'))
3380 msg.set_param('title', 'test')
3381 self.assertEqual(msg.get_param('title'), 'test')
3382
3383 def test_del_rfc2231_params_with_8bit(self):
3384 msg = email.message_from_bytes(textwrap.dedent("""\
3385 Content-Type: text/plain; charset=us-ascii;
3386 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3387 ).encode('latin-1'))
3388 msg.del_param('title')
3389 self.assertEqual(msg.get_param('title'), None)
3390 self.assertEqual(msg.get_content_maintype(), 'text')
3391
3392 def test_get_payload_with_8bit_cte_header(self):
3393 msg = email.message_from_bytes(textwrap.dedent("""\
3394 Content-Transfer-Encoding: b\xa7se64
3395 Content-Type: text/plain; charset=latin-1
3396
3397 payload
3398 """).encode('latin-1'))
3399 self.assertEqual(msg.get_payload(), 'payload\n')
3400 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3401
R. David Murray96fd54e2010-10-08 15:55:28 +00003402 non_latin_bin_msg = textwrap.dedent("""\
3403 From: foo@bar.com
3404 To: báz
3405 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3406 \tJean de Baddie
3407 Mime-Version: 1.0
3408 Content-Type: text/plain; charset="utf-8"
3409 Content-Transfer-Encoding: 8bit
3410
3411 Да, они летят.
3412 """).encode('utf-8')
3413
3414 def test_bytes_generator(self):
3415 msg = email.message_from_bytes(self.non_latin_bin_msg)
3416 out = BytesIO()
3417 email.generator.BytesGenerator(out).flatten(msg)
3418 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3419
R. David Murray7372a072011-01-26 21:21:32 +00003420 def test_bytes_generator_handles_None_body(self):
3421 #Issue 11019
3422 msg = email.message.Message()
3423 out = BytesIO()
3424 email.generator.BytesGenerator(out).flatten(msg)
3425 self.assertEqual(out.getvalue(), b"\n")
3426
R. David Murray92532142011-01-07 23:25:30 +00003427 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003428 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003429 To: =?unknown-8bit?q?b=C3=A1z?=
3430 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3431 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3432 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003433 Mime-Version: 1.0
3434 Content-Type: text/plain; charset="utf-8"
3435 Content-Transfer-Encoding: base64
3436
3437 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3438 """)
3439
3440 def test_generator_handles_8bit(self):
3441 msg = email.message_from_bytes(self.non_latin_bin_msg)
3442 out = StringIO()
3443 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003444 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003445
3446 def test_bytes_generator_with_unix_from(self):
3447 # The unixfrom contains a current date, so we can't check it
3448 # literally. Just make sure the first word is 'From' and the
3449 # rest of the message matches the input.
3450 msg = email.message_from_bytes(self.non_latin_bin_msg)
3451 out = BytesIO()
3452 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3453 lines = out.getvalue().split(b'\n')
3454 self.assertEqual(lines[0].split()[0], b'From')
3455 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3456
R. David Murray92532142011-01-07 23:25:30 +00003457 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3458 non_latin_bin_msg_as7bit[2:4] = [
3459 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3460 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3461 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3462
R. David Murray96fd54e2010-10-08 15:55:28 +00003463 def test_message_from_binary_file(self):
3464 fn = 'test.msg'
3465 self.addCleanup(unlink, fn)
3466 with open(fn, 'wb') as testfile:
3467 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00003468 with open(fn, 'rb') as testfile:
3469 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00003470 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
3471
3472 latin_bin_msg = textwrap.dedent("""\
3473 From: foo@bar.com
3474 To: Dinsdale
3475 Subject: Nudge nudge, wink, wink
3476 Mime-Version: 1.0
3477 Content-Type: text/plain; charset="latin-1"
3478 Content-Transfer-Encoding: 8bit
3479
3480 oh là là, know what I mean, know what I mean?
3481 """).encode('latin-1')
3482
3483 latin_bin_msg_as7bit = textwrap.dedent("""\
3484 From: foo@bar.com
3485 To: Dinsdale
3486 Subject: Nudge nudge, wink, wink
3487 Mime-Version: 1.0
3488 Content-Type: text/plain; charset="iso-8859-1"
3489 Content-Transfer-Encoding: quoted-printable
3490
3491 oh l=E0 l=E0, know what I mean, know what I mean?
3492 """)
3493
3494 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3495 m = email.message_from_bytes(self.latin_bin_msg)
3496 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3497
3498 def test_decoded_generator_emits_unicode_body(self):
3499 m = email.message_from_bytes(self.latin_bin_msg)
3500 out = StringIO()
3501 email.generator.DecodedGenerator(out).flatten(m)
3502 #DecodedHeader output contains an extra blank line compared
3503 #to the input message. RDM: not sure if this is a bug or not,
3504 #but it is not specific to the 8bit->7bit conversion.
3505 self.assertEqual(out.getvalue(),
3506 self.latin_bin_msg.decode('latin-1')+'\n')
3507
3508 def test_bytes_feedparser(self):
3509 bfp = email.feedparser.BytesFeedParser()
3510 for i in range(0, len(self.latin_bin_msg), 10):
3511 bfp.feed(self.latin_bin_msg[i:i+10])
3512 m = bfp.close()
3513 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3514
R. David Murray8451c4b2010-10-23 22:19:56 +00003515 def test_crlf_flatten(self):
3516 with openfile('msg_26.txt', 'rb') as fp:
3517 text = fp.read()
3518 msg = email.message_from_bytes(text)
3519 s = BytesIO()
3520 g = email.generator.BytesGenerator(s)
3521 g.flatten(msg, linesep='\r\n')
3522 self.assertEqual(s.getvalue(), text)
R David Murrayc5c14722011-04-06 08:13:02 -04003523
3524 def test_8bit_multipart(self):
3525 # Issue 11605
3526 source = textwrap.dedent("""\
3527 Date: Fri, 18 Mar 2011 17:15:43 +0100
3528 To: foo@example.com
3529 From: foodwatch-Newsletter <bar@example.com>
3530 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3531 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3532 MIME-Version: 1.0
3533 Content-Type: multipart/alternative;
3534 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3535
3536 --b1_76a486bee62b0d200f33dc2ca08220ad
3537 Content-Type: text/plain; charset="utf-8"
3538 Content-Transfer-Encoding: 8bit
3539
3540 Guten Tag, ,
3541
3542 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3543 Nachrichten aus Japan.
3544
3545
3546 --b1_76a486bee62b0d200f33dc2ca08220ad
3547 Content-Type: text/html; charset="utf-8"
3548 Content-Transfer-Encoding: 8bit
3549
3550 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3551 "http://www.w3.org/TR/html4/loose.dtd">
3552 <html lang="de">
3553 <head>
3554 <title>foodwatch - Newsletter</title>
3555 </head>
3556 <body>
3557 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3558 die Nachrichten aus Japan.</p>
3559 </body>
3560 </html>
3561 --b1_76a486bee62b0d200f33dc2ca08220ad--
3562
3563 """).encode('utf-8')
3564 msg = email.message_from_bytes(source)
3565 s = BytesIO()
3566 g = email.generator.BytesGenerator(s)
3567 g.flatten(msg)
3568 self.assertEqual(s.getvalue(), source)
3569
R David Murray9fd170e2012-03-14 14:05:03 -04003570 def test_bytes_generator_b_encoding_linesep(self):
3571 # Issue 14062: b encoding was tacking on an extra \n.
3572 m = Message()
3573 # This has enough non-ascii that it should always end up b encoded.
3574 m['Subject'] = Header('žluťoučký kůň')
3575 s = BytesIO()
3576 g = email.generator.BytesGenerator(s)
3577 g.flatten(m, linesep='\r\n')
3578 self.assertEqual(
3579 s.getvalue(),
3580 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3581
3582 def test_generator_b_encoding_linesep(self):
3583 # Since this broke in ByteGenerator, test Generator for completeness.
3584 m = Message()
3585 # This has enough non-ascii that it should always end up b encoded.
3586 m['Subject'] = Header('žluťoučký kůň')
3587 s = StringIO()
3588 g = email.generator.Generator(s)
3589 g.flatten(m, linesep='\r\n')
3590 self.assertEqual(
3591 s.getvalue(),
3592 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3593
R. David Murray8451c4b2010-10-23 22:19:56 +00003594 maxDiff = None
3595
Ezio Melottib3aedd42010-11-20 19:04:17 +00003596
class BaseTestBytesGeneratorIdempotent:
    """Mixin checking that BytesGenerator reproduces parsed bytes exactly.

    The methods read ``linesep``, ``blinesep`` and
    ``normalize_linesep_regex`` from ``self``; this class does not define
    them.
    """

    maxDiff = None

    def _msgobj(self, filename):
        """Return (message, data) for *filename* after line-ending rewrite."""
        with openfile(filename, 'rb') as stream:
            raw = stream.read()
        # Rewrite every line ending matched by the regex to self.blinesep.
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        """Flatten *msg* with BytesGenerator and compare the result to *data*."""
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, sink.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        """Compare two byte strings line by line."""
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        expected_lines = str1.split(b'\n')
        actual_lines = str2.split(b'\n')
        self.assertListEqual(expected_lines, actual_lines)
3618
3619
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    """Bytes idempotency tests using bare LF line endings."""
    # Any CRLF in the test data is rewritten to a bare LF before parsing.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3625
3626
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    """Bytes idempotency tests using CRLF line endings."""
    # Only an LF not already preceded by CR is rewritten to CRLF.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3632
Ezio Melottib3aedd42010-11-20 19:04:17 +00003633
class TestBase64(unittest.TestCase):
    """Tests for the email.base64mime helpers."""

    def test_len(self):
        check = self.assertEqual
        check(base64mime.header_length('hello'),
              len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Four output characters per started group of three input
            # bytes: 0 -> 0, 1..3 -> 4, 4..6 -> 8, ... 13..14 -> 20.
            expected = 4 * ((size + 2) // 3)
            check(base64mime.header_length('x' * size), expected)

    def test_decode(self):
        check = self.assertEqual
        check(base64mime.decode(''), b'')
        check(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        check = self.assertEqual
        encode = base64mime.body_encode
        # NOTE(review): empty input is expected back as bytes while every
        # other case below expects str; confirm against the library version.
        check(encode(b''), b'')
        check(encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        check(encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        check(encode(b'xxxx ' * 20, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        check(encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        check = self.assertEqual
        encode = base64mime.header_encode
        check(encode('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        check(encode('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        check(encode('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
        # Same input and expectation as the third check above.
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003684
3685
Ezio Melottib3aedd42010-11-20 19:04:17 +00003686
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003687class TestQuopri(unittest.TestCase):
3688 def setUp(self):
3689 # Set of characters (as byte integers) that don't need to be encoded
3690 # in headers.
3691 self.hlit = list(chain(
3692 range(ord('a'), ord('z') + 1),
3693 range(ord('A'), ord('Z') + 1),
3694 range(ord('0'), ord('9') + 1),
Guido van Rossum9604e662007-08-30 03:46:43 +00003695 (c for c in b'!*+-/')))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003696 # Set of characters (as byte integers) that do need to be encoded in
3697 # headers.
3698 self.hnon = [c for c in range(256) if c not in self.hlit]
3699 assert len(self.hlit) + len(self.hnon) == 256
3700 # Set of characters (as byte integers) that don't need to be encoded
3701 # in bodies.
3702 self.blit = list(range(ord(' '), ord('~') + 1))
3703 self.blit.append(ord('\t'))
3704 self.blit.remove(ord('='))
3705 # Set of characters (as byte integers) that do need to be encoded in
3706 # bodies.
3707 self.bnon = [c for c in range(256) if c not in self.blit]
3708 assert len(self.blit) + len(self.bnon) == 256
3709
Guido van Rossum9604e662007-08-30 03:46:43 +00003710 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003711 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003712 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003713 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003714 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003715 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003716 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003717
Guido van Rossum9604e662007-08-30 03:46:43 +00003718 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003719 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003720 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003721 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003722 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003723 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003724 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003725
3726 def test_header_quopri_len(self):
3727 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003728 eq(quoprimime.header_length(b'hello'), 5)
3729 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003730 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003731 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003732 # =?xxx?q?...?= means 10 extra characters
3733 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003734 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3735 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003736 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003737 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003738 # =?xxx?q?...?= means 10 extra characters
3739 10)
3740 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003741 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003742 'expected length 1 for %r' % chr(c))
3743 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003744 # Space is special; it's encoded to _
3745 if c == ord(' '):
3746 continue
3747 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003748 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003749 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003750
3751 def test_body_quopri_len(self):
3752 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003753 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003754 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003755 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003756 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003757
3758 def test_quote_unquote_idempotent(self):
3759 for x in range(256):
3760 c = chr(x)
3761 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3762
R David Murrayec1b5b82011-03-23 14:19:05 -04003763 def _test_header_encode(self, header, expected_encoded_header, charset=None):
3764 if charset is None:
3765 encoded_header = quoprimime.header_encode(header)
3766 else:
3767 encoded_header = quoprimime.header_encode(header, charset)
3768 self.assertEqual(encoded_header, expected_encoded_header)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003769
R David Murraycafd79d2011-03-23 15:25:55 -04003770 def test_header_encode_null(self):
3771 self._test_header_encode(b'', '')
3772
R David Murrayec1b5b82011-03-23 14:19:05 -04003773 def test_header_encode_one_word(self):
3774 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
3775
3776 def test_header_encode_two_lines(self):
3777 self._test_header_encode(b'hello\nworld',
3778 '=?iso-8859-1?q?hello=0Aworld?=')
3779
3780 def test_header_encode_non_ascii(self):
3781 self._test_header_encode(b'hello\xc7there',
3782 '=?iso-8859-1?q?hello=C7there?=')
3783
3784 def test_header_encode_alt_charset(self):
3785 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
3786 charset='iso-8859-2')
3787
3788 def _test_header_decode(self, encoded_header, expected_decoded_header):
3789 decoded_header = quoprimime.header_decode(encoded_header)
3790 self.assertEqual(decoded_header, expected_decoded_header)
3791
3792 def test_header_decode_null(self):
3793 self._test_header_decode('', '')
3794
3795 def test_header_decode_one_word(self):
3796 self._test_header_decode('hello', 'hello')
3797
3798 def test_header_decode_two_lines(self):
3799 self._test_header_decode('hello=0Aworld', 'hello\nworld')
3800
3801 def test_header_decode_non_ascii(self):
3802 self._test_header_decode('hello=C7there', 'hello\xc7there')
3803
3804 def _test_decode(self, encoded, expected_decoded, eol=None):
3805 if eol is None:
3806 decoded = quoprimime.decode(encoded)
3807 else:
3808 decoded = quoprimime.decode(encoded, eol=eol)
3809 self.assertEqual(decoded, expected_decoded)
3810
3811 def test_decode_null_word(self):
3812 self._test_decode('', '')
3813
3814 def test_decode_null_line_null_word(self):
3815 self._test_decode('\r\n', '\n')
3816
3817 def test_decode_one_word(self):
3818 self._test_decode('hello', 'hello')
3819
3820 def test_decode_one_word_eol(self):
3821 self._test_decode('hello', 'hello', eol='X')
3822
3823 def test_decode_one_line(self):
3824 self._test_decode('hello\r\n', 'hello\n')
3825
3826 def test_decode_one_line_lf(self):
3827 self._test_decode('hello\n', 'hello\n')
3828
R David Murraycafd79d2011-03-23 15:25:55 -04003829 def test_decode_one_line_cr(self):
3830 self._test_decode('hello\r', 'hello\n')
3831
3832 def test_decode_one_line_nl(self):
3833 self._test_decode('hello\n', 'helloX', eol='X')
3834
3835 def test_decode_one_line_crnl(self):
3836 self._test_decode('hello\r\n', 'helloX', eol='X')
3837
R David Murrayec1b5b82011-03-23 14:19:05 -04003838 def test_decode_one_line_one_word(self):
3839 self._test_decode('hello\r\nworld', 'hello\nworld')
3840
3841 def test_decode_one_line_one_word_eol(self):
3842 self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
3843
3844 def test_decode_two_lines(self):
3845 self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
3846
R David Murraycafd79d2011-03-23 15:25:55 -04003847 def test_decode_two_lines_eol(self):
3848 self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
3849
R David Murrayec1b5b82011-03-23 14:19:05 -04003850 def test_decode_one_long_line(self):
3851 self._test_decode('Spam' * 250, 'Spam' * 250)
3852
3853 def test_decode_one_space(self):
3854 self._test_decode(' ', '')
3855
3856 def test_decode_multiple_spaces(self):
3857 self._test_decode(' ' * 5, '')
3858
3859 def test_decode_one_line_trailing_spaces(self):
3860 self._test_decode('hello \r\n', 'hello\n')
3861
3862 def test_decode_two_lines_trailing_spaces(self):
3863 self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')
3864
3865 def test_decode_quoted_word(self):
3866 self._test_decode('=22quoted=20words=22', '"quoted words"')
3867
3868 def test_decode_uppercase_quoting(self):
3869 self._test_decode('ab=CD=EF', 'ab\xcd\xef')
3870
3871 def test_decode_lowercase_quoting(self):
3872 self._test_decode('ab=cd=ef', 'ab\xcd\xef')
3873
def test_decode_soft_line_break(self):
    # '=' directly before CRLF is a soft break: the expected text has neither
    # the '=' nor a line ending at that position.
    encoded, expected = 'soft line=\r\nbreak', 'soft linebreak'
    self._test_decode(encoded, expected)
3876
def test_decode_false_quoting(self):
    # '=' signs that do not form an escape; expected text equals the input.
    text = 'A=1,B=A ==> A+B==2'
    self._test_decode(text, 'A=1,B=A ==> A+B==2')
3879
3880 def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
3881 kwargs = {}
3882 if maxlinelen is None:
3883 # Use body_encode's default.
3884 maxlinelen = 76
3885 else:
3886 kwargs['maxlinelen'] = maxlinelen
3887 if eol is None:
3888 # Use body_encode's default.
3889 eol = '\n'
3890 else:
3891 kwargs['eol'] = eol
3892 encoded_body = quoprimime.body_encode(body, **kwargs)
3893 self.assertEqual(encoded_body, expected_encoded_body)
3894 if eol == '\n' or eol == '\r\n':
3895 # We know how to split the result back into lines, so maxlinelen
3896 # can be checked.
3897 for line in encoded_body.splitlines():
3898 self.assertLessEqual(len(line), maxlinelen)
3899
def test_encode_null(self):
    # The empty body is expected to encode to the empty string.
    empty = ''
    self._test_encode(empty, '')
3902
def test_encode_null_lines(self):
    # Two empty lines are expected back unchanged.
    blank_lines = '\n\n'
    self._test_encode(blank_lines, '\n\n')
3905
def test_encode_one_line(self):
    # A plain LF-terminated line is expected back unchanged.
    body, expected = 'hello\n', 'hello\n'
    self._test_encode(body, expected)
3908
def test_encode_one_line_crlf(self):
    # CRLF in the body; the expected output uses the default '\n' separator.
    body, expected = 'hello\r\n', 'hello\n'
    self._test_encode(body, expected)
3911
def test_encode_one_line_eol(self):
    # LF in the body; the expected output uses the requested CRLF separator.
    body, expected = 'hello\n', 'hello\r\n'
    self._test_encode(body, expected, eol='\r\n')
3914
def test_encode_one_space(self):
    # A body that is just one space is expected as the '=20' escape.
    body, expected = ' ', '=20'
    self._test_encode(body, expected)
3917
def test_encode_one_line_one_space(self):
    # A line holding only a space: the space is expected as '=20' and the
    # line ending is kept.
    body, expected = ' \n', '=20\n'
    self._test_encode(body, expected)
3920
# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
3925
def test_encode_two_lines_one_space(self):
    # Two lines that each hold only a space; both spaces are expected as '=20'.
    body, expected = ' \n \n', '=20\n=20\n'
    self._test_encode(body, expected)
3928
R David Murrayec1b5b82011-03-23 14:19:05 -04003929 def test_encode_one_word_trailing_spaces(self):
3930 self._test_encode('hello ', 'hello =20')
3931
3932 def test_encode_one_line_trailing_spaces(self):
3933 self._test_encode('hello \n', 'hello =20\n')
3934
def test_encode_one_word_trailing_tab(self):
    # A tab at the very end is expected as '=09'; the space before it stays.
    body, expected = 'hello \t', 'hello =09'
    self._test_encode(body, expected)
3937
def test_encode_one_line_trailing_tab(self):
    # A tab right before the line ending is expected as '=09'; the space
    # before it and the '\n' stay.
    body, expected = 'hello \t\n', 'hello =09\n'
    self._test_encode(body, expected)
3940
def test_encode_trailing_space_before_maxlinelen(self):
    # Limit 6: the trailing space stays literal and is followed by a soft
    # break ('=' + newline) before the real line ending.
    body, expected = 'abcd \n1234', 'abcd =\n\n1234'
    self._test_encode(body, expected, maxlinelen=6)
3943
def test_encode_trailing_space_at_maxlinelen(self):
    # Limit 5: a soft break follows 'abcd' and the escaped space ('=20') is
    # alone on the next line.
    body, expected = 'abcd \n1234', 'abcd=\n=20\n1234'
    self._test_encode(body, expected, maxlinelen=5)
3946
def test_encode_trailing_space_beyond_maxlinelen(self):
    # Limit 4: the word itself is split after 'abc'; 'd' and the escaped
    # space share the continuation line.
    body, expected = 'abcd \n1234', 'abc=\nd=20\n1234'
    self._test_encode(body, expected, maxlinelen=4)
3949
def test_encode_whitespace_lines(self):
    # Five lines that each hold a single space; every space is expected as
    # '=20'.
    line_count = 5
    self._test_encode(' \n' * line_count, '=20\n' * line_count)
R David Murrayec1b5b82011-03-23 14:19:05 -04003952
def test_encode_quoted_equals(self):
    # A literal '=' is expected as the '=3D' escape.
    body, expected = 'a = b', 'a =3D b'
    self._test_encode(body, expected)
3955
def test_encode_one_long_string(self):
    # 100 characters without a line ending: 75 characters, a soft break,
    # then the remaining 25.
    body = 'x' * 100
    expected = 'x' * 75 + '=\n' + 'x' * 25
    self._test_encode(body, expected)
3958
def test_encode_one_long_line(self):
    # 100 characters plus a line ending: 75 characters, a soft break, the
    # remaining 25, and the original '\n'.
    body = 'x' * 100 + '\n'
    expected = 'x' * 75 + '=\n' + 'x' * 25 + '\n'
    self._test_encode(body, expected)
3961
def test_encode_one_very_long_line(self):
    # 200 characters on one line: two 75-character pieces that each end in a
    # soft break, then the remaining 50 characters and the '\n'.
    wrapped_piece = 'x' * 75 + '=\n'
    self._test_encode('x' * 200 + '\n',
                      wrapped_piece * 2 + 'x' * 50 + '\n')
3965
def test_encode_one_long_line(self):
    # NOTE(review): test_encode_one_long_line is defined twice in this class
    # body with the same call; this later definition is the one that ends up
    # bound to the name.  One of the two should be removed or given a
    # distinct name and input.
    body = 'x' * 100 + '\n'
    expected = 'x' * 75 + '=\n' + 'x' * 25 + '\n'
    self._test_encode(body, expected)
3968
def test_encode_shortest_maxlinelen(self):
    # With a limit of 4 each line has room for exactly one '=3D' escape plus
    # the soft-break '='; the last escape needs no soft break.
    escaped = '=3D'
    self._test_encode('=' * 5, (escaped + '=\n') * 4 + escaped, maxlinelen=4)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003971
def test_encode_maxlinelen_too_small(self):
    # A limit of 3 must be rejected with ValueError.
    with self.assertRaises(ValueError):
        self._test_encode('', '', maxlinelen=3)
3974
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003975 def test_encode(self):
3976 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003977 eq(quoprimime.body_encode(''), '')
3978 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003979 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00003980 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003981 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00003982 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003983xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
3984 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
3985x xxxx xxxx xxxx xxxx=20""")
3986 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00003987 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
3988 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003989xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
3990 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
3991x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00003992 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003993one line
3994
3995two line"""), """\
3996one line
3997
3998two line""")
3999
4000
Ezio Melottib3aedd42010-11-20 19:04:17 +00004001
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004002# Test the Charset class
class TestCharset(unittest.TestCase):
    # Checks for email.charset.Charset: header encoding, body encoding and
    # rejection of non-ASCII charset names.

    def tearDown(self):
        # test_body_encode registers a throw-away 'fake' charset; remove it
        # again when present so it does not leak into other tests.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        check = self.assertEqual
        # Pure ASCII text under us-ascii comes back untouched.
        ascii_charset = Charset('us-ascii')
        check(ascii_charset.header_encode('Hello World!'), 'Hello World!')
        # Text outside the ASCII range cannot be header-encoded as us-ascii...
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        with self.assertRaises(UnicodeError):
            ascii_charset.header_encode(eight_bit)
        # ...while utf-8 produces a base64 encoded-word for it.
        utf8_charset = Charset('utf-8')
        check(utf8_charset.header_encode(eight_bit),
              '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # iso-8859-1 bodies are expected in quoted-printable form.
        latin1 = Charset('iso-8859-1')
        check('hello w=F6rld', latin1.body_encode('hello w\xf6rld'))
        # utf-8 bodies are expected in base64 form.
        utf8 = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', utf8.body_encode(b'hello world'))
        # us-ascii bodies are expected unchanged.
        plain = Charset('us-ascii')
        check('hello world', plain.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake = Charset('fake')
        check('hello world', fake.body_encode('hello world'))

    def test_unicode_charset_name(self):
        # An ASCII name round-trips through str(); a name with a non-ASCII
        # character is rejected with CharsetError.
        ascii_name = Charset('us-ascii')
        self.assertEqual(str(ascii_name), 'us-ascii')
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
4057
4058
Ezio Melottib3aedd42010-11-20 19:04:17 +00004059
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004060# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    # Tests for email.header.Header together with decode_header() and
    # make_header(): plain ASCII headers, multi-charset headers, line
    # splitting, and handling of undecodable 8-bit data.

    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        # Chunks of the same charset are joined with one space, and the
        # appended chunk brings its own leading space, so two spaces separate
        # the sentences.  The run is spelled with an explicit count so it
        # survives whitespace reflowing of this file.
        eq(h.encode(), 'Hello World!' + ' ' * 2 + 'Goodbye World!')

    def test_simple_surprise(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # No leading space on the appended chunk: a single joining space is
        # still inserted.
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        # Every folded line must respect the requested limit.
        # assertLessEqual reports the offending length on failure.
        for line in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding yields one (bytes, charset) pair per original chunk.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # With header_name given, the first line is folded one word earlier.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # A limit larger than the text leaves it on one line.
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        # The folded form must decode back to the original text.
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        # The folded form must decode back to the original text.
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        eq = self.assertEqual
        h = Header()
        # The charset is given by name rather than as a Charset instance.
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        eq = self.assertEqual
        # Mostly-ASCII text is expected as a 'q' encoded-word, CJK text as a
        # 'b' encoded-word.
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        # Undecodable bytes are rejected by default...
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        # ...and accepted when an error handler is supplied.
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_escaped_8bit_header(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        e = x.decode('ascii', 'surrogateescape')
        h = Header(e, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        h2 = email.header.make_header(email.header.decode_header(h))
        self.assertEqual(str(h2),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        h = Header('test')
        chunks = email.header.decode_header(h)
        # Mutating the returned list must leave the header itself untouched.
        chunks.append(('ascii', 'test2'))
        self.assertEqual(str(h), 'test')

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        # The two adjacent koi8-r encoded-words come back as a single chunk.
        eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        # A shift_jis header is expected to be emitted as iso-2022-jp.
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        msg = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(msg), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        msg = Message()
        # The leading run is spelled with an explicit count and the
        # expectation is built from the same value, so the test really checks
        # that the whitespace after the 'name: ' separator is kept.
        value = ' ' * 3 + 'value with leading ws'
        msg['SomeHeader'] = value
        self.assertEqual(str(msg), 'SomeHeader: ' + value + '\n\n')
4412
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004413
Ezio Melottib3aedd42010-11-20 19:04:17 +00004414
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004415# Test RFC 2231 header parameters (en/de)coding
4416class TestRFC2231(TestEmailBase):
4417 def test_get_param(self):
4418 eq = self.assertEqual
4419 msg = self._msgobj('msg_29.txt')
4420 eq(msg.get_param('title'),
4421 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
4422 eq(msg.get_param('title', unquote=False),
4423 ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
4424
def test_set_param(self):
    check = self.ndiffAssertEqual
    title_text = 'This is even more ***fun*** isn\'t it!'
    message = Message()
    # Charset only: the language slot of the triple is the empty string.
    message.set_param('title', title_text, charset='us-ascii')
    check(message.get_param('title'), ('us-ascii', '', title_text))
    # Charset and language.
    message.set_param('title', title_text, charset='us-ascii', language='en')
    check(message.get_param('title'), ('us-ascii', 'en', title_text))
    # Setting the parameter on a loaded message and flattening it is expected
    # to show the value in extended (title*=) form on Content-Type.
    message = self._msgobj('msg_01.txt')
    message.set_param('title', title_text, charset='us-ascii', language='en')
    flattened = message.as_string(maxheaderlen=78)
    check(flattened, """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4461
R David Murraya2860e82011-04-16 09:20:30 -04004462 def test_set_param_requote(self):
4463 msg = Message()
4464 msg.set_param('title', 'foo')
4465 self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
4466 msg.set_param('title', 'bar', requote=False)
4467 self.assertEqual(msg['content-type'], 'text/plain; title=bar')
4468 # tspecial is still quoted.
4469 msg.set_param('title', "(bar)bell", requote=False)
4470 self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
4471
def test_del_param(self):
    check = self.ndiffAssertEqual
    message = self._msgobj('msg_01.txt')
    # Add two parameters, then remove one of them again; only 'title' is
    # expected to remain on Content-Type in the flattened message.
    message.set_param('foo', 'bar', charset='us-ascii', language='en')
    message.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
    message.del_param('foo', header='Content-Type')
    flattened = message.as_string(maxheaderlen=78)
    check(flattened, """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4501
def test_rfc2231_get_content_charset(self):
    # msg_32.txt is expected to report 'us-ascii' as its content charset.
    message = self._msgobj('msg_32.txt')
    self.assertEqual(message.get_content_charset(), 'us-ascii')
4506
R. David Murraydfd7eb02010-12-24 22:36:49 +00004507 def test_rfc2231_parse_rfc_quoting(self):
4508 m = textwrap.dedent('''\
4509 Content-Disposition: inline;
4510 \tfilename*0*=''This%20is%20even%20more%20;
4511 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
4512 \tfilename*2="is it not.pdf"
4513
4514 ''')
4515 msg = email.message_from_string(m)
4516 self.assertEqual(msg.get_filename(),
4517 'This is even more ***fun*** is it not.pdf')
4518 self.assertEqual(m, msg.as_string())
4519
4520 def test_rfc2231_parse_extra_quoting(self):
4521 m = textwrap.dedent('''\
4522 Content-Disposition: inline;
4523 \tfilename*0*="''This%20is%20even%20more%20";
4524 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4525 \tfilename*2="is it not.pdf"
4526
4527 ''')
4528 msg = email.message_from_string(m)
4529 self.assertEqual(msg.get_filename(),
4530 'This is even more ***fun*** is it not.pdf')
4531 self.assertEqual(m, msg.as_string())
4532
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004533 def test_rfc2231_no_language_or_charset(self):
4534 m = '''\
4535Content-Transfer-Encoding: 8bit
4536Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
4537Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
4538
4539'''
4540 msg = email.message_from_string(m)
4541 param = msg.get_param('NAME')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004542 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004543 self.assertEqual(
4544 param,
4545 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
4546
4547 def test_rfc2231_no_language_or_charset_in_filename(self):
4548 m = '''\
4549Content-Disposition: inline;
4550\tfilename*0*="''This%20is%20even%20more%20";
4551\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4552\tfilename*2="is it not.pdf"
4553
4554'''
4555 msg = email.message_from_string(m)
4556 self.assertEqual(msg.get_filename(),
4557 'This is even more ***fun*** is it not.pdf')
4558
4559 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
4560 m = '''\
4561Content-Disposition: inline;
4562\tfilename*0*="''This%20is%20even%20more%20";
4563\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4564\tfilename*2="is it not.pdf"
4565
4566'''
4567 msg = email.message_from_string(m)
4568 self.assertEqual(msg.get_filename(),
4569 'This is even more ***fun*** is it not.pdf')
4570
4571 def test_rfc2231_partly_encoded(self):
4572 m = '''\
4573Content-Disposition: inline;
4574\tfilename*0="''This%20is%20even%20more%20";
4575\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4576\tfilename*2="is it not.pdf"
4577
4578'''
4579 msg = email.message_from_string(m)
4580 self.assertEqual(
4581 msg.get_filename(),
4582 'This%20is%20even%20more%20***fun*** is it not.pdf')
4583
4584 def test_rfc2231_partly_nonencoded(self):
4585 m = '''\
4586Content-Disposition: inline;
4587\tfilename*0="This%20is%20even%20more%20";
4588\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
4589\tfilename*2="is it not.pdf"
4590
4591'''
4592 msg = email.message_from_string(m)
4593 self.assertEqual(
4594 msg.get_filename(),
4595 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
4596
4597 def test_rfc2231_no_language_or_charset_in_boundary(self):
4598 m = '''\
4599Content-Type: multipart/alternative;
4600\tboundary*0*="''This%20is%20even%20more%20";
4601\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
4602\tboundary*2="is it not.pdf"
4603
4604'''
4605 msg = email.message_from_string(m)
4606 self.assertEqual(msg.get_boundary(),
4607 'This is even more ***fun*** is it not.pdf')
4608
4609 def test_rfc2231_no_language_or_charset_in_charset(self):
4610 # This is a nonsensical charset value, but tests the code anyway
4611 m = '''\
4612Content-Type: text/plain;
4613\tcharset*0*="This%20is%20even%20more%20";
4614\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
4615\tcharset*2="is it not.pdf"
4616
4617'''
4618 msg = email.message_from_string(m)
4619 self.assertEqual(msg.get_content_charset(),
4620 'this is even more ***fun*** is it not.pdf')
4621
4622 def test_rfc2231_bad_encoding_in_filename(self):
4623 m = '''\
4624Content-Disposition: inline;
4625\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
4626\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4627\tfilename*2="is it not.pdf"
4628
4629'''
4630 msg = email.message_from_string(m)
4631 self.assertEqual(msg.get_filename(),
4632 'This is even more ***fun*** is it not.pdf')
4633
def test_rfc2231_bad_encoding_in_charset(self):
    # The charset parameter is an extended value that names the encoding
    # 'bogus' and whose payload carries percent-encoded bytes E2 80 9D.
    m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
    msg = email.message_from_string(m)
    # This should return None because non-ascii characters in the charset
    # are not allowed.  assertIsNone checks identity with None and gives a
    # clearer failure message than comparing for equality.
    self.assertIsNone(msg.get_content_charset())
4643
def test_rfc2231_bad_character_in_charset(self):
    # Same payload as the 'bogus' encoding test, but declared as 'ascii':
    # the percent-encoded bytes E2 80 9D are outside the ASCII range.
    m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
    msg = email.message_from_string(m)
    # This should return None because non-ascii characters in the charset
    # are not allowed.  assertIsNone checks identity with None and gives a
    # clearer failure message than comparing for equality.
    self.assertIsNone(msg.get_content_charset())
4653
def test_rfc2231_bad_character_in_filename(self):
    # The last segment ends in a lone %E2 byte while the declared charset
    # is 'ascii'; the expected filename ends in U+FFFD in its place.
    source = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
    parsed = email.message_from_string(source)
    expected = 'This is even more ***fun*** is it not.pdf\ufffd'
    self.assertEqual(parsed.get_filename(), expected)
4665
def test_rfc2231_unknown_encoding(self):
    # The filename is an extended value declaring the charset X-UNKNOWN;
    # the plain text after the second tick is expected back unchanged.
    source = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
    parsed = email.message_from_string(source)
    filename = parsed.get_filename()
    self.assertEqual(filename, 'myfile.txt')
4674
def test_rfc2231_single_tick_in_filename_extended(self):
    # Both segments are extended, but the value contains only a single
    # tick (the apostrophe in "Frank's"), so no charset or language is
    # expected to be split off.
    m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
    msg = email.message_from_string(m)
    charset, language, s = msg.get_param('name')
    # assertIsNone checks identity with None rather than equality.
    self.assertIsNone(charset)
    self.assertIsNone(language)
    self.assertEqual(s, "Frank's Document")
4687
def test_rfc2231_single_tick_in_filename(self):
    # Neither segment is marked as extended, so get_param() is expected to
    # hand back a plain string rather than a
    # (charset, language, value) tuple.
    m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
    msg = email.message_from_string(m)
    param = msg.get_param('name')
    # assertNotIsInstance reports the offending value on failure, unlike
    # assertFalse(isinstance(...)).
    self.assertNotIsInstance(param, tuple)
    self.assertEqual(param, "Frank's Document")
4697
def test_rfc2231_tick_attack_extended(self):
    # The extended value has the charset'language' prefix *and* a further
    # tick inside the text ("Frank's"); that extra tick is expected to stay
    # part of the value.
    source = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
    parsed = email.message_from_string(source)
    charset, language, value = parsed.get_param('name')
    self.assertEqual(charset, 'us-ascii')
    self.assertEqual(language, 'en-us')
    self.assertEqual(value, "Frank's Document")
4710
def test_rfc2231_tick_attack(self):
    # The segments are NOT marked as extended, so the leading
    # "us-ascii'en-us'" is expected to be kept as literal text and the
    # result to be a plain string, not a tuple.
    m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
    msg = email.message_from_string(m)
    param = msg.get_param('name')
    # assertNotIsInstance reports the offending value on failure, unlike
    # assertFalse(isinstance(...)).
    self.assertNotIsInstance(param, tuple)
    self.assertEqual(param, "us-ascii'en-us'Frank's Document")
4721
def test_rfc2231_no_extended_values(self):
    # An ordinary quoted parameter (no segment numbers, no '*' marker)
    # whose text happens to contain a tick.
    source = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
    parsed = email.message_from_string(source)
    name = parsed.get_param('name')
    self.assertEqual(name, "Frank's Document")
4730
def test_rfc2231_encoded_then_unencoded_segments(self):
    # Segments 0 and 2 are marked as extended, segment 1 is not; the
    # charset and language are expected to come from segment 0 and the
    # three texts to be joined in order.
    source = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
    parsed = email.message_from_string(source)
    charset, language, value = parsed.get_param('name')
    self.assertEqual(charset, 'us-ascii')
    self.assertEqual(language, 'en-us')
    self.assertEqual(value, 'My Document For You')
4745
def test_rfc2231_unencoded_then_encoded_segments(self):
    # Here segment 0 is NOT marked as extended while segments 1 and 2 are;
    # the charset'language' prefix of segment 0 is still expected to be
    # split off.
    source = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
    parsed = email.message_from_string(source)
    charset, language, value = parsed.get_param('name')
    self.assertEqual(charset, 'us-ascii')
    self.assertEqual(language, 'en-us')
    self.assertEqual(value, 'My Document For You')
4760
4761
Ezio Melottib3aedd42010-11-20 19:04:17 +00004762
R. David Murraya8f480f2010-01-16 18:30:03 +00004763# Tests to ensure that signed parts of an email are completely preserved, as
4764# required by RFC1847 section 2.1. Note that these are incomplete, because the
4765# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    # Each test parses msg_45.txt, regenerates the message text in a
    # different way, and checks that the first MIME part is unchanged.

    def _msg_and_obj(self, filename):
        # Return (raw text of the data file, message parsed from that text).
        with openfile(findfile(filename)) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message: everything between a
        # line starting with '--<token>' and the next line that consists of
        # '--' followed by the same token.
        repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        inmatch = repart.search(original)
        outmatch = repart.search(result)
        # Fail with a readable message instead of an AttributeError on None
        # when one of the texts contains no delimited part at all.
        self.assertIsNotNone(inmatch, 'no MIME part found in original text')
        self.assertIsNotNone(outmatch, 'no MIME part found in result text')
        self.assertEqual(outmatch.group(2), inmatch.group(2))

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4798
4799
Ezio Melottib3aedd42010-11-20 19:04:17 +00004800
def _testclasses():
    """Return every attribute of this module whose name starts with 'Test'."""
    this_module = sys.modules[__name__]
    found = []
    for attr_name in dir(this_module):
        if attr_name.startswith('Test'):
            found.append(getattr(this_module, attr_name))
    return found
4804
4805
def suite():
    """Return a TestSuite with the tests of every class from _testclasses()."""
    # unittest.makeSuite() is deprecated and was removed in Python 3.13;
    # the default loader's loadTestsFromTestCase() does the same job.
    loader = unittest.defaultTestLoader
    all_tests = unittest.TestSuite()
    for testclass in _testclasses():
        all_tests.addTest(loader.loadTestsFromTestCase(testclass))
    return all_tests
4811
4812
def test_main():
    """Pass each class from _testclasses() to run_unittest(), one per call."""
    classes = _testclasses()
    for cls in classes:
        run_unittest(cls)
4816
4817
Ezio Melottib3aedd42010-11-20 19:04:17 +00004818
# When executed as a script, hand control to unittest and name the
# module-level suite() function as the default test to run.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')