blob: 2fa4aa8b6a93f7e8b2279596dd55ee6c880efbf5 [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
R David Murray638d40b2012-08-24 11:14:13 -040023from email.generator import Generator, DecodedGenerator, BytesGenerator
Guido van Rossum8b3febe2007-08-30 01:15:14 +000024from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R. David Murray96fd54e2010-10-08 15:55:28 +000039from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from email.test import __file__ as landmark
41
42
43NL = '\n'
44EMPTYSTRING = ''
45SPACE = ' '
46
47
Ezio Melottib3aedd42010-11-20 19:04:17 +000048
def openfile(filename, *args, **kws):
    """Open *filename* from the ``data`` directory next to the test package.

    Any extra positional and keyword arguments are handed straight to the
    builtin open(), so callers choose the mode (e.g. 'rb') themselves.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
52
53
Ezio Melottib3aedd42010-11-20 19:04:17 +000054
# Base test class
class TestEmailBase(unittest.TestCase):
    """Base class providing helpers shared by the email test cases."""

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output."""
        if first != second:
            sfirst = str(first)
            ssecond = str(second)
            # Compare the repr() of every line so that whitespace and
            # non-printable differences are visible in the diff.
            rfirst = [repr(line) for line in sfirst.splitlines()]
            rsecond = [repr(line) for line in ssecond.splitlines()]
            diff = difflib.ndiff(rfirst, rsecond)
            raise self.failureException(NL + NL.join(diff))

    def _msgobj(self, filename):
        """Parse the named test data file and return the resulting message.

        *filename* is a bare file name; openfile() resolves it inside the
        test ``data`` directory.
        """
        # openfile() already anchors the name in the data directory, so no
        # separate lookup is needed.  Resolving the name elsewhere first
        # could yield an absolute path, which os.path.join() inside
        # openfile() would use verbatim and so open the wrong file.
        with openfile(filename) as fp:
            return email.message_from_file(fp)
70
71
Ezio Melottib3aedd42010-11-20 19:04:17 +000072
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Tests for the public API of email.message.Message.

    Per-test notes are written as comments rather than docstrings so that
    unittest keeps reporting each test by its method name.
    """

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the supplied default unchanged.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # An explicit failobj replaces None for parts without a charset.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        # openfile() resolves the name inside the data directory itself.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        uu_payload = 'begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n'
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header on a Message appends instead of replacing,
            # and lookups return the first match.  Use a fresh message for
            # every spelling so each one is really the header under test.
            msg = Message()
            msg.set_payload(uu_payload)
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # msg_17.txt holds the expected DecodedGenerator output.
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        for name in ('from', 'From', 'FROM', 'to', 'To', 'TO'):
            self.assertIn(name, msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        # With unixfrom=True the first line is the envelope "From " line;
        # everything after it must still match the original text.
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header present, so the default lookup finds nothing.
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # The call simply must not raise; there is nothing else to check.
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        # Deleting a parameter that is not there must leave the header
        # exactly as it was before the call.
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated name only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        # The undecodable payload is expected back as its raw bytes.
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        msg = Message()
        # A None value produces a bare parameter name, and the underscore
        # in the keyword is emitted as a dash.
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))
634
# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    """Checks on the transfer encoding applied to message payloads."""

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as image_file:
            raw_image = image_file.read()
        encoded_body = MIMEImage(raw_image).get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        longest = max(len(line) for line in encoded_body.split('\n'))
        self.assertLessEqual(longest, 76)

    def test_encode_empty_payload(self):
        empty = Message()
        empty.set_charset('us-ascii')
        self.assertEqual(empty['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        cte = 'content-transfer-encoding'
        # 7bit data and the default us-ascii _charset
        self.assertEqual(MIMEText('hello world')[cte], '7bit')
        # Similar, but with 8bit data
        self.assertEqual(MIMEText('hello \xf8 world')[cte], '8bit')
        # And now with a different charset
        latin1 = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        self.assertEqual(latin1[cte], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        japanese = MIMEText('文', _charset='euc-jp')
        self.assertEqual(japanese['content-transfer-encoding'], '7bit')

    def test_qp_encode_latin1(self):
        part = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
        flattened = str(part)
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            =E1=F6
            """)
        self.assertEqual(flattened, expected)

    def test_qp_encode_non_latin1(self):
        # Issue 16948
        part = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
        flattened = str(part)
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-2"
            Content-Transfer-Encoding: quoted-printable

            =BF
            """)
        self.assertEqual(flattened, expected)
693
Ezio Melottib3aedd42010-11-20 19:04:17 +0000694
# Test long header wrapping
696class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400697
698 maxDiff = None
699
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000700 def test_split_long_continuation(self):
701 eq = self.ndiffAssertEqual
702 msg = email.message_from_string("""\
703Subject: bug demonstration
704\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
705\tmore text
706
707test
708""")
709 sfp = StringIO()
710 g = Generator(sfp)
711 g.flatten(msg)
712 eq(sfp.getvalue(), """\
713Subject: bug demonstration
714\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
715\tmore text
716
717test
718""")
719
720 def test_another_long_almost_unsplittable_header(self):
721 eq = self.ndiffAssertEqual
722 hstr = """\
723bug demonstration
724\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
725\tmore text"""
726 h = Header(hstr, continuation_ws='\t')
727 eq(h.encode(), """\
728bug demonstration
729\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
730\tmore text""")
731 h = Header(hstr.replace('\t', ' '))
732 eq(h.encode(), """\
733bug demonstration
734 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
735 more text""")
736
737 def test_long_nonstring(self):
738 eq = self.ndiffAssertEqual
739 g = Charset("iso-8859-1")
740 cz = Charset("iso-8859-2")
741 utf8 = Charset("utf-8")
742 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
743 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
744 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
745 b'bef\xf6rdert. ')
746 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
747 b'd\xf9vtipu.. ')
748 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
749 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
750 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
751 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
752 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
753 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
754 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
755 '\u3044\u307e\u3059\u3002')
756 h = Header(g_head, g, header_name='Subject')
757 h.append(cz_head, cz)
758 h.append(utf8_head, utf8)
759 msg = Message()
760 msg['Subject'] = h
761 sfp = StringIO()
762 g = Generator(sfp)
763 g.flatten(msg)
764 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000765Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
766 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
767 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
768 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
769 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
770 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
771 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
772 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
773 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
774 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
775 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000776
777""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000778 eq(h.encode(maxlinelen=76), """\
779=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
780 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
781 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
782 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
783 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
784 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
785 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
786 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
787 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
788 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
789 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000790
791 def test_long_header_encode(self):
792 eq = self.ndiffAssertEqual
793 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
794 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
795 header_name='X-Foobar-Spoink-Defrobnit')
796 eq(h.encode(), '''\
797wasnipoop; giraffes="very-long-necked-animals";
798 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
799
800 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
801 eq = self.ndiffAssertEqual
802 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
803 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
804 header_name='X-Foobar-Spoink-Defrobnit',
805 continuation_ws='\t')
806 eq(h.encode(), '''\
807wasnipoop; giraffes="very-long-necked-animals";
808 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
809
810 def test_long_header_encode_with_tab_continuation(self):
811 eq = self.ndiffAssertEqual
812 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
813 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
814 header_name='X-Foobar-Spoink-Defrobnit',
815 continuation_ws='\t')
816 eq(h.encode(), '''\
817wasnipoop; giraffes="very-long-necked-animals";
818\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
819
R David Murray3a6152f2011-03-14 21:13:03 -0400820 def test_header_encode_with_different_output_charset(self):
821 h = Header('文', 'euc-jp')
822 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
823
824 def test_long_header_encode_with_different_output_charset(self):
825 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
826 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
827 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
828 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
829 res = """\
830=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
831 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
832 self.assertEqual(h.encode(), res)
833
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000834 def test_header_splitter(self):
835 eq = self.ndiffAssertEqual
836 msg = MIMEText('')
837 # It'd be great if we could use add_header() here, but that doesn't
838 # guarantee an order of the parameters.
839 msg['X-Foobar-Spoink-Defrobnit'] = (
840 'wasnipoop; giraffes="very-long-necked-animals"; '
841 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
842 sfp = StringIO()
843 g = Generator(sfp)
844 g.flatten(msg)
845 eq(sfp.getvalue(), '''\
846Content-Type: text/plain; charset="us-ascii"
847MIME-Version: 1.0
848Content-Transfer-Encoding: 7bit
849X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
850 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
851
852''')
853
854 def test_no_semis_header_splitter(self):
855 eq = self.ndiffAssertEqual
856 msg = Message()
857 msg['From'] = 'test@dom.ain'
858 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
859 msg.set_payload('Test')
860 sfp = StringIO()
861 g = Generator(sfp)
862 g.flatten(msg)
863 eq(sfp.getvalue(), """\
864From: test@dom.ain
865References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
866 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
867
868Test""")
869
R David Murray7da4db12011-04-07 20:37:17 -0400870 def test_last_split_chunk_does_not_fit(self):
871 eq = self.ndiffAssertEqual
872 h = Header('Subject: the first part of this is short, but_the_second'
873 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
874 '_all_by_itself')
875 eq(h.encode(), """\
876Subject: the first part of this is short,
877 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
878
879 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
880 eq = self.ndiffAssertEqual
881 h = Header(', but_the_second'
882 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
883 '_all_by_itself')
884 eq(h.encode(), """\
885,
886 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
887
888 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
889 eq = self.ndiffAssertEqual
890 h = Header(', , but_the_second'
891 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
892 '_all_by_itself')
893 eq(h.encode(), """\
894, ,
895 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
896
897 def test_trailing_splitable_on_overlong_unsplitable(self):
898 eq = self.ndiffAssertEqual
899 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
900 'be_on_a_line_all_by_itself;')
901 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
902 "be_on_a_line_all_by_itself;")
903
904 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
905 eq = self.ndiffAssertEqual
906 h = Header('; '
907 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400908 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400909 eq(h.encode(), """\
910;
R David Murray01581ee2011-04-18 10:04:34 -0400911 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400912
R David Murraye1292a22011-04-07 20:54:03 -0400913 def test_long_header_with_multiple_sequential_split_chars(self):
R David Murraye1292a22011-04-07 20:54:03 -0400914 eq = self.ndiffAssertEqual
915 h = Header('This is a long line that has two whitespaces in a row. '
916 'This used to cause truncation of the header when folded')
917 eq(h.encode(), """\
918This is a long line that has two whitespaces in a row. This used to cause
919 truncation of the header when folded""")
920
R David Murray01581ee2011-04-18 10:04:34 -0400921 def test_splitter_split_on_punctuation_only_if_fws(self):
922 eq = self.ndiffAssertEqual
923 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
924 'they;arenotlegal;fold,points')
925 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
926 "arenotlegal;fold,points")
927
928 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
929 eq = self.ndiffAssertEqual
930 h = Header('this is a test where we need to have more than one line '
931 'before; our final line that is just too big to fit;; '
932 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
933 'be_on_a_line_all_by_itself;')
934 eq(h.encode(), """\
935this is a test where we need to have more than one line before;
936 our final line that is just too big to fit;;
937 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
938
939 def test_overlong_last_part_followed_by_split_point(self):
940 eq = self.ndiffAssertEqual
941 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
942 'be_on_a_line_all_by_itself ')
943 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
944 "should_be_on_a_line_all_by_itself ")
945
946 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
947 eq = self.ndiffAssertEqual
948 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
949 'before_our_final_line_; ; '
950 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
951 'be_on_a_line_all_by_itself; ')
952 eq(h.encode(), """\
953this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
954 ;
955 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
956
957 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
958 eq = self.ndiffAssertEqual
959 h = Header('this is a test where we need to have more than one line '
960 'before our final line; ; '
961 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
962 'be_on_a_line_all_by_itself; ')
963 eq(h.encode(), """\
964this is a test where we need to have more than one line before our final line;
965 ;
966 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
967
968 def test_long_header_with_whitespace_runs(self):
969 eq = self.ndiffAssertEqual
970 msg = Message()
971 msg['From'] = 'test@dom.ain'
972 msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
973 msg.set_payload('Test')
974 sfp = StringIO()
975 g = Generator(sfp)
976 g.flatten(msg)
977 eq(sfp.getvalue(), """\
978From: test@dom.ain
979References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
980 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
981 <foo@dom.ain> <foo@dom.ain>\x20\x20
982
983Test""")
984
985 def test_long_run_with_semi_header_splitter(self):
986 eq = self.ndiffAssertEqual
987 msg = Message()
988 msg['From'] = 'test@dom.ain'
989 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
990 msg.set_payload('Test')
991 sfp = StringIO()
992 g = Generator(sfp)
993 g.flatten(msg)
994 eq(sfp.getvalue(), """\
995From: test@dom.ain
996References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
997 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
998 <foo@dom.ain>; abc
999
1000Test""")
1001
1002 def test_splitter_split_on_punctuation_only_if_fws(self):
1003 eq = self.ndiffAssertEqual
1004 msg = Message()
1005 msg['From'] = 'test@dom.ain'
1006 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
1007 'they;arenotlegal;fold,points')
1008 msg.set_payload('Test')
1009 sfp = StringIO()
1010 g = Generator(sfp)
1011 g.flatten(msg)
1012 # XXX the space after the header should not be there.
1013 eq(sfp.getvalue(), """\
1014From: test@dom.ain
1015References:\x20
1016 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
1017
1018Test""")
1019
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001020 def test_no_split_long_header(self):
1021 eq = self.ndiffAssertEqual
1022 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +00001023 h = Header(hstr)
1024 # These come on two lines because Headers are really field value
1025 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001026 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001027References:
1028 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1029 h = Header('x' * 80)
1030 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001031
1032 def test_splitting_multiple_long_lines(self):
1033 eq = self.ndiffAssertEqual
1034 hstr = """\
1035from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1036\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1037\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1038"""
1039 h = Header(hstr, continuation_ws='\t')
1040 eq(h.encode(), """\
1041from babylon.socal-raves.org (localhost [127.0.0.1]);
1042 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1043 for <mailman-admin@babylon.socal-raves.org>;
1044 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1045\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1046 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1047 for <mailman-admin@babylon.socal-raves.org>;
1048 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1049\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1050 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1051 for <mailman-admin@babylon.socal-raves.org>;
1052 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1053
1054 def test_splitting_first_line_only_is_long(self):
1055 eq = self.ndiffAssertEqual
1056 hstr = """\
1057from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1058\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1059\tid 17k4h5-00034i-00
1060\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1061 h = Header(hstr, maxlinelen=78, header_name='Received',
1062 continuation_ws='\t')
1063 eq(h.encode(), """\
1064from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1065 helo=cthulhu.gerg.ca)
1066\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1067\tid 17k4h5-00034i-00
1068\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1069
1070 def test_long_8bit_header(self):
1071 eq = self.ndiffAssertEqual
1072 msg = Message()
1073 h = Header('Britische Regierung gibt', 'iso-8859-1',
1074 header_name='Subject')
1075 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001076 eq(h.encode(maxlinelen=76), """\
1077=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1078 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001079 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001080 eq(msg.as_string(maxheaderlen=76), """\
1081Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1082 =?iso-8859-1?q?hore-Windkraftprojekte?=
1083
1084""")
1085 eq(msg.as_string(maxheaderlen=0), """\
1086Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001087
1088""")
1089
1090 def test_long_8bit_header_no_charset(self):
1091 eq = self.ndiffAssertEqual
1092 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001093 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1094 'f\xfcr Offshore-Windkraftprojekte '
1095 '<a-very-long-address@example.com>')
1096 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001097 eq(msg.as_string(maxheaderlen=78), """\
1098Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1099 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1100
1101""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001102 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001103 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001104 header_name='Reply-To')
1105 eq(msg.as_string(maxheaderlen=78), """\
1106Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1107 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001108
1109""")
1110
1111 def test_long_to_header(self):
1112 eq = self.ndiffAssertEqual
1113 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001114 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001115 '"Someone Test #B" <someone@umich.edu>, '
1116 '"Someone Test #C" <someone@eecs.umich.edu>, '
1117 '"Someone Test #D" <someone@eecs.umich.edu>')
1118 msg = Message()
1119 msg['To'] = to
1120 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001121To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001122 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001123 "Someone Test #C" <someone@eecs.umich.edu>,
1124 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001125
1126''')
1127
1128 def test_long_line_after_append(self):
1129 eq = self.ndiffAssertEqual
1130 s = 'This is an example of string which has almost the limit of header length.'
1131 h = Header(s)
1132 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001133 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001134This is an example of string which has almost the limit of header length.
1135 Add another line.""")
1136
1137 def test_shorter_line_with_append(self):
1138 eq = self.ndiffAssertEqual
1139 s = 'This is a shorter line.'
1140 h = Header(s)
1141 h.append('Add another sentence. (Surprise?)')
1142 eq(h.encode(),
1143 'This is a shorter line. Add another sentence. (Surprise?)')
1144
1145 def test_long_field_name(self):
1146 eq = self.ndiffAssertEqual
1147 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001148 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1149 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1150 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1151 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001152 h = Header(gs, 'iso-8859-1', header_name=fn)
1153 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001154 eq(h.encode(maxlinelen=76), """\
1155=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1156 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1157 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1158 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001159
1160 def test_long_received_header(self):
1161 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1162 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1163 'Wed, 05 Mar 2003 18:10:18 -0700')
1164 msg = Message()
1165 msg['Received-1'] = Header(h, continuation_ws='\t')
1166 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001167 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001168 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001169Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1170 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001171 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001172Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1173 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001174 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001175
1176""")
1177
1178 def test_string_headerinst_eq(self):
1179 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1180 'tu-muenchen.de> (David Bremner\'s message of '
1181 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1182 msg = Message()
1183 msg['Received-1'] = Header(h, header_name='Received-1',
1184 continuation_ws='\t')
1185 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001186 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001187 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001188Received-1:\x20
1189 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1190 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1191Received-2:\x20
1192 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1193 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001194
1195""")
1196
1197 def test_long_unbreakable_lines_with_continuation(self):
1198 eq = self.ndiffAssertEqual
1199 msg = Message()
1200 t = """\
1201iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1202 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1203 msg['Face-1'] = t
1204 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001205 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001206 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001207 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001208 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001209Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001210 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001211 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001212Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001213 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001214 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001215Face-3:\x20
1216 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1217 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001218
1219""")
1220
1221 def test_another_long_multiline_header(self):
1222 eq = self.ndiffAssertEqual
1223 m = ('Received: from siimage.com '
1224 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001225 'Microsoft SMTPSVC(5.0.2195.4905); '
1226 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001227 msg = email.message_from_string(m)
1228 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001229Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1230 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001231
1232''')
1233
1234 def test_long_lines_with_different_header(self):
1235 eq = self.ndiffAssertEqual
1236 h = ('List-Unsubscribe: '
1237 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1238 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1239 '?subject=unsubscribe>')
1240 msg = Message()
1241 msg['List'] = h
1242 msg['List'] = Header(h, header_name='List')
1243 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001244List: List-Unsubscribe:
1245 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001246 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001247List: List-Unsubscribe:
1248 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001249 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001250
1251""")
1252
R. David Murray6f0022d2011-01-07 21:57:25 +00001253 def test_long_rfc2047_header_with_embedded_fws(self):
1254 h = Header(textwrap.dedent("""\
1255 We're going to pretend this header is in a non-ascii character set
1256 \tto see if line wrapping with encoded words and embedded
1257 folding white space works"""),
1258 charset='utf-8',
1259 header_name='Test')
1260 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1261 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1262 =?utf-8?q?cter_set?=
1263 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1264 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1265
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001266
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001267# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # Body lines starting with "From " are escaped as ">From " only when
    # the generator is created with mangle_from_=True.

    def setUp(self):
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def test_mangled_from(self):
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(out.getvalue(), expected)

    def test_dont_mangle_from(self):
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(out.getvalue(), expected)

    def test_mangle_from_in_preamble_and_epilog(self):
        # One "From " line sits in the preamble and one in the epilogue;
        # both must come out mangled.
        text = textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """)
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(
            email.message_from_string(text))
        mangled = sum(1 for line in out.getvalue().split('\n')
                      if line.startswith('>From '))
        self.assertEqual(mangled, 2)

    def test_mangled_from_with_bad_bytes(self):
        # The body contains non-ASCII bytes; BytesGenerator must still
        # mangle the "From " line and pass the bytes through unchanged.
        source = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

            """).encode('utf-8')
        parsed = email.message_from_bytes(source + b'From R\xc3\xb6lli\n')
        out = BytesIO()
        BytesGenerator(out, mangle_from_=True).flatten(parsed)
        self.assertEqual(out.getvalue(), source + b'>From R\xc3\xb6lli\n')
1335
Ezio Melottib3aedd42010-11-20 19:04:17 +00001336
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001337# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    # Every test works on a MIMEAudio built in setUp() from the raw bytes
    # of the audiotest.au sample file.

    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        with open(findfile('audiotest.au', datadir), 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # Decoding the base64 payload must give back the original bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype overrides the guessed one.
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # Unique sentinel: identity checks prove the fallback object itself
        # was returned.  assertIs is used instead of assertTrue(x is y) so a
        # failure shows both operands.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1381
1382
Ezio Melottib3aedd42010-11-20 19:04:17 +00001383
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001384# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    # Every test works on a MIMEImage built in setUp() from the raw bytes
    # of the PyBanner048.gif sample file.

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # Decoding the base64 payload must give back the original bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype overrides the guessed one.
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # Unique sentinel: identity checks prove the fallback object itself
        # was returned.  assertIs is used instead of assertTrue(x is y) so a
        # failure shows both operands.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1422
1423
Ezio Melottib3aedd42010-11-20 19:04:17 +00001424
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001425# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    # Six high-bit bytes are wrapped in a MIMEApplication part.

    def test_headers(self):
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(raw)
        # whitespace in the cte encoded block is RFC-irrelevant.
        encoded = part.get_payload().strip()
        self.assertEqual(encoded, '+vv8/f7/')
        self.assertEqual(part.get_payload(decode=True), raw)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001440
1441
Ezio Melottib3aedd42010-11-20 19:04:17 +00001442
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001443# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    """Checks the content type, charset and payload handling of MIMEText."""

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list is a unique sentinel: getting the very same object
        # back proves the failobj was returned rather than a real value.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(
            self._msg.get_param('charset', missing, header='foobar'),
            missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn shows both operands on failure, unlike assertTrue(a in b).
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1494
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001495
Ezio Melottib3aedd42010-11-20 19:04:17 +00001496
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001497# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    """Tests for building, flattening and parsing multipart/* messages."""

    def setUp(self):
        # Build a two-part multipart/mixed message (text intro + GIF image)
        # that the hierarchy test inspects.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        # Let the library format the local date.  The previous hand-rolled
        # offset, '%s%04d' % (sign, tzsecs / 36), produced a doubled sign
        # ("+-200") east of UTC and wrong digits for zones that are not a
        # whole number of hours from UTC.
        container['Date'] = utils.formatdate(now, localtime=True)
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        eq = self.assertEqual
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The payload must hold the very objects that were attached.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the nesting depth of each line below was
        # reconstructed (4 spaces per level assumed) -- confirm against
        # msg_38.txt before relying on it.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the nesting depth of each line below was
        # reconstructed (4 spaces per level assumed) -- confirm against
        # msg_39.txt before relying on it.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        # NOTE(review): the exact number of leading spaces in the boundary
        # could not be confirmed; the header value, the delimiter lines and
        # the expected value below only need to agree with each other.
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001869
1870
Ezio Melottib3aedd42010-11-20 19:04:17 +00001871
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001872# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Tests for how badly formatted messages are parsed and reported."""

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        # assertIsInstance reports the actual defect type on failure.
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        eq(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        eq(msg.defects[0].line, ' Line 1\n')
1983
1984
Ezio Melottib3aedd42010-11-20 19:04:17 +00001985
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001986# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """Tests for RFC 2047 encoded-word decoding and re-encoding."""

    def test_rfc2047_multiline(self):
        # NOTE(review): the folded second line starts with whitespace; a
        # single space is assumed here -- confirm against the original file.
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        decoded = decode_header(raw)
        self.assertEqual(decoded, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(decoded)
        self.assertEqual(
            str(header),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        decoded = decode_header(raw)
        self.assertEqual(decoded, [(b'Andr\xe9', 'iso-8859-1'),
                                   (b'Pirard <pirard@dom.ain>', None)])
        as_text = str(make_header(decoded))
        self.assertEqual(as_text, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        raw = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
               '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        decoded = decode_header(raw)
        self.assertEqual(
            decoded,
            [(b'The', None), (b'quick brown fox', 'iso-8859-1'),
             (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
        as_text = str(make_header(decoded))
        self.assertEqual(as_text,
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words not delimited by whitespace are left undecoded.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(raw),
                         [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                          (b'rg', None), (b'\xe5', 'iso-8859-1'),
                          (b'sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        cases = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
            ]
        for encoded, expected in cases:
            decoded = decode_header(template % encoded)
            self.assertEqual(decoded, [(expected, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(raw),
                         [(b'andr\xe9=zz', 'iso-8659-1')])
2050
Ezio Melottib3aedd42010-11-20 19:04:17 +00002051
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002052# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for MIMEMessage and for parsing/generating message/* parts."""

    def setUp(self):
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapped message itself must be stored, not a copy.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        # A message/rfc822 part already holding a message rejects a second.
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the indented lines in this literal are assumed to
        # start with two spaces -- confirm against msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        self.assertIsNone(dsn1.get_param('nsd', header='reporting-mta'))
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        eq = self.assertEqual
        with openfile('msg_30.txt') as fp:
            msg = email.message_from_file(fp)
        # Both containers, and the message nested in each, get the same
        # checks.
        for index in (0, 1):
            container = msg.get_payload(index)
            eq(container.get_default_type(), 'message/rfc822')
            eq(container.get_content_type(), 'message/rfc822')
            inner = container.get_payload(0)
            eq(inner.get_default_type(), 'text/plain')
            eq(inner.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        eq = self.assertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        for index in (0, 1):
            container = msg.get_payload(index)
            eq(container.get_default_type(), 'message/rfc822')
            eq(container.get_content_type(), 'message/rfc822')
            inner = container.get_payload(0)
            eq(inner.get_default_type(), 'text/plain')
            eq(inner.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed, the subparts must still report
        # message/rfc822 as both their content type and default type.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002351
Ezio Melottib3aedd42010-11-20 19:04:17 +00002352
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):
    """Check that parse -> model -> generate reproduces the input text.

    Each test reads a sample message, parses it into a message tree and,
    without touching the tree, flattens it again; the regenerated text must
    equal the original.  ``_msgobj`` and ``_idempotent`` are the two helpers
    every test goes through, and ``linesep`` is the line separator used to
    build the expected payload values.
    """

    linesep = '\n'

    def _msgobj(self, filename):
        """Return ``(message, text)`` for the sample file *filename*."""
        with openfile(filename) as fp:
            data = fp.read()
        return email.message_from_string(data), data

    def _idempotent(self, msg, text, unixfrom=False):
        """Flatten *msg* and assert that the result equals *text*.

        Header wrapping is disabled (``maxheaderlen=0``) so that long
        headers come back exactly as they were read.
        """
        out = StringIO()
        Generator(out, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, out.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        msg, text = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_message_delivery_status(self):
        # This sample is compared with the Unix-From line included.
        self._idempotent(*self._msgobj('msg_43.txt'), unixfrom=True)

    def test_message_signed_idempotent(self):
        self._idempotent(*self._msgobj('msg_45.txt'))

    def test_content_type(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        # The message/rfc822 part wraps exactly one inner message.
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002515
2516
Ezio Melottib3aedd42010-11-20 19:04:17 +00002517
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002518# Test various other bits of the package's functionality
class TestMiscellaneous(TestEmailBase):
    """Tests for assorted package functionality: the ``message_from_*``
    helpers, ``email.utils`` date and address handling, and ``Charset``."""

    def test_message_from_string(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        # Don't wrap/continue long headers since we're trying to test
        # idempotency.
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        self.assertEqual(text, s.getvalue())

    def test_message_from_file(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()
            # Rewind so the parser sees the same data we just read.
            fp.seek(0)
            msg = email.message_from_file(fp)
        s = StringIO()
        # Don't wrap/continue long headers since we're trying to test
        # idempotency.
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        self.assertEqual(text, s.getvalue())

    def test_message_from_string_with_class(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()

        # Create a subclass
        class MyMessage(Message):
            pass

        msg = email.message_from_string(text, MyMessage)
        self.assertIsInstance(msg, MyMessage)
        # Try something more complicated
        with openfile('msg_02.txt') as fp:
            text = fp.read()
        msg = email.message_from_string(text, MyMessage)
        for subpart in msg.walk():
            self.assertIsInstance(subpart, MyMessage)

    def test_message_from_file_with_class(self):
        # Create a subclass
        class MyMessage(Message):
            pass

        with openfile('msg_01.txt') as fp:
            msg = email.message_from_file(fp, MyMessage)
        self.assertIsInstance(msg, MyMessage)
        # Try something more complicated
        with openfile('msg_02.txt') as fp:
            msg = email.message_from_file(fp, MyMessage)
        for subpart in msg.walk():
            self.assertIsInstance(subpart, MyMessage)

    def test__all__(self):
        module = __import__('email')
        self.assertEqual(sorted(module.__all__), [
            'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
            'generator', 'header', 'iterators', 'message',
            'message_from_binary_file', 'message_from_bytes',
            'message_from_file', 'message_from_string', 'mime', 'parser',
            'quoprimime', 'utils',
            ])

    def test_formatdate(self):
        now = time.time()
        self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
                         time.gmtime(now)[:6])

    def test_formatdate_localtime(self):
        now = time.time()
        self.assertEqual(
            utils.parsedate(utils.formatdate(now, localtime=True))[:6],
            time.localtime(now)[:6])

    def test_formatdate_usegmt(self):
        now = time.time()
        self.assertEqual(
            utils.formatdate(now, localtime=False),
            time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
        self.assertEqual(
            utils.formatdate(now, localtime=False, usegmt=True),
            time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))

    def test_parsedate_none(self):
        self.assertIsNone(utils.parsedate(''))

    def test_parsedate_compact(self):
        # The FWS after the comma is optional
        self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
                         utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))

    def test_parsedate_no_dayofweek(self):
        eq = self.assertEqual
        eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
           (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))

    def test_parsedate_compact_no_dayofweek(self):
        eq = self.assertEqual
        eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))

    def test_parsedate_no_space_before_positive_offset(self):
        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))

    def test_parsedate_no_space_before_negative_offset(self):
        # Issue 1155362: we already handled '+' for this case.
        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))

    def test_parsedate_acceptable_to_time_functions(self):
        eq = self.assertEqual
        timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
        t = int(time.mktime(timetup))
        eq(time.localtime(t)[:6], timetup[:6])
        eq(int(time.strftime('%Y', timetup)), 2003)
        # parsedate_tz() returns a 10-tuple; only the first nine fields are
        # passed on to the time module.
        timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
        t = int(time.mktime(timetup[:9]))
        eq(time.localtime(t)[:6], timetup[:6])
        eq(int(time.strftime('%Y', timetup[:9])), 2003)

    def test_mktime_tz(self):
        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
                                          -1, -1, -1, 0)), 0)
        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
                                          -1, -1, -1, 1234)), -1234)

    def test_parsedate_y2k(self):
        """Test for parsing a date with a two-digit year.

        Parsing a date with a two-digit year should return the correct
        four-digit year. RFC822 allows two-digit years, but RFC2822 (which
        obsoletes RFC822) requires four-digit years.

        """
        self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
                         utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
        self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
                         utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))

    def test_parseaddr_empty(self):
        self.assertEqual(utils.parseaddr('<>'), ('', ''))
        self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')

    def test_noquote_dump(self):
        self.assertEqual(
            utils.formataddr(('A Silly Person', 'person@dom.ain')),
            'A Silly Person <person@dom.ain>')

    def test_escape_dump(self):
        self.assertEqual(
            utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
            r'"A \(Very\) Silly Person" <person@dom.ain>')
        a = r'A \(Special\) Person'
        b = 'person@dom.ain'
        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))

    def test_escape_backslashes(self):
        # Raw strings throughout: '\B' and '\ ' are not valid escape
        # sequences, so a plain literal only works by accident and draws a
        # warning from the compiler.
        a = r'Arthur \Backslash\ Foobar'
        b = 'person@dom.ain'
        self.assertEqual(
            utils.formataddr((a, b)),
            r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))

    def test_name_with_dot(self):
        x = 'John X. Doe <jxd@example.com>'
        y = '"John X. Doe" <jxd@example.com>'
        a, b = ('John X. Doe', 'jxd@example.com')
        self.assertEqual(utils.parseaddr(x), (a, b))
        self.assertEqual(utils.parseaddr(y), (a, b))
        # formataddr() quotes the name if there's a dot in it
        self.assertEqual(utils.formataddr((a, b)), y)

    def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
        # issue 10005.  Note that in the third test the second pair of
        # backslashes is not actually a quoted pair because it is not inside a
        # comment or quoted string: the address being parsed has a quoted
        # string containing a quoted backslash, followed by 'example' and two
        # backslashes, followed by another quoted string containing a space and
        # the word 'example'.  parseaddr copies those two backslashes
        # literally.  Per rfc5322 this is not technically correct since a \ may
        # not appear in an address outside of a quoted string.  It is probably
        # a sensible Postel interpretation, though.
        addresses = (
            '""example" example"@example.com',
            '"\\"example\\" example"@example.com',
            '"\\\\"example\\\\" example"@example.com',
        )
        # Every address must come back unchanged, with an empty real name.
        for address in addresses:
            self.assertEqual(utils.parseaddr(address), ('', address))

    def test_parseaddr_preserves_spaces_in_local_part(self):
        # issue 9286.  A normal RFC5322 local part should not contain any
        # folding white space, but legacy local parts can (they are a sequence
        # of atoms, not dotatoms).  On the other hand we strip whitespace from
        # before the @ and around dots, on the assumption that the whitespace
        # around the punctuation is a mistake in what would otherwise be
        # an RFC5322 local part.  Leading whitespace is, as usual, stripped as
        # well.
        cases = (
            # (expected address, input)
            ("merwok wok@xample.com", "merwok wok@xample.com"),
            ("merwok wok@xample.com", "merwok wok@xample.com"),
            ("merwok wok@xample.com", " merwok wok @xample.com"),
            ('merwok"wok" wok@xample.com', 'merwok"wok" wok@xample.com'),
            ('merwok.wok.wok@xample.com', 'merwok. wok . wok@xample.com'),
        )
        for expected, raw in cases:
            self.assertEqual(('', expected), utils.parseaddr(raw))

    def test_multiline_from_comment(self):
        x = """\
Foo
\tBar <foo@example.com>"""
        self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))

    def test_quote_dump(self):
        self.assertEqual(
            utils.formataddr(('A Silly; Person', 'person@dom.ain')),
            r'"A Silly; Person" <person@dom.ain>')

    def test_charset_richcomparisons(self):
        eq = self.assertEqual
        ne = self.assertNotEqual
        cset1 = Charset()
        cset2 = Charset()
        matching = ('us-ascii', 'US-ASCII', 'Us-AsCiI')
        differing = ('usascii', 'USASCII', 'UsAsCiI')
        # Each comparison is made with the Charset on the left and then on
        # the right, so both operand orders are exercised.
        for name in matching:
            eq(cset1, name)
        for name in matching:
            eq(name, cset1)
        for name in differing:
            ne(cset1, name)
        for name in differing:
            ne(name, cset1)
        eq(cset1, cset2)
        eq(cset2, cset1)

    def test_getaddresses(self):
        eq = self.assertEqual
        eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
                               'Bud Person <bperson@dom.ain>']),
           [('Al Person', 'aperson@dom.ain'),
            ('Bud Person', 'bperson@dom.ain')])

    def test_getaddresses_nasty(self):
        eq = self.assertEqual
        eq(utils.getaddresses(['foo: ;']), [('', '')])
        eq(utils.getaddresses(
           ['[]*-- =~$']),
           [('', ''), ('', ''), ('', '*--')])
        eq(utils.getaddresses(
           ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
           [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])

    def test_getaddresses_embedded_comment(self):
        """Test proper handling of a nested comment"""
        eq = self.assertEqual
        addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
        eq(addrs[0][1], 'foo@bar.com')

    def test_utils_quote_unquote(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('content-disposition', 'attachment',
                       filename='foo\\wacky"name')
        eq(msg.get_filename(), 'foo\\wacky"name')

    def test_get_body_encoding_with_bogus_charset(self):
        charset = Charset('not a charset')
        self.assertEqual(charset.get_body_encoding(), 'base64')

    def test_get_body_encoding_with_uppercase_charset(self):
        eq = self.assertEqual
        msg = Message()
        msg['Content-Type'] = 'text/plain; charset=UTF-8'
        eq(msg['content-type'], 'text/plain; charset=UTF-8')
        charsets = msg.get_charsets()
        eq(len(charsets), 1)
        eq(charsets[0], 'utf-8')
        charset = Charset(charsets[0])
        eq(charset.get_body_encoding(), 'base64')
        msg.set_payload(b'hello world', charset=charset)
        eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
        eq(msg.get_payload(decode=True), b'hello world')
        eq(msg['content-transfer-encoding'], 'base64')
        # Try another one
        msg = Message()
        msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
        charsets = msg.get_charsets()
        eq(len(charsets), 1)
        eq(charsets[0], 'us-ascii')
        charset = Charset(charsets[0])
        eq(charset.get_body_encoding(), encoders.encode_7or8bit)
        msg.set_payload('hello world', charset=charset)
        eq(msg.get_payload(), 'hello world')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_charsets_case_insensitive(self):
        lc = Charset('us-ascii')
        uc = Charset('US-ASCII')
        self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())

    def test_partial_falls_inside_message_delivery_status(self):
        eq = self.ndiffAssertEqual
        # The Parser interface provides chunks of data to FeedParser in 8192
        # byte gulps.  SF bug #1076485 found one of those chunks inside
        # message/delivery-status header block, which triggered an
        # unreadline() of NeedMoreData.
        msg = self._msgobj('msg_43.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the indentation of this listing is significant and is
        # written out explicitly here, assuming _structure() indents by four
        # spaces per nesting level and that the 26 text/plain entries are the
        # children of message/delivery-status -- confirm against msg_43.txt.
        expected = ''.join(
            ['multipart/report\n',
             '    text/plain\n',
             '    message/delivery-status\n']
            + ['        text/plain\n'] * 26
            + ['    text/rfc822-headers\n'])
        eq(sfp.getvalue(), expected)

    def test_make_msgid_domain(self):
        self.assertEqual(
            email.utils.make_msgid(domain='testdomain-string')[-19:],
            '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002876
Ezio Melottib3aedd42010-11-20 19:04:17 +00002877
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002878# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for ``email.iterators`` and for line buffering in the feed
    parser's ``BufferedSubFile``."""

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        # Collect the payload of every subpart whose main type is 'text'.
        payloads = [subpart.get_payload() for subpart
                    in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        payloads = [subpart.get_payload() for subpart
                    in iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
        line endings.  A CR ending one push() followed by a LF starting
        the next push() added an empty line.
        '''
        # Each entry is (data pushed, number of complete lines that must be
        # readable right after that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            # Drain every line available so far; readline() hands back the
            # NeedMoreData sentinel once no complete line is left.
            available = list(iter(bsf.readline, NeedMoreData))
            om.extend(available)
            self.assertEqual(len(available), n)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: the lines read back must
        # concatenate to exactly what was pushed.
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
2965
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002966
Ezio Melottib3aedd42010-11-20 19:04:17 +00002967
class TestParsers(TestEmailBase):
    """Tests for ``Parser``, ``HeaderParser`` and the ``message_from_*``
    helpers on unusual or malformed input."""

    # Show complete diffs when a comparison in this class fails.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body is left unparsed, so it stays one string and the message
        # is not reported as multipart.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        eq(part1.is_multipart(), 1)
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        eq(part1a.is_multipart(), 0)
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        eq(part2.is_multipart(), 1)
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        eq(part2a.is_multipart(), 0)
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works whether keys() returns a list or any other iterable.
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003130
Ezio Melottib3aedd42010-11-20 19:04:17 +00003131
R. David Murray96fd54e2010-10-08 15:55:28 +00003132class Test8BitBytesHandling(unittest.TestCase):
3133 # In Python3 all input is string, but that doesn't work if the actual input
3134 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3135 # decode byte streams using the surrogateescape error handler, and
3136 # reconvert to binary at appropriate places if we detect surrogates. This
3137 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3138 # but it does allow us to parse and preserve them, and to decode body
3139 # parts that use an 8bit CTE.
3140
3141 bodytest_msg = textwrap.dedent("""\
3142 From: foo@bar.com
3143 To: baz
3144 Mime-Version: 1.0
3145 Content-Type: text/plain; charset={charset}
3146 Content-Transfer-Encoding: {cte}
3147
3148 {bodyline}
3149 """)
3150
3151 def test_known_8bit_CTE(self):
3152 m = self.bodytest_msg.format(charset='utf-8',
3153 cte='8bit',
3154 bodyline='pöstal').encode('utf-8')
3155 msg = email.message_from_bytes(m)
3156 self.assertEqual(msg.get_payload(), "pöstal\n")
3157 self.assertEqual(msg.get_payload(decode=True),
3158 "pöstal\n".encode('utf-8'))
3159
3160 def test_unknown_8bit_CTE(self):
3161 m = self.bodytest_msg.format(charset='notavalidcharset',
3162 cte='8bit',
3163 bodyline='pöstal').encode('utf-8')
3164 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003165 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003166 self.assertEqual(msg.get_payload(decode=True),
3167 "pöstal\n".encode('utf-8'))
3168
3169 def test_8bit_in_quopri_body(self):
3170 # This is non-RFC compliant data...without 'decode' the library code
3171 # decodes the body using the charset from the headers, and because the
3172 # source byte really is utf-8 this works. This is likely to fail
3173 # against real dirty data (ie: produce mojibake), but the data is
3174 # invalid anyway so it is as good a guess as any. But this means that
3175 # this test just confirms the current behavior; that behavior is not
3176 # necessarily the best possible behavior. With 'decode' it is
3177 # returning the raw bytes, so that test should be of correct behavior,
3178 # or at least produce the same result that email4 did.
3179 m = self.bodytest_msg.format(charset='utf-8',
3180 cte='quoted-printable',
3181 bodyline='p=C3=B6stál').encode('utf-8')
3182 msg = email.message_from_bytes(m)
3183 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3184 self.assertEqual(msg.get_payload(decode=True),
3185 'pöstál\n'.encode('utf-8'))
3186
3187 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3188 # This is similar to the previous test, but proves that if the 8bit
3189 # byte is undecodeable in the specified charset, it gets replaced
3190 # by the unicode 'unknown' character. Again, this may or may not
3191 # be the ideal behavior. Note that if decode=False none of the
3192 # decoders will get involved, so this is the only test we need
3193 # for this behavior.
3194 m = self.bodytest_msg.format(charset='ascii',
3195 cte='quoted-printable',
3196 bodyline='p=C3=B6stál').encode('utf-8')
3197 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003198 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003199 self.assertEqual(msg.get_payload(decode=True),
3200 'pöstál\n'.encode('utf-8'))
3201
3202 def test_8bit_in_base64_body(self):
3203 # Sticking an 8bit byte in a base64 block makes it undecodable by
3204 # normal means, so the block is returned undecoded, but as bytes.
3205 m = self.bodytest_msg.format(charset='utf-8',
3206 cte='base64',
3207 bodyline='cMO2c3RhbAá=').encode('utf-8')
3208 msg = email.message_from_bytes(m)
3209 self.assertEqual(msg.get_payload(decode=True),
3210 'cMO2c3RhbAá=\n'.encode('utf-8'))
3211
3212 def test_8bit_in_uuencode_body(self):
3213 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3214 # normal means, so the block is returned undecoded, but as bytes.
3215 m = self.bodytest_msg.format(charset='utf-8',
3216 cte='uuencode',
3217 bodyline='<,.V<W1A; á ').encode('utf-8')
3218 msg = email.message_from_bytes(m)
3219 self.assertEqual(msg.get_payload(decode=True),
3220 '<,.V<W1A; á \n'.encode('utf-8'))
3221
3222
# Each entry pairs a raw header line as it appears in the source message
# with the (name, value) that the string generator is expected to emit.
headertest_headers = (
    ('From: foo@bar.com', ('From', 'foo@bar.com')),
    ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
    ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
     '\tJean de Baddie',
     ('Subject',
      '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
      'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
      ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
    ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
)
# The raw utf-8 message: every source header followed directly by the body.
headertest_msg = (
    '\n'.join(src for (src, _) in headertest_headers)
    + '\nYes, they are flying.\n'
).encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003235
3236 def test_get_8bit_header(self):
3237 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003238 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3239 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003240
3241 def test_print_8bit_headers(self):
3242 msg = email.message_from_bytes(self.headertest_msg)
3243 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003244 textwrap.dedent("""\
3245 From: {}
3246 To: {}
3247 Subject: {}
3248 From: {}
3249
3250 Yes, they are flying.
3251 """).format(*[expected[1] for (_, expected) in
3252 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003253
3254 def test_values_with_8bit_headers(self):
3255 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003256 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003257 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003258 'b\uFFFD\uFFFDz',
3259 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3260 'coll\uFFFD\uFFFDgue, le pouf '
3261 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003262 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003263 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003264
3265 def test_items_with_8bit_headers(self):
3266 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003267 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003268 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003269 ('To', 'b\uFFFD\uFFFDz'),
3270 ('Subject', 'Maintenant je vous '
3271 'pr\uFFFD\uFFFDsente '
3272 'mon coll\uFFFD\uFFFDgue, le pouf '
3273 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3274 '\tJean de Baddie'),
3275 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003276
3277 def test_get_all_with_8bit_headers(self):
3278 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003279 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003280 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003281 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003282
R David Murraya2150232011-03-16 21:11:23 -04003283 def test_get_content_type_with_8bit(self):
3284 msg = email.message_from_bytes(textwrap.dedent("""\
3285 Content-Type: text/pl\xA7in; charset=utf-8
3286 """).encode('latin-1'))
3287 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3288 self.assertEqual(msg.get_content_maintype(), "text")
3289 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3290
3291 def test_get_params_with_8bit(self):
3292 msg = email.message_from_bytes(
3293 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3294 self.assertEqual(msg.get_params(header='x-header'),
3295 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3296 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3297 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3298 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3299
3300 def test_get_rfc2231_params_with_8bit(self):
3301 msg = email.message_from_bytes(textwrap.dedent("""\
3302 Content-Type: text/plain; charset=us-ascii;
3303 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3304 ).encode('latin-1'))
3305 self.assertEqual(msg.get_param('title'),
3306 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3307
3308 def test_set_rfc2231_params_with_8bit(self):
3309 msg = email.message_from_bytes(textwrap.dedent("""\
3310 Content-Type: text/plain; charset=us-ascii;
3311 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3312 ).encode('latin-1'))
3313 msg.set_param('title', 'test')
3314 self.assertEqual(msg.get_param('title'), 'test')
3315
3316 def test_del_rfc2231_params_with_8bit(self):
3317 msg = email.message_from_bytes(textwrap.dedent("""\
3318 Content-Type: text/plain; charset=us-ascii;
3319 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3320 ).encode('latin-1'))
3321 msg.del_param('title')
3322 self.assertEqual(msg.get_param('title'), None)
3323 self.assertEqual(msg.get_content_maintype(), 'text')
3324
3325 def test_get_payload_with_8bit_cte_header(self):
3326 msg = email.message_from_bytes(textwrap.dedent("""\
3327 Content-Transfer-Encoding: b\xa7se64
3328 Content-Type: text/plain; charset=latin-1
3329
3330 payload
3331 """).encode('latin-1'))
3332 self.assertEqual(msg.get_payload(), 'payload\n')
3333 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3334
# Raw utf-8 message with 8bit bytes in the headers and a Cyrillic 8bit body.
# The Subject header is folded onto a tab-indented continuation line.
non_latin_bin_msg = (
    'From: foo@bar.com\n'
    'To: báz\n'
    'Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
    '\tJean de Baddie\n'
    'Mime-Version: 1.0\n'
    'Content-Type: text/plain; charset="utf-8"\n'
    'Content-Transfer-Encoding: 8bit\n'
    '\n'
    'Да, они летят.\n'
).encode('utf-8')
3346
3347 def test_bytes_generator(self):
3348 msg = email.message_from_bytes(self.non_latin_bin_msg)
3349 out = BytesIO()
3350 email.generator.BytesGenerator(out).flatten(msg)
3351 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3352
R. David Murray7372a072011-01-26 21:21:32 +00003353 def test_bytes_generator_handles_None_body(self):
3354 #Issue 11019
3355 msg = email.message.Message()
3356 out = BytesIO()
3357 email.generator.BytesGenerator(out).flatten(msg)
3358 self.assertEqual(out.getvalue(), b"\n")
3359
# 7bit rendering of the non-latin message as the string generator emits it:
# 8bit headers become unknown-8bit encoded words (the Subject folded over
# three lines, continuation lines starting with a space) and the body is
# re-encoded as base64.
non_latin_bin_msg_as7bit_wrapped = (
    'From: foo@bar.com\n'
    'To: =?unknown-8bit?q?b=C3=A1z?=\n'
    'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=\n'
    ' =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
    ' =?unknown-8bit?q?_Jean_de_Baddie?=\n'
    'Mime-Version: 1.0\n'
    'Content-Type: text/plain; charset="utf-8"\n'
    'Content-Transfer-Encoding: base64\n'
    '\n'
    '0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==\n'
)
3372
3373 def test_generator_handles_8bit(self):
3374 msg = email.message_from_bytes(self.non_latin_bin_msg)
3375 out = StringIO()
3376 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003377 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003378
3379 def test_bytes_generator_with_unix_from(self):
3380 # The unixfrom contains a current date, so we can't check it
3381 # literally. Just make sure the first word is 'From' and the
3382 # rest of the message matches the input.
3383 msg = email.message_from_bytes(self.non_latin_bin_msg)
3384 out = BytesIO()
3385 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3386 lines = out.getvalue().split(b'\n')
3387 self.assertEqual(lines[0].split()[0], b'From')
3388 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3389
# Same as the wrapped 7bit form, except that the first two Subject lines are
# merged into a single long line.
non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
non_latin_bin_msg_as7bit = '\n'.join(
    non_latin_bin_msg_as7bit[:2]
    + ['Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
       'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    + non_latin_bin_msg_as7bit[4:])
3395
def test_message_from_binary_file(self):
    # Round-trip through a real file: write the raw bytes, parse them back
    # with BytesParser and compare the 7bit string form.
    path = 'test.msg'
    self.addCleanup(unlink, path)
    with open(path, 'wb') as outfile:
        outfile.write(self.non_latin_bin_msg)
    with open(path, 'rb') as infile:
        parsed = email.parser.BytesParser().parse(infile)
    self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)
3404
# Raw latin-1 message with an 8bit body.
latin_bin_msg = (
    'From: foo@bar.com\n'
    'To: Dinsdale\n'
    'Subject: Nudge nudge, wink, wink\n'
    'Mime-Version: 1.0\n'
    'Content-Type: text/plain; charset="latin-1"\n'
    'Content-Transfer-Encoding: 8bit\n'
    '\n'
    'oh là là, know what I mean, know what I mean?\n'
).encode('latin-1')
3415
# The latin-1 message as the string generator renders it: the charset is
# spelled iso-8859-1 and the body is quoted-printable.
latin_bin_msg_as7bit = (
    'From: foo@bar.com\n'
    'To: Dinsdale\n'
    'Subject: Nudge nudge, wink, wink\n'
    'Mime-Version: 1.0\n'
    'Content-Type: text/plain; charset="iso-8859-1"\n'
    'Content-Transfer-Encoding: quoted-printable\n'
    '\n'
    'oh l=E0 l=E0, know what I mean, know what I mean?\n'
)
3426
3427 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3428 m = email.message_from_bytes(self.latin_bin_msg)
3429 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3430
3431 def test_decoded_generator_emits_unicode_body(self):
3432 m = email.message_from_bytes(self.latin_bin_msg)
3433 out = StringIO()
3434 email.generator.DecodedGenerator(out).flatten(m)
3435 #DecodedHeader output contains an extra blank line compared
3436 #to the input message. RDM: not sure if this is a bug or not,
3437 #but it is not specific to the 8bit->7bit conversion.
3438 self.assertEqual(out.getvalue(),
3439 self.latin_bin_msg.decode('latin-1')+'\n')
3440
3441 def test_bytes_feedparser(self):
3442 bfp = email.feedparser.BytesFeedParser()
3443 for i in range(0, len(self.latin_bin_msg), 10):
3444 bfp.feed(self.latin_bin_msg[i:i+10])
3445 m = bfp.close()
3446 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3447
def test_crlf_flatten(self):
    # A message flattened with linesep='\r\n' must reproduce the contents
    # of msg_26.txt byte for byte.
    with openfile('msg_26.txt', 'rb') as fp:
        original = fp.read()
    parsed = email.message_from_bytes(original)
    buf = BytesIO()
    email.generator.BytesGenerator(buf).flatten(parsed, linesep='\r\n')
    self.assertEqual(buf.getvalue(), original)
R David Murrayc5c14722011-04-06 08:13:02 -04003456
3457 def test_8bit_multipart(self):
3458 # Issue 11605
3459 source = textwrap.dedent("""\
3460 Date: Fri, 18 Mar 2011 17:15:43 +0100
3461 To: foo@example.com
3462 From: foodwatch-Newsletter <bar@example.com>
3463 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3464 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3465 MIME-Version: 1.0
3466 Content-Type: multipart/alternative;
3467 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3468
3469 --b1_76a486bee62b0d200f33dc2ca08220ad
3470 Content-Type: text/plain; charset="utf-8"
3471 Content-Transfer-Encoding: 8bit
3472
3473 Guten Tag, ,
3474
3475 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3476 Nachrichten aus Japan.
3477
3478
3479 --b1_76a486bee62b0d200f33dc2ca08220ad
3480 Content-Type: text/html; charset="utf-8"
3481 Content-Transfer-Encoding: 8bit
3482
3483 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3484 "http://www.w3.org/TR/html4/loose.dtd">
3485 <html lang="de">
3486 <head>
3487 <title>foodwatch - Newsletter</title>
3488 </head>
3489 <body>
3490 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3491 die Nachrichten aus Japan.</p>
3492 </body>
3493 </html>
3494 --b1_76a486bee62b0d200f33dc2ca08220ad--
3495
3496 """).encode('utf-8')
3497 msg = email.message_from_bytes(source)
3498 s = BytesIO()
3499 g = email.generator.BytesGenerator(s)
3500 g.flatten(msg)
3501 self.assertEqual(s.getvalue(), source)
3502
R David Murray9fd170e2012-03-14 14:05:03 -04003503 def test_bytes_generator_b_encoding_linesep(self):
3504 # Issue 14062: b encoding was tacking on an extra \n.
3505 m = Message()
3506 # This has enough non-ascii that it should always end up b encoded.
3507 m['Subject'] = Header('žluťoučký kůň')
3508 s = BytesIO()
3509 g = email.generator.BytesGenerator(s)
3510 g.flatten(m, linesep='\r\n')
3511 self.assertEqual(
3512 s.getvalue(),
3513 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3514
3515 def test_generator_b_encoding_linesep(self):
3516 # Since this broke in ByteGenerator, test Generator for completeness.
3517 m = Message()
3518 # This has enough non-ascii that it should always end up b encoded.
3519 m['Subject'] = Header('žluťoučký kůň')
3520 s = StringIO()
3521 g = email.generator.Generator(s)
3522 g.flatten(m, linesep='\r\n')
3523 self.assertEqual(
3524 s.getvalue(),
3525 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3526
maxDiff = None  # no limit on the length of diffs shown on failure
3528
Ezio Melottib3aedd42010-11-20 19:04:17 +00003529
class BaseTestBytesGeneratorIdempotent:
    # Mixin that round-trips messages through BytesGenerator.  The methods
    # read linesep, blinesep and normalize_linesep_regex from the instance,
    # so concrete subclasses have to provide them.

    maxDiff = None

    def _msgobj(self, filename):
        # Read the test file, normalize its line endings to blinesep and
        # return (parsed message, normalized bytes).
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        data = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(data), data

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg without header rewrapping and compare with data.
        buf = BytesIO()
        generator = email.generator.BytesGenerator(buf, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, buf.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        expected_lines = str1.split(b'\n')
        actual_lines = str2.split(b'\n')
        self.assertListEqual(expected_lines, actual_lines)
3551
3552
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Run the idempotency tests with bare-LF line endings: any CRLF in the
    # test data is turned into LF first.
    linesep = '\n'
    blinesep = b'\n'
    normalize_linesep_regex = re.compile(br'\r\n')
3558
3559
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Run the idempotency tests with CRLF line endings: every LF that is
    # not already preceded by CR is turned into CRLF first.
    linesep = '\r\n'
    blinesep = b'\r\n'
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
3565
Ezio Melottib3aedd42010-11-20 19:04:17 +00003566
class TestBase64(unittest.TestCase):
    # Unit tests for the helper functions in email.base64mime.

    def test_len(self):
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        # Every started group of three input characters takes four
        # characters of base64 output.
        for size in range(15):
            bsize = 4 * ((size + 2) // 3)
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        # Empty input gives an empty result.  Its type is not pinned:
        # this test originally expected the input to be echoed back (b''),
        # while the non-empty cases below show the function returning str.
        # NOTE(review): some Python versions may return '' here -- confirm.
        self.assertIn(base64mime.body_encode(b''), ('', b''))
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte.
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        wrapped = ['eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'] * 3
        wrapped.append('eHh4eCB4eHh4IA==')
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40),
           '\n'.join(wrapped) + '\n')
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           '\r\n'.join(wrapped) + '\r\n')

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003617
3618
Ezio Melottib3aedd42010-11-20 19:04:17 +00003619
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003620class TestQuopri(unittest.TestCase):
def setUp(self):
    # Characters (as byte integers) that don't need to be encoded in
    # headers: ASCII letters, digits and a few punctuation marks.
    self.hlit = (
        list(range(ord('a'), ord('z') + 1))
        + list(range(ord('A'), ord('Z') + 1))
        + list(range(ord('0'), ord('9') + 1))
        + list(b'!*+-/')
    )
    # Every other byte value does need to be encoded in headers.
    self.hnon = [c for c in range(256) if c not in self.hlit]
    assert len(self.hlit) + len(self.hnon) == 256
    # Characters (as byte integers) that don't need to be encoded in
    # bodies: printable ASCII except '=', plus the tab character.
    self.blit = [c for c in range(ord(' '), ord('~') + 1) if c != ord('=')]
    self.blit.append(ord('\t'))
    # Every other byte value does need to be encoded in bodies.
    self.bnon = [c for c in range(256) if c not in self.blit]
    assert len(self.blit) + len(self.bnon) == 256
3642
Guido van Rossum9604e662007-08-30 03:46:43 +00003643 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003644 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003645 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003646 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003647 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003648 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003649 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003650
Guido van Rossum9604e662007-08-30 03:46:43 +00003651 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003652 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003653 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003654 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003655 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003656 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003657 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003658
3659 def test_header_quopri_len(self):
3660 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003661 eq(quoprimime.header_length(b'hello'), 5)
3662 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003663 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003664 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003665 # =?xxx?q?...?= means 10 extra characters
3666 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003667 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3668 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003669 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003670 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003671 # =?xxx?q?...?= means 10 extra characters
3672 10)
3673 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003674 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003675 'expected length 1 for %r' % chr(c))
3676 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003677 # Space is special; it's encoded to _
3678 if c == ord(' '):
3679 continue
3680 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003681 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003682 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003683
3684 def test_body_quopri_len(self):
3685 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003686 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003687 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003688 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003689 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003690
3691 def test_quote_unquote_idempotent(self):
3692 for x in range(256):
3693 c = chr(x)
3694 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3695
def _test_header_encode(self, header, expected_encoded_header, charset=None):
    # Pass charset on only when the caller supplied one, so that
    # header_encode's own default is exercised otherwise.
    args = (header,) if charset is None else (header, charset)
    self.assertEqual(quoprimime.header_encode(*args),
                     expected_encoded_header)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003702
R David Murraycafd79d2011-03-23 15:25:55 -04003703 def test_header_encode_null(self):
3704 self._test_header_encode(b'', '')
3705
R David Murrayec1b5b82011-03-23 14:19:05 -04003706 def test_header_encode_one_word(self):
3707 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
3708
3709 def test_header_encode_two_lines(self):
3710 self._test_header_encode(b'hello\nworld',
3711 '=?iso-8859-1?q?hello=0Aworld?=')
3712
3713 def test_header_encode_non_ascii(self):
3714 self._test_header_encode(b'hello\xc7there',
3715 '=?iso-8859-1?q?hello=C7there?=')
3716
3717 def test_header_encode_alt_charset(self):
3718 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
3719 charset='iso-8859-2')
3720
def _test_header_decode(self, encoded_header, expected_decoded_header):
    # Decode encoded_header and compare with the expected text.
    self.assertEqual(quoprimime.header_decode(encoded_header),
                     expected_decoded_header)
3724
3725 def test_header_decode_null(self):
3726 self._test_header_decode('', '')
3727
3728 def test_header_decode_one_word(self):
3729 self._test_header_decode('hello', 'hello')
3730
3731 def test_header_decode_two_lines(self):
3732 self._test_header_decode('hello=0Aworld', 'hello\nworld')
3733
3734 def test_header_decode_non_ascii(self):
3735 self._test_header_decode('hello=C7there', 'hello\xc7there')
3736
def _test_decode(self, encoded, expected_decoded, eol=None):
    # Pass eol on only when the caller supplied one, so that decode's own
    # default is exercised otherwise.
    options = {} if eol is None else {'eol': eol}
    self.assertEqual(quoprimime.decode(encoded, **options),
                     expected_decoded)
3743
3744 def test_decode_null_word(self):
3745 self._test_decode('', '')
3746
3747 def test_decode_null_line_null_word(self):
3748 self._test_decode('\r\n', '\n')
3749
3750 def test_decode_one_word(self):
3751 self._test_decode('hello', 'hello')
3752
3753 def test_decode_one_word_eol(self):
3754 self._test_decode('hello', 'hello', eol='X')
3755
3756 def test_decode_one_line(self):
3757 self._test_decode('hello\r\n', 'hello\n')
3758
3759 def test_decode_one_line_lf(self):
3760 self._test_decode('hello\n', 'hello\n')
3761
R David Murraycafd79d2011-03-23 15:25:55 -04003762 def test_decode_one_line_cr(self):
3763 self._test_decode('hello\r', 'hello\n')
3764
3765 def test_decode_one_line_nl(self):
3766 self._test_decode('hello\n', 'helloX', eol='X')
3767
3768 def test_decode_one_line_crnl(self):
3769 self._test_decode('hello\r\n', 'helloX', eol='X')
3770
R David Murrayec1b5b82011-03-23 14:19:05 -04003771 def test_decode_one_line_one_word(self):
3772 self._test_decode('hello\r\nworld', 'hello\nworld')
3773
3774 def test_decode_one_line_one_word_eol(self):
3775 self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
3776
3777 def test_decode_two_lines(self):
3778 self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
3779
R David Murraycafd79d2011-03-23 15:25:55 -04003780 def test_decode_two_lines_eol(self):
3781 self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
3782
R David Murrayec1b5b82011-03-23 14:19:05 -04003783 def test_decode_one_long_line(self):
3784 self._test_decode('Spam' * 250, 'Spam' * 250)
3785
3786 def test_decode_one_space(self):
3787 self._test_decode(' ', '')
3788
3789 def test_decode_multiple_spaces(self):
3790 self._test_decode(' ' * 5, '')
3791
3792 def test_decode_one_line_trailing_spaces(self):
3793 self._test_decode('hello \r\n', 'hello\n')
3794
3795 def test_decode_two_lines_trailing_spaces(self):
3796 self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')
3797
3798 def test_decode_quoted_word(self):
3799 self._test_decode('=22quoted=20words=22', '"quoted words"')
3800
3801 def test_decode_uppercase_quoting(self):
3802 self._test_decode('ab=CD=EF', 'ab\xcd\xef')
3803
3804 def test_decode_lowercase_quoting(self):
3805 self._test_decode('ab=cd=ef', 'ab\xcd\xef')
3806
3807 def test_decode_soft_line_break(self):
3808 self._test_decode('soft line=\r\nbreak', 'soft linebreak')
3809
3810 def test_decode_false_quoting(self):
3811 self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
3812
def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
    # Only options the caller supplied are forwarded, so body_encode's own
    # defaults are exercised otherwise.  The local values fall back to the
    # defaults this helper assumes (76 and '\n') for the length check.
    kwargs = {}
    if maxlinelen is not None:
        kwargs['maxlinelen'] = maxlinelen
    else:
        maxlinelen = 76
    if eol is not None:
        kwargs['eol'] = eol
    else:
        eol = '\n'
    encoded_body = quoprimime.body_encode(body, **kwargs)
    self.assertEqual(encoded_body, expected_encoded_body)
    if eol in ('\n', '\r\n'):
        # We know how to split the result back into lines, so maxlinelen
        # can be checked.
        for line in encoded_body.splitlines():
            self.assertLessEqual(len(line), maxlinelen)
3832
3833 def test_encode_null(self):
3834 self._test_encode('', '')
3835
3836 def test_encode_null_lines(self):
3837 self._test_encode('\n\n', '\n\n')
3838
3839 def test_encode_one_line(self):
3840 self._test_encode('hello\n', 'hello\n')
3841
3842 def test_encode_one_line_crlf(self):
3843 self._test_encode('hello\r\n', 'hello\n')
3844
3845 def test_encode_one_line_eol(self):
3846 self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
3847
3848 def test_encode_one_space(self):
3849 self._test_encode(' ', '=20')
3850
3851 def test_encode_one_line_one_space(self):
3852 self._test_encode(' \n', '=20\n')
3853
# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list.  For code points above 255 this will fail.
# Should there be a check for 8-bit-only ord() values in body, or at least
# a comment about the expected input?
3858
3859 def test_encode_two_lines_one_space(self):
3860 self._test_encode(' \n \n', '=20\n=20\n')
3861
R David Murrayec1b5b82011-03-23 14:19:05 -04003862 def test_encode_one_word_trailing_spaces(self):
3863 self._test_encode('hello ', 'hello =20')
3864
3865 def test_encode_one_line_trailing_spaces(self):
3866 self._test_encode('hello \n', 'hello =20\n')
3867
3868 def test_encode_one_word_trailing_tab(self):
3869 self._test_encode('hello \t', 'hello =09')
3870
3871 def test_encode_one_line_trailing_tab(self):
3872 self._test_encode('hello \t\n', 'hello =09\n')
3873
3874 def test_encode_trailing_space_before_maxlinelen(self):
3875 self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
3876
R David Murrayb938c8c2011-03-24 12:19:26 -04003877 def test_encode_trailing_space_at_maxlinelen(self):
3878 self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
3879
R David Murrayec1b5b82011-03-23 14:19:05 -04003880 def test_encode_trailing_space_beyond_maxlinelen(self):
R David Murrayb938c8c2011-03-24 12:19:26 -04003881 self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
3882
3883 def test_encode_whitespace_lines(self):
3884 self._test_encode(' \n' * 5, '=20\n' * 5)
R David Murrayec1b5b82011-03-23 14:19:05 -04003885
3886 def test_encode_quoted_equals(self):
3887 self._test_encode('a = b', 'a =3D b')
3888
3889 def test_encode_one_long_string(self):
3890 self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
3891
3892 def test_encode_one_long_line(self):
3893 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
3894
3895 def test_encode_one_very_long_line(self):
3896 self._test_encode('x' * 200 + '\n',
3897 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
3898
3899 def test_encode_one_long_line(self):
3900 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
3901
3902 def test_encode_shortest_maxlinelen(self):
3903 self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003904
R David Murrayb938c8c2011-03-24 12:19:26 -04003905 def test_encode_maxlinelen_too_small(self):
3906 self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
3907
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003908 def test_encode(self):
3909 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003910 eq(quoprimime.body_encode(''), '')
3911 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003912 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00003913 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003914 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00003915 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003916xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
3917 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
3918x xxxx xxxx xxxx xxxx=20""")
3919 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00003920 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
3921 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003922xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
3923 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
3924x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00003925 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003926one line
3927
3928two line"""), """\
3929one line
3930
3931two line""")
3932
3933
Ezio Melottib3aedd42010-11-20 19:04:17 +00003934
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003935# Test the Charset class
class TestCharset(unittest.TestCase):
    """Checks for header and body encoding through email.charset.Charset."""

    def tearDown(self):
        """Drop the 'fake' charset that test_body_encode registers, if any."""
        from email import charset as CharsetModule
        # pop() with a default is a no-op when the entry was never added.
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        """header_encode(): ASCII passes through; 8-bit needs a capable codec."""
        ascii_charset = Charset('us-ascii')
        # Make sure us-ascii = no Unicode conversion
        self.assertEqual(ascii_charset.header_encode('Hello World!'),
                         'Hello World!')
        # 8-bit text is expected to be rejected by us-ascii ...
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_charset.header_encode,
                          eight_bit)
        # ... and to be encoded as an encoded-word by utf-8.
        utf8_charset = Charset('utf-8')
        self.assertEqual(utf8_charset.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        """body_encode() is expected to follow each charset's body encoding."""
        check = self.assertEqual
        # Try a charset with QP body encoding
        latin1 = Charset('iso-8859-1')
        check('hello w=F6rld', latin1.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        utf8 = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', utf8.body_encode(b'hello world'))
        # Try a charset with None body encoding
        plain = Charset('us-ascii')
        check('hello world', plain.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        euc_jp = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake = Charset('fake')
        check('hello world', fake.body_encode('hello world'))

    def test_unicode_charset_name(self):
        """str() gives the charset name; a non-ASCII name raises CharsetError."""
        ascii_charset = Charset('us-ascii')
        self.assertEqual(str(ascii_charset), 'us-ascii')
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3990
3991
Ezio Melottib3aedd42010-11-20 19:04:17 +00003992
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003993# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    """Tests for email.header.Header, decode_header() and make_header()."""

    def test_simple(self):
        """Appending a chunk that starts with a space yields two spaces."""
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        # A separating space is inserted between appended chunks (see
        # test_simple_surprise), and the chunk's own leading space is kept,
        # hence two spaces in the expected value.
        eq(h.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        """Appending a chunk without a leading space still gets a separator."""
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        """decode_header() on plain text returns it with a None charset."""
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        """Every folded line of a long header stays within maxlinelen."""
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        for line in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        """Encode, decode and rebuild a header mixing three charsets."""
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        """An empty Header encodes to the empty string."""
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        """A default-constructed Header compares equal to its text."""
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        """Folding depends on header_name and on an explicit maxlinelen."""
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # Passing header_name moves the expected fold point one word earlier.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # A generous maxlinelen is expected to prevent folding altogether.
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        """Q-encoded words are split to fit short maxlinelen values."""
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        """B-encoded words are split to fit short maxlinelen values."""
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        """Plain ASCII round-trips through decode_header()/make_header()."""
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        """append() accepts the charset as a plain string name."""
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        """utf-8 headers use whichever of q and b encoding is shorter."""
        eq = self.assertEqual
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        """Undecodable bytes raise unless errors='replace' is given."""
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_escaped_8bit_header(self):
        """Surrogate-escaped text with UNKNOWN8BIT decodes back to the bytes."""
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        e = x.decode('ascii', 'surrogateescape')
        h = Header(e, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        """Raw bytes with UNKNOWN8BIT behave like the surrogate-escaped form."""
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        """make_header() preserves unknown-8bit chunks from decode_header()."""
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        h2 = email.header.make_header(email.header.decode_header(h))
        self.assertEqual(str(h2),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        """Mutating the list from decode_header() leaves the Header intact."""
        h = Header('test')
        chunks = email.header.decode_header(h)
        chunks.append(('ascii', 'test2'))
        self.assertEqual(str(h), 'test')

    def test_encoded_adjacent_nonencoded(self):
        """An encoded chunk followed by a plain one round-trips unchanged."""
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        """Whitespace between adjacent encoded-words is dropped on decode."""
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        """A malformed base64 encoded-word raises HeaderParseError."""
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        """A shift_jis header is expected to be emitted as iso-2022-jp."""
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        """A header with no value flattens to 'Name: ' plus a blank line."""
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        msg = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(msg), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        """Leading whitespace in a header value survives flattening."""
        msg = Message()
        msg['SomeHeader'] = ' value with leading ws'
        # 'Name: ' is followed by the value verbatim, so the value's own
        # leading space shows up as a second space after the colon.
        self.assertEqual(str(msg), "SomeHeader:  value with leading ws\n\n")
4345
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004346
Ezio Melottib3aedd42010-11-20 19:04:17 +00004347
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004348# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    """Tests for RFC 2231 encoded and continued header parameters."""

    def test_get_param(self):
        """get_param() returns a (charset, language, value) triple."""
        parsed = self._msgobj('msg_29.txt')
        self.assertEqual(
            parsed.get_param('title'),
            ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        self.assertEqual(
            parsed.get_param('title', unquote=False),
            ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        """set_param() with a charset stores and flattens an RFC 2231 value."""
        check = self.ndiffAssertEqual
        title = 'This is even more ***fun*** isn\'t it!'
        fresh = Message()
        fresh.set_param('title', 'This is even more ***fun*** isn\'t it!',
                        charset='us-ascii')
        check(fresh.get_param('title'), ('us-ascii', '', title))
        fresh.set_param('title', 'This is even more ***fun*** isn\'t it!',
                        charset='us-ascii', language='en')
        check(fresh.get_param('title'), ('us-ascii', 'en', title))
        loaded = self._msgobj('msg_01.txt')
        loaded.set_param('title', 'This is even more ***fun*** isn\'t it!',
                         charset='us-ascii', language='en')
        flattened = loaded.as_string(maxheaderlen=78)
        check(flattened, """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_set_param_requote(self):
        """requote=False leaves plain values bare but still quotes tspecials."""
        message = Message()
        message.set_param('title', 'foo')
        self.assertEqual(message['content-type'], 'text/plain; title="foo"')
        message.set_param('title', 'bar', requote=False)
        self.assertEqual(message['content-type'], 'text/plain; title=bar')
        # tspecial is still quoted.
        message.set_param('title', "(bar)bell", requote=False)
        self.assertEqual(message['content-type'],
                         'text/plain; title="(bar)bell"')

    def test_del_param(self):
        """del_param() removes one parameter and keeps the others."""
        loaded = self._msgobj('msg_01.txt')
        loaded.set_param('foo', 'bar', charset='us-ascii', language='en')
        loaded.set_param('title', 'This is even more ***fun*** isn\'t it!',
                         charset='us-ascii', language='en')
        loaded.del_param('foo', header='Content-Type')
        flattened = loaded.as_string(maxheaderlen=78)
        self.ndiffAssertEqual(flattened, """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        """get_content_charset() reads the charset of msg_32.txt."""
        parsed = self._msgobj('msg_32.txt')
        self.assertEqual(parsed.get_content_charset(), 'us-ascii')

    def test_rfc2231_parse_rfc_quoting(self):
        """Unquoted encoded segments parse and flatten back unchanged."""
        raw = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(raw, parsed.as_string())

    def test_rfc2231_parse_extra_quoting(self):
        """Quoted encoded segments parse and flatten back unchanged."""
        raw = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(raw, parsed.as_string())

    def test_rfc2231_no_language_or_charset(self):
        """Unencoded continuations are joined into a plain string value."""
        raw = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        parsed = email.message_from_string(raw)
        value = parsed.get_param('NAME')
        self.assertNotIsInstance(value, tuple)
        self.assertEqual(
            value,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        """An empty charset and language still yield the decoded filename."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        """An empty charset and language still yield the decoded filename."""
        # NOTE(review): this fixture and assertion match
        # test_rfc2231_no_language_or_charset_in_filename exactly -- confirm
        # whether a different input was intended here.
        raw = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        """A first segment without '*' keeps its percent escapes literally."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(raw)
        self.assertEqual(
            parsed.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        """Segments without '*' are concatenated without percent-decoding."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(raw)
        self.assertEqual(
            parsed.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        """get_boundary() decodes a continued, encoded boundary parameter."""
        raw = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        """get_content_charset() decodes and lower-cases a continued value."""
        # This is a nonsensical charset value, but tests the code anyway
        raw = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        """An unknown charset name does not prevent filename decoding."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        """A charset parameter with non-ASCII bytes yields None."""
        raw = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        parsed = email.message_from_string(raw)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(parsed.get_content_charset(), None)

    def test_rfc2231_bad_character_in_charset(self):
        """A charset parameter with non-ASCII bytes yields None."""
        raw = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        parsed = email.message_from_string(raw)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(parsed.get_content_charset(), None)

    def test_rfc2231_bad_character_in_filename(self):
        """An undecodable byte in the filename becomes U+FFFD."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        """An X-UNKNOWN charset still yields the plain filename."""
        raw = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        """One apostrophe in an extended value is not a charset delimiter."""
        raw = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        parsed = email.message_from_string(raw)
        charset, language, value = parsed.get_param('name')
        self.assertEqual(charset, None)
        self.assertEqual(language, None)
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        """One apostrophe in unencoded segments gives a plain string."""
        raw = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        parsed = email.message_from_string(raw)
        value = parsed.get_param('name')
        self.assertNotIsInstance(value, tuple)
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        """Only the first two apostrophes delimit charset and language."""
        raw = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        parsed = email.message_from_string(raw)
        charset, language, value = parsed.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        """Apostrophes in unencoded segments are kept as literal text."""
        raw = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        parsed = email.message_from_string(raw)
        value = parsed.get_param('name')
        self.assertNotIsInstance(value, tuple)
        self.assertEqual(value, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        """An ordinary quoted parameter is returned as a plain string."""
        raw = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        """Encoded first segment, then a plain one: charset info is kept."""
        raw = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        parsed = email.message_from_string(raw)
        charset, language, value = parsed.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        """Plain first segment, then encoded ones: charset info is kept."""
        raw = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        parsed = email.message_from_string(raw)
        charset, language, value = parsed.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, 'My Document For You')
4693
4694
Ezio Melottib3aedd42010-11-20 19:04:17 +00004695
R. David Murraya8f480f2010-01-16 18:30:03 +00004696# Tests to ensure that signed parts of an email are completely preserved, as
4697# required by RFC1847 section 2.1. Note that these are incomplete, because the
4698# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):

    def _msg_and_obj(self, filename):
        # Load the named data file and return its raw text together with
        # the message object parsed from that text.
        with openfile(findfile(filename)) as data_file:
            raw_text = data_file.read()
        return raw_text, email.message_from_string(raw_text)

    def _signed_parts_eq(self, original, result):
        # Pull the first MIME part (the text between the first boundary
        # line and the next occurrence of the same boundary) out of each
        # message and require the regenerated one to equal the original.
        first_part = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        expected = first_part.search(original).group(2)
        actual = first_part.search(result).group(2)
        self.assertEqual(actual, expected)

    def test_long_headers_as_string(self):
        raw_text, parsed = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(raw_text, parsed.as_string())

    def test_long_headers_as_string_maxheaderlen(self):
        raw_text, parsed = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(raw_text, parsed.as_string(maxheaderlen=60))

    def test_long_headers_flatten(self):
        raw_text, parsed = self._msg_and_obj('msg_45.txt')
        buffer = StringIO()
        Generator(buffer).flatten(parsed)
        self._signed_parts_eq(raw_text, buffer.getvalue())
4731
4732
Ezio Melottib3aedd42010-11-20 19:04:17 +00004733
def _testclasses():
    """Return every attribute of this module whose name starts with 'Test'.

    The attributes come back in the order dir() lists their names.
    """
    this_module = sys.modules[__name__]
    found = []
    for attr_name in dir(this_module):
        if attr_name.startswith('Test'):
            found.append(getattr(this_module, attr_name))
    return found
4737
4738
def suite():
    """Return a TestSuite with one nested suite per class from _testclasses().

    Each class is loaded with a fresh unittest.TestLoader, which collects
    the methods whose names start with 'test'.  This is what
    unittest.makeSuite() did with its default arguments; makeSuite itself
    is deprecated since Python 3.11 and removed in 3.13, so it is not used.
    """
    loader = unittest.TestLoader()
    # Named all_tests rather than suite so the local does not shadow this
    # function's own name.
    all_tests = unittest.TestSuite()
    for testclass in _testclasses():
        all_tests.addTest(loader.loadTestsFromTestCase(testclass))
    return all_tests
4744
4745
def test_main():
    """Pass each class from _testclasses() to run_unittest, one at a time."""
    classes = _testclasses()
    for cls in classes:
        run_unittest(cls)
4749
4750
Ezio Melottib3aedd42010-11-20 19:04:17 +00004751
# When this file is executed directly, hand control to unittest and let it
# build the default test run from the module-level suite() factory.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')