blob: e66a410fee7128e0e9f7b41b75e207aacd75af55 [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
R David Murray638d40b2012-08-24 11:14:13 -040023from email.generator import Generator, DecodedGenerator, BytesGenerator
Guido van Rossum8b3febe2007-08-30 01:15:14 +000024from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R. David Murray96fd54e2010-10-08 15:55:28 +000039from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from email.test import __file__ as landmark
41
42
43NL = '\n'
44EMPTYSTRING = ''
45SPACE = ' '
46
47
Ezio Melottib3aedd42010-11-20 19:04:17 +000048
def openfile(filename, *args, **kws):
    """Open *filename* from the 'data' directory beside the test package.

    The directory is located relative to ``landmark`` (the path of the
    email.test package).  Any extra positional or keyword arguments are
    handed straight to the built-in open().
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
52
53
Ezio Melottib3aedd42010-11-20 19:04:17 +000054
# Base test class
class TestEmailBase(unittest.TestCase):
    """Shared helpers for the email test cases in this module."""

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output."""
        if first != second:
            # Compare the repr() of every line so that whitespace and
            # escape differences are visible in the reported diff.
            quoted = [
                [repr(line) for line in str(text).splitlines()]
                for text in (first, second)
            ]
            delta = difflib.ndiff(quoted[0], quoted[1])
            raise self.failureException(NL + NL.join(delta))

    def _msgobj(self, filename):
        """Parse the sample message file *filename* into a Message."""
        path = findfile(filename)
        with openfile(path) as fp:
            msg = email.message_from_file(fp)
        return msg
70
71
Ezio Melottib3aedd42010-11-20 19:04:17 +000072
# Test various aspects of the Message class's API
74class TestMessageAPI(TestEmailBase):
75 def test_get_all(self):
76 eq = self.assertEqual
77 msg = self._msgobj('msg_20.txt')
78 eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
79 eq(msg.get_all('xx', 'n/a'), 'n/a')
80
R. David Murraye5db2632010-11-20 15:10:13 +000081 def test_getset_charset(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +000082 eq = self.assertEqual
83 msg = Message()
84 eq(msg.get_charset(), None)
85 charset = Charset('iso-8859-1')
86 msg.set_charset(charset)
87 eq(msg['mime-version'], '1.0')
88 eq(msg.get_content_type(), 'text/plain')
89 eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
90 eq(msg.get_param('charset'), 'iso-8859-1')
91 eq(msg['content-transfer-encoding'], 'quoted-printable')
92 eq(msg.get_charset().input_charset, 'iso-8859-1')
93 # Remove the charset
94 msg.set_charset(None)
95 eq(msg.get_charset(), None)
96 eq(msg['content-type'], 'text/plain')
97 # Try adding a charset when there's already MIME headers present
98 msg = Message()
99 msg['MIME-Version'] = '2.0'
100 msg['Content-Type'] = 'text/x-weird'
101 msg['Content-Transfer-Encoding'] = 'quinted-puntable'
102 msg.set_charset(charset)
103 eq(msg['mime-version'], '2.0')
104 eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
105 eq(msg['content-transfer-encoding'], 'quinted-puntable')
106
107 def test_set_charset_from_string(self):
108 eq = self.assertEqual
109 msg = Message()
110 msg.set_charset('us-ascii')
111 eq(msg.get_charset().input_charset, 'us-ascii')
112 eq(msg['content-type'], 'text/plain; charset="us-ascii"')
113
114 def test_set_payload_with_charset(self):
115 msg = Message()
116 charset = Charset('iso-8859-1')
117 msg.set_payload('This is a string payload', charset)
118 self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
119
120 def test_get_charsets(self):
121 eq = self.assertEqual
122
123 msg = self._msgobj('msg_08.txt')
124 charsets = msg.get_charsets()
125 eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])
126
127 msg = self._msgobj('msg_09.txt')
128 charsets = msg.get_charsets('dingbat')
129 eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
130 'koi8-r'])
131
132 msg = self._msgobj('msg_12.txt')
133 charsets = msg.get_charsets()
134 eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
135 'iso-8859-3', 'us-ascii', 'koi8-r'])
136
137 def test_get_filename(self):
138 eq = self.assertEqual
139
140 msg = self._msgobj('msg_04.txt')
141 filenames = [p.get_filename() for p in msg.get_payload()]
142 eq(filenames, ['msg.txt', 'msg.txt'])
143
144 msg = self._msgobj('msg_07.txt')
145 subpart = msg.get_payload(1)
146 eq(subpart.get_filename(), 'dingusfish.gif')
147
148 def test_get_filename_with_name_parameter(self):
149 eq = self.assertEqual
150
151 msg = self._msgobj('msg_44.txt')
152 filenames = [p.get_filename() for p in msg.get_payload()]
153 eq(filenames, ['msg.txt', 'msg.txt'])
154
155 def test_get_boundary(self):
156 eq = self.assertEqual
157 msg = self._msgobj('msg_07.txt')
158 # No quotes!
159 eq(msg.get_boundary(), 'BOUNDARY')
160
161 def test_set_boundary(self):
162 eq = self.assertEqual
163 # This one has no existing boundary parameter, but the Content-Type:
164 # header appears fifth.
165 msg = self._msgobj('msg_01.txt')
166 msg.set_boundary('BOUNDARY')
167 header, value = msg.items()[4]
168 eq(header.lower(), 'content-type')
169 eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
170 # This one has a Content-Type: header, with a boundary, stuck in the
171 # middle of its headers. Make sure the order is preserved; it should
172 # be fifth.
173 msg = self._msgobj('msg_04.txt')
174 msg.set_boundary('BOUNDARY')
175 header, value = msg.items()[4]
176 eq(header.lower(), 'content-type')
177 eq(value, 'multipart/mixed; boundary="BOUNDARY"')
178 # And this one has no Content-Type: header at all.
179 msg = self._msgobj('msg_03.txt')
180 self.assertRaises(errors.HeaderParseError,
181 msg.set_boundary, 'BOUNDARY')
182
R. David Murray73a559d2010-12-21 18:07:59 +0000183 def test_make_boundary(self):
184 msg = MIMEMultipart('form-data')
185 # Note that when the boundary gets created is an implementation
186 # detail and might change.
187 self.assertEqual(msg.items()[0][1], 'multipart/form-data')
188 # Trigger creation of boundary
189 msg.as_string()
190 self.assertEqual(msg.items()[0][1][:33],
191 'multipart/form-data; boundary="==')
192 # XXX: there ought to be tests of the uniqueness of the boundary, too.
193
R. David Murray57c45ac2010-02-21 04:39:40 +0000194 def test_message_rfc822_only(self):
195 # Issue 7970: message/rfc822 not in multipart parsed by
196 # HeaderParser caused an exception when flattened.
Brett Cannon384917a2010-10-29 23:08:36 +0000197 with openfile(findfile('msg_46.txt')) as fp:
198 msgdata = fp.read()
R. David Murray57c45ac2010-02-21 04:39:40 +0000199 parser = HeaderParser()
200 msg = parser.parsestr(msgdata)
201 out = StringIO()
202 gen = Generator(out, True, 0)
203 gen.flatten(msg, False)
204 self.assertEqual(out.getvalue(), msgdata)
205
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000206 def test_get_decoded_payload(self):
207 eq = self.assertEqual
208 msg = self._msgobj('msg_10.txt')
209 # The outer message is a multipart
210 eq(msg.get_payload(decode=True), None)
211 # Subpart 1 is 7bit encoded
212 eq(msg.get_payload(0).get_payload(decode=True),
213 b'This is a 7bit encoded message.\n')
214 # Subpart 2 is quopri
215 eq(msg.get_payload(1).get_payload(decode=True),
216 b'\xa1This is a Quoted Printable encoded message!\n')
217 # Subpart 3 is base64
218 eq(msg.get_payload(2).get_payload(decode=True),
219 b'This is a Base64 encoded message.')
R. David Murray57a4b982010-03-08 02:17:03 +0000220 # Subpart 4 is base64 with a trailing newline, which
221 # used to be stripped (issue 7143).
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000222 eq(msg.get_payload(3).get_payload(decode=True),
R. David Murray57a4b982010-03-08 02:17:03 +0000223 b'This is a Base64 encoded message.\n')
224 # Subpart 5 has no Content-Transfer-Encoding: header.
225 eq(msg.get_payload(4).get_payload(decode=True),
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000226 b'This has no Content-Transfer-Encoding: header.\n')
227
228 def test_get_decoded_uu_payload(self):
229 eq = self.assertEqual
230 msg = Message()
231 msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
232 for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
233 msg['content-transfer-encoding'] = cte
234 eq(msg.get_payload(decode=True), b'hello world')
235 # Now try some bogus data
236 msg.set_payload('foo')
237 eq(msg.get_payload(decode=True), b'foo')
238
R David Murraya2860e82011-04-16 09:20:30 -0400239 def test_get_payload_n_raises_on_non_multipart(self):
240 msg = Message()
241 self.assertRaises(TypeError, msg.get_payload, 1)
242
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000243 def test_decoded_generator(self):
244 eq = self.assertEqual
245 msg = self._msgobj('msg_07.txt')
246 with openfile('msg_17.txt') as fp:
247 text = fp.read()
248 s = StringIO()
249 g = DecodedGenerator(s)
250 g.flatten(msg)
251 eq(s.getvalue(), text)
252
253 def test__contains__(self):
254 msg = Message()
255 msg['From'] = 'Me'
256 msg['to'] = 'You'
257 # Check for case insensitivity
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000258 self.assertTrue('from' in msg)
259 self.assertTrue('From' in msg)
260 self.assertTrue('FROM' in msg)
261 self.assertTrue('to' in msg)
262 self.assertTrue('To' in msg)
263 self.assertTrue('TO' in msg)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000264
265 def test_as_string(self):
266 eq = self.ndiffAssertEqual
267 msg = self._msgobj('msg_01.txt')
268 with openfile('msg_01.txt') as fp:
269 text = fp.read()
270 eq(text, str(msg))
271 fullrepr = msg.as_string(unixfrom=True)
272 lines = fullrepr.split('\n')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000273 self.assertTrue(lines[0].startswith('From '))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000274 eq(text, NL.join(lines[1:]))
275
276 def test_bad_param(self):
277 msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
278 self.assertEqual(msg.get_param('baz'), '')
279
280 def test_missing_filename(self):
281 msg = email.message_from_string("From: foo\n")
282 self.assertEqual(msg.get_filename(), None)
283
284 def test_bogus_filename(self):
285 msg = email.message_from_string(
286 "Content-Disposition: blarg; filename\n")
287 self.assertEqual(msg.get_filename(), '')
288
289 def test_missing_boundary(self):
290 msg = email.message_from_string("From: foo\n")
291 self.assertEqual(msg.get_boundary(), None)
292
293 def test_get_params(self):
294 eq = self.assertEqual
295 msg = email.message_from_string(
296 'X-Header: foo=one; bar=two; baz=three\n')
297 eq(msg.get_params(header='x-header'),
298 [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
299 msg = email.message_from_string(
300 'X-Header: foo; bar=one; baz=two\n')
301 eq(msg.get_params(header='x-header'),
302 [('foo', ''), ('bar', 'one'), ('baz', 'two')])
303 eq(msg.get_params(), None)
304 msg = email.message_from_string(
305 'X-Header: foo; bar="one"; baz=two\n')
306 eq(msg.get_params(header='x-header'),
307 [('foo', ''), ('bar', 'one'), ('baz', 'two')])
308
309 def test_get_param_liberal(self):
310 msg = Message()
311 msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
312 self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')
313
314 def test_get_param(self):
315 eq = self.assertEqual
316 msg = email.message_from_string(
317 "X-Header: foo=one; bar=two; baz=three\n")
318 eq(msg.get_param('bar', header='x-header'), 'two')
319 eq(msg.get_param('quuz', header='x-header'), None)
320 eq(msg.get_param('quuz'), None)
321 msg = email.message_from_string(
322 'X-Header: foo; bar="one"; baz=two\n')
323 eq(msg.get_param('foo', header='x-header'), '')
324 eq(msg.get_param('bar', header='x-header'), 'one')
325 eq(msg.get_param('baz', header='x-header'), 'two')
326 # XXX: We are not RFC-2045 compliant! We cannot parse:
327 # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
328 # msg.get_param("weird")
329 # yet.
330
331 def test_get_param_funky_continuation_lines(self):
332 msg = self._msgobj('msg_22.txt')
333 self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')
334
335 def test_get_param_with_semis_in_quotes(self):
336 msg = email.message_from_string(
337 'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
338 self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
339 self.assertEqual(msg.get_param('name', unquote=False),
340 '"Jim&amp;&amp;Jill"')
341
R. David Murrayd48739f2010-04-14 18:59:18 +0000342 def test_get_param_with_quotes(self):
343 msg = email.message_from_string(
344 'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
345 self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
346 msg = email.message_from_string(
347 "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
348 self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
349
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000350 def test_field_containment(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000351 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000352 msg = email.message_from_string('Header: exists')
353 unless('header' in msg)
354 unless('Header' in msg)
355 unless('HEADER' in msg)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000356 self.assertFalse('headerx' in msg)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000357
358 def test_set_param(self):
359 eq = self.assertEqual
360 msg = Message()
361 msg.set_param('charset', 'iso-2022-jp')
362 eq(msg.get_param('charset'), 'iso-2022-jp')
363 msg.set_param('importance', 'high value')
364 eq(msg.get_param('importance'), 'high value')
365 eq(msg.get_param('importance', unquote=False), '"high value"')
366 eq(msg.get_params(), [('text/plain', ''),
367 ('charset', 'iso-2022-jp'),
368 ('importance', 'high value')])
369 eq(msg.get_params(unquote=False), [('text/plain', ''),
370 ('charset', '"iso-2022-jp"'),
371 ('importance', '"high value"')])
372 msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
373 eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')
374
375 def test_del_param(self):
376 eq = self.assertEqual
377 msg = self._msgobj('msg_05.txt')
378 eq(msg.get_params(),
379 [('multipart/report', ''), ('report-type', 'delivery-status'),
380 ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
381 old_val = msg.get_param("report-type")
382 msg.del_param("report-type")
383 eq(msg.get_params(),
384 [('multipart/report', ''),
385 ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
386 msg.set_param("report-type", old_val)
387 eq(msg.get_params(),
388 [('multipart/report', ''),
389 ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
390 ('report-type', old_val)])
391
392 def test_del_param_on_other_header(self):
393 msg = Message()
394 msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
395 msg.del_param('filename', 'content-disposition')
396 self.assertEqual(msg['content-disposition'], 'attachment')
397
R David Murraya2860e82011-04-16 09:20:30 -0400398 def test_del_param_on_nonexistent_header(self):
399 msg = Message()
400 msg.del_param('filename', 'content-disposition')
401
402 def test_del_nonexistent_param(self):
403 msg = Message()
404 msg.add_header('Content-Type', 'text/plain', charset='utf-8')
405 existing_header = msg['Content-Type']
406 msg.del_param('foobar', header='Content-Type')
407 self.assertEqual(msg['Content-Type'], 'text/plain; charset="utf-8"')
408
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000409 def test_set_type(self):
410 eq = self.assertEqual
411 msg = Message()
412 self.assertRaises(ValueError, msg.set_type, 'text')
413 msg.set_type('text/plain')
414 eq(msg['content-type'], 'text/plain')
415 msg.set_param('charset', 'us-ascii')
416 eq(msg['content-type'], 'text/plain; charset="us-ascii"')
417 msg.set_type('text/html')
418 eq(msg['content-type'], 'text/html; charset="us-ascii"')
419
420 def test_set_type_on_other_header(self):
421 msg = Message()
422 msg['X-Content-Type'] = 'text/plain'
423 msg.set_type('application/octet-stream', 'X-Content-Type')
424 self.assertEqual(msg['x-content-type'], 'application/octet-stream')
425
426 def test_get_content_type_missing(self):
427 msg = Message()
428 self.assertEqual(msg.get_content_type(), 'text/plain')
429
430 def test_get_content_type_missing_with_default_type(self):
431 msg = Message()
432 msg.set_default_type('message/rfc822')
433 self.assertEqual(msg.get_content_type(), 'message/rfc822')
434
435 def test_get_content_type_from_message_implicit(self):
436 msg = self._msgobj('msg_30.txt')
437 self.assertEqual(msg.get_payload(0).get_content_type(),
438 'message/rfc822')
439
440 def test_get_content_type_from_message_explicit(self):
441 msg = self._msgobj('msg_28.txt')
442 self.assertEqual(msg.get_payload(0).get_content_type(),
443 'message/rfc822')
444
445 def test_get_content_type_from_message_text_plain_implicit(self):
446 msg = self._msgobj('msg_03.txt')
447 self.assertEqual(msg.get_content_type(), 'text/plain')
448
449 def test_get_content_type_from_message_text_plain_explicit(self):
450 msg = self._msgobj('msg_01.txt')
451 self.assertEqual(msg.get_content_type(), 'text/plain')
452
453 def test_get_content_maintype_missing(self):
454 msg = Message()
455 self.assertEqual(msg.get_content_maintype(), 'text')
456
457 def test_get_content_maintype_missing_with_default_type(self):
458 msg = Message()
459 msg.set_default_type('message/rfc822')
460 self.assertEqual(msg.get_content_maintype(), 'message')
461
462 def test_get_content_maintype_from_message_implicit(self):
463 msg = self._msgobj('msg_30.txt')
464 self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
465
466 def test_get_content_maintype_from_message_explicit(self):
467 msg = self._msgobj('msg_28.txt')
468 self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
469
470 def test_get_content_maintype_from_message_text_plain_implicit(self):
471 msg = self._msgobj('msg_03.txt')
472 self.assertEqual(msg.get_content_maintype(), 'text')
473
474 def test_get_content_maintype_from_message_text_plain_explicit(self):
475 msg = self._msgobj('msg_01.txt')
476 self.assertEqual(msg.get_content_maintype(), 'text')
477
478 def test_get_content_subtype_missing(self):
479 msg = Message()
480 self.assertEqual(msg.get_content_subtype(), 'plain')
481
482 def test_get_content_subtype_missing_with_default_type(self):
483 msg = Message()
484 msg.set_default_type('message/rfc822')
485 self.assertEqual(msg.get_content_subtype(), 'rfc822')
486
487 def test_get_content_subtype_from_message_implicit(self):
488 msg = self._msgobj('msg_30.txt')
489 self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
490
491 def test_get_content_subtype_from_message_explicit(self):
492 msg = self._msgobj('msg_28.txt')
493 self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
494
495 def test_get_content_subtype_from_message_text_plain_implicit(self):
496 msg = self._msgobj('msg_03.txt')
497 self.assertEqual(msg.get_content_subtype(), 'plain')
498
499 def test_get_content_subtype_from_message_text_plain_explicit(self):
500 msg = self._msgobj('msg_01.txt')
501 self.assertEqual(msg.get_content_subtype(), 'plain')
502
503 def test_get_content_maintype_error(self):
504 msg = Message()
505 msg['Content-Type'] = 'no-slash-in-this-string'
506 self.assertEqual(msg.get_content_maintype(), 'text')
507
508 def test_get_content_subtype_error(self):
509 msg = Message()
510 msg['Content-Type'] = 'no-slash-in-this-string'
511 self.assertEqual(msg.get_content_subtype(), 'plain')
512
513 def test_replace_header(self):
514 eq = self.assertEqual
515 msg = Message()
516 msg.add_header('First', 'One')
517 msg.add_header('Second', 'Two')
518 msg.add_header('Third', 'Three')
519 eq(msg.keys(), ['First', 'Second', 'Third'])
520 eq(msg.values(), ['One', 'Two', 'Three'])
521 msg.replace_header('Second', 'Twenty')
522 eq(msg.keys(), ['First', 'Second', 'Third'])
523 eq(msg.values(), ['One', 'Twenty', 'Three'])
524 msg.add_header('First', 'Eleven')
525 msg.replace_header('First', 'One Hundred')
526 eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
527 eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
528 self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')
529
530 def test_broken_base64_payload(self):
531 x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
532 msg = Message()
533 msg['content-type'] = 'audio/x-midi'
534 msg['content-transfer-encoding'] = 'base64'
535 msg.set_payload(x)
536 self.assertEqual(msg.get_payload(decode=True),
Guido van Rossum9604e662007-08-30 03:46:43 +0000537 bytes(x, 'raw-unicode-escape'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000538
R David Murraya2860e82011-04-16 09:20:30 -0400539 def test_broken_unicode_payload(self):
540 # This test improves coverage but is not a compliance test.
541 # The behavior in this situation is currently undefined by the API.
542 x = 'this is a br\xf6ken thing to do'
543 msg = Message()
544 msg['content-type'] = 'text/plain'
545 msg['content-transfer-encoding'] = '8bit'
546 msg.set_payload(x)
547 self.assertEqual(msg.get_payload(decode=True),
548 bytes(x, 'raw-unicode-escape'))
549
550 def test_questionable_bytes_payload(self):
551 # This test improves coverage but is not a compliance test,
552 # since it involves poking inside the black box.
553 x = 'this is a quéstionable thing to do'.encode('utf-8')
554 msg = Message()
555 msg['content-type'] = 'text/plain; charset="utf-8"'
556 msg['content-transfer-encoding'] = '8bit'
557 msg._payload = x
558 self.assertEqual(msg.get_payload(decode=True), x)
559
R. David Murray7ec754b2010-12-13 23:51:19 +0000560 # Issue 1078919
561 def test_ascii_add_header(self):
562 msg = Message()
563 msg.add_header('Content-Disposition', 'attachment',
564 filename='bud.gif')
565 self.assertEqual('attachment; filename="bud.gif"',
566 msg['Content-Disposition'])
567
568 def test_noascii_add_header(self):
569 msg = Message()
570 msg.add_header('Content-Disposition', 'attachment',
571 filename="Fußballer.ppt")
572 self.assertEqual(
R. David Murraydfd7eb02010-12-24 22:36:49 +0000573 'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
R. David Murray7ec754b2010-12-13 23:51:19 +0000574 msg['Content-Disposition'])
575
576 def test_nonascii_add_header_via_triple(self):
577 msg = Message()
578 msg.add_header('Content-Disposition', 'attachment',
579 filename=('iso-8859-1', '', 'Fußballer.ppt'))
580 self.assertEqual(
R. David Murraydfd7eb02010-12-24 22:36:49 +0000581 'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
582 msg['Content-Disposition'])
583
584 def test_ascii_add_header_with_tspecial(self):
585 msg = Message()
586 msg.add_header('Content-Disposition', 'attachment',
587 filename="windows [filename].ppt")
588 self.assertEqual(
589 'attachment; filename="windows [filename].ppt"',
590 msg['Content-Disposition'])
591
592 def test_nonascii_add_header_with_tspecial(self):
593 msg = Message()
594 msg.add_header('Content-Disposition', 'attachment',
595 filename="Fußballer [filename].ppt")
596 self.assertEqual(
597 "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
R. David Murray7ec754b2010-12-13 23:51:19 +0000598 msg['Content-Disposition'])
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000599
R David Murraya2860e82011-04-16 09:20:30 -0400600 def test_add_header_with_name_only_param(self):
601 msg = Message()
602 msg.add_header('Content-Disposition', 'inline', foo_bar=None)
603 self.assertEqual("inline; foo-bar", msg['Content-Disposition'])
604
605 def test_add_header_with_no_value(self):
606 msg = Message()
607 msg.add_header('X-Status', None)
608 self.assertEqual('', msg['X-Status'])
609
R. David Murray5b2d9dd2011-01-09 02:35:24 +0000610 # Issue 5871: reject an attempt to embed a header inside a header value
611 # (header injection attack).
612 def test_embeded_header_via_Header_rejected(self):
613 msg = Message()
614 msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
615 self.assertRaises(errors.HeaderParseError, msg.as_string)
616
617 def test_embeded_header_via_string_rejected(self):
618 msg = Message()
619 msg['Dummy'] = 'dummy\nX-Injected-Header: test'
620 self.assertRaises(errors.HeaderParseError, msg.as_string)
621
R David Murray7441a7a2012-03-14 02:59:51 -0400622 def test_unicode_header_defaults_to_utf8_encoding(self):
623 # Issue 14291
624 m = MIMEText('abc\n')
625 m['Subject'] = 'É test'
626 self.assertEqual(str(m),textwrap.dedent("""\
627 Content-Type: text/plain; charset="us-ascii"
628 MIME-Version: 1.0
629 Content-Transfer-Encoding: 7bit
630 Subject: =?utf-8?q?=C3=89_test?=
631
632 abc
633 """))
634
# Test the email.encoders module
636class TestEncoders(unittest.TestCase):
R David Murray6d94bd42011-03-16 15:52:22 -0400637
638 def test_EncodersEncode_base64(self):
639 with openfile('PyBanner048.gif', 'rb') as fp:
640 bindata = fp.read()
641 mimed = email.mime.image.MIMEImage(bindata)
642 base64ed = mimed.get_payload()
643 # the transfer-encoded body lines should all be <=76 characters
644 lines = base64ed.split('\n')
645 self.assertLessEqual(max([ len(x) for x in lines ]), 76)
646
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000647 def test_encode_empty_payload(self):
648 eq = self.assertEqual
649 msg = Message()
650 msg.set_charset('us-ascii')
651 eq(msg['content-transfer-encoding'], '7bit')
652
653 def test_default_cte(self):
654 eq = self.assertEqual
Ezio Melottic303c122010-04-22 11:57:12 +0000655 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000656 msg = MIMEText('hello world')
657 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic303c122010-04-22 11:57:12 +0000658 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000659 msg = MIMEText('hello \xf8 world')
660 eq(msg['content-transfer-encoding'], '8bit')
661 # And now with a different charset
662 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
663 eq(msg['content-transfer-encoding'], 'quoted-printable')
664
R. David Murraye85200d2010-05-06 01:41:14 +0000665 def test_encode7or8bit(self):
666 # Make sure a charset whose input character set is 8bit but
667 # whose output character set is 7bit gets a transfer-encoding
668 # of 7bit.
669 eq = self.assertEqual
R. David Murray850fc852010-06-03 01:58:28 +0000670 msg = MIMEText('文', _charset='euc-jp')
R. David Murraye85200d2010-05-06 01:41:14 +0000671 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000672
R David Murrayf581b372013-02-05 10:49:49 -0500673 def test_qp_encode_latin1(self):
674 msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
675 self.assertEqual(str(msg), textwrap.dedent("""\
676 MIME-Version: 1.0
677 Content-Type: text/text; charset="iso-8859-1"
678 Content-Transfer-Encoding: quoted-printable
679
680 =E1=F6
681 """))
682
683 def test_qp_encode_non_latin1(self):
684 # Issue 16948
685 msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
686 self.assertEqual(str(msg), textwrap.dedent("""\
687 MIME-Version: 1.0
688 Content-Type: text/text; charset="iso-8859-2"
689 Content-Transfer-Encoding: quoted-printable
690
691 =BF
692 """))
693
Ezio Melottib3aedd42010-11-20 19:04:17 +0000694
# Test long header wrapping
696class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400697
698 maxDiff = None
699
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000700 def test_split_long_continuation(self):
701 eq = self.ndiffAssertEqual
702 msg = email.message_from_string("""\
703Subject: bug demonstration
704\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
705\tmore text
706
707test
708""")
709 sfp = StringIO()
710 g = Generator(sfp)
711 g.flatten(msg)
712 eq(sfp.getvalue(), """\
713Subject: bug demonstration
714\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
715\tmore text
716
717test
718""")
719
720 def test_another_long_almost_unsplittable_header(self):
721 eq = self.ndiffAssertEqual
722 hstr = """\
723bug demonstration
724\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
725\tmore text"""
726 h = Header(hstr, continuation_ws='\t')
727 eq(h.encode(), """\
728bug demonstration
729\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
730\tmore text""")
731 h = Header(hstr.replace('\t', ' '))
732 eq(h.encode(), """\
733bug demonstration
734 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
735 more text""")
736
737 def test_long_nonstring(self):
738 eq = self.ndiffAssertEqual
739 g = Charset("iso-8859-1")
740 cz = Charset("iso-8859-2")
741 utf8 = Charset("utf-8")
742 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
743 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
744 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
745 b'bef\xf6rdert. ')
746 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
747 b'd\xf9vtipu.. ')
748 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
749 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
750 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
751 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
752 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
753 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
754 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
755 '\u3044\u307e\u3059\u3002')
756 h = Header(g_head, g, header_name='Subject')
757 h.append(cz_head, cz)
758 h.append(utf8_head, utf8)
759 msg = Message()
760 msg['Subject'] = h
761 sfp = StringIO()
762 g = Generator(sfp)
763 g.flatten(msg)
764 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000765Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
766 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
767 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
768 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
769 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
770 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
771 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
772 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
773 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
774 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
775 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000776
777""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000778 eq(h.encode(maxlinelen=76), """\
779=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
780 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
781 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
782 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
783 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
784 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
785 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
786 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
787 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
788 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
789 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000790
791 def test_long_header_encode(self):
792 eq = self.ndiffAssertEqual
793 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
794 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
795 header_name='X-Foobar-Spoink-Defrobnit')
796 eq(h.encode(), '''\
797wasnipoop; giraffes="very-long-necked-animals";
798 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
799
800 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
801 eq = self.ndiffAssertEqual
802 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
803 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
804 header_name='X-Foobar-Spoink-Defrobnit',
805 continuation_ws='\t')
806 eq(h.encode(), '''\
807wasnipoop; giraffes="very-long-necked-animals";
808 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
809
810 def test_long_header_encode_with_tab_continuation(self):
811 eq = self.ndiffAssertEqual
812 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
813 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
814 header_name='X-Foobar-Spoink-Defrobnit',
815 continuation_ws='\t')
816 eq(h.encode(), '''\
817wasnipoop; giraffes="very-long-necked-animals";
818\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
819
R David Murray3a6152f2011-03-14 21:13:03 -0400820 def test_header_encode_with_different_output_charset(self):
821 h = Header('文', 'euc-jp')
822 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
823
824 def test_long_header_encode_with_different_output_charset(self):
825 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
826 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
827 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
828 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
829 res = """\
830=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
831 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
832 self.assertEqual(h.encode(), res)
833
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000834 def test_header_splitter(self):
835 eq = self.ndiffAssertEqual
836 msg = MIMEText('')
837 # It'd be great if we could use add_header() here, but that doesn't
838 # guarantee an order of the parameters.
839 msg['X-Foobar-Spoink-Defrobnit'] = (
840 'wasnipoop; giraffes="very-long-necked-animals"; '
841 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
842 sfp = StringIO()
843 g = Generator(sfp)
844 g.flatten(msg)
845 eq(sfp.getvalue(), '''\
846Content-Type: text/plain; charset="us-ascii"
847MIME-Version: 1.0
848Content-Transfer-Encoding: 7bit
849X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
850 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
851
852''')
853
854 def test_no_semis_header_splitter(self):
855 eq = self.ndiffAssertEqual
856 msg = Message()
857 msg['From'] = 'test@dom.ain'
858 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
859 msg.set_payload('Test')
860 sfp = StringIO()
861 g = Generator(sfp)
862 g.flatten(msg)
863 eq(sfp.getvalue(), """\
864From: test@dom.ain
865References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
866 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
867
868Test""")
869
R David Murray7da4db12011-04-07 20:37:17 -0400870 def test_last_split_chunk_does_not_fit(self):
871 eq = self.ndiffAssertEqual
872 h = Header('Subject: the first part of this is short, but_the_second'
873 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
874 '_all_by_itself')
875 eq(h.encode(), """\
876Subject: the first part of this is short,
877 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
878
879 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
880 eq = self.ndiffAssertEqual
881 h = Header(', but_the_second'
882 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
883 '_all_by_itself')
884 eq(h.encode(), """\
885,
886 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
887
888 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
889 eq = self.ndiffAssertEqual
890 h = Header(', , but_the_second'
891 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
892 '_all_by_itself')
893 eq(h.encode(), """\
894, ,
895 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
896
897 def test_trailing_splitable_on_overlong_unsplitable(self):
898 eq = self.ndiffAssertEqual
899 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
900 'be_on_a_line_all_by_itself;')
901 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
902 "be_on_a_line_all_by_itself;")
903
904 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
905 eq = self.ndiffAssertEqual
906 h = Header('; '
907 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400908 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400909 eq(h.encode(), """\
910;
R David Murray01581ee2011-04-18 10:04:34 -0400911 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400912
R David Murraye1292a22011-04-07 20:54:03 -0400913 def test_long_header_with_multiple_sequential_split_chars(self):
R David Murraye1292a22011-04-07 20:54:03 -0400914 eq = self.ndiffAssertEqual
915 h = Header('This is a long line that has two whitespaces in a row. '
916 'This used to cause truncation of the header when folded')
917 eq(h.encode(), """\
918This is a long line that has two whitespaces in a row. This used to cause
919 truncation of the header when folded""")
920
R David Murray01581ee2011-04-18 10:04:34 -0400921 def test_splitter_split_on_punctuation_only_if_fws(self):
922 eq = self.ndiffAssertEqual
923 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
924 'they;arenotlegal;fold,points')
925 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
926 "arenotlegal;fold,points")
927
928 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
929 eq = self.ndiffAssertEqual
930 h = Header('this is a test where we need to have more than one line '
931 'before; our final line that is just too big to fit;; '
932 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
933 'be_on_a_line_all_by_itself;')
934 eq(h.encode(), """\
935this is a test where we need to have more than one line before;
936 our final line that is just too big to fit;;
937 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
938
939 def test_overlong_last_part_followed_by_split_point(self):
940 eq = self.ndiffAssertEqual
941 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
942 'be_on_a_line_all_by_itself ')
943 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
944 "should_be_on_a_line_all_by_itself ")
945
946 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
947 eq = self.ndiffAssertEqual
948 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
949 'before_our_final_line_; ; '
950 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
951 'be_on_a_line_all_by_itself; ')
952 eq(h.encode(), """\
953this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
954 ;
955 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
956
957 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
958 eq = self.ndiffAssertEqual
959 h = Header('this is a test where we need to have more than one line '
960 'before our final line; ; '
961 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
962 'be_on_a_line_all_by_itself; ')
963 eq(h.encode(), """\
964this is a test where we need to have more than one line before our final line;
965 ;
966 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
967
968 def test_long_header_with_whitespace_runs(self):
969 eq = self.ndiffAssertEqual
970 msg = Message()
971 msg['From'] = 'test@dom.ain'
972 msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
973 msg.set_payload('Test')
974 sfp = StringIO()
975 g = Generator(sfp)
976 g.flatten(msg)
977 eq(sfp.getvalue(), """\
978From: test@dom.ain
979References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
980 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
981 <foo@dom.ain> <foo@dom.ain>\x20\x20
982
983Test""")
984
985 def test_long_run_with_semi_header_splitter(self):
986 eq = self.ndiffAssertEqual
987 msg = Message()
988 msg['From'] = 'test@dom.ain'
989 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
990 msg.set_payload('Test')
991 sfp = StringIO()
992 g = Generator(sfp)
993 g.flatten(msg)
994 eq(sfp.getvalue(), """\
995From: test@dom.ain
996References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
997 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
998 <foo@dom.ain>; abc
999
1000Test""")
1001
1002 def test_splitter_split_on_punctuation_only_if_fws(self):
1003 eq = self.ndiffAssertEqual
1004 msg = Message()
1005 msg['From'] = 'test@dom.ain'
1006 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
1007 'they;arenotlegal;fold,points')
1008 msg.set_payload('Test')
1009 sfp = StringIO()
1010 g = Generator(sfp)
1011 g.flatten(msg)
1012 # XXX the space after the header should not be there.
1013 eq(sfp.getvalue(), """\
1014From: test@dom.ain
1015References:\x20
1016 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
1017
1018Test""")
1019
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001020 def test_no_split_long_header(self):
1021 eq = self.ndiffAssertEqual
1022 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +00001023 h = Header(hstr)
1024 # These come on two lines because Headers are really field value
1025 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001026 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001027References:
1028 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1029 h = Header('x' * 80)
1030 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001031
1032 def test_splitting_multiple_long_lines(self):
1033 eq = self.ndiffAssertEqual
1034 hstr = """\
1035from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1036\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1037\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1038"""
1039 h = Header(hstr, continuation_ws='\t')
1040 eq(h.encode(), """\
1041from babylon.socal-raves.org (localhost [127.0.0.1]);
1042 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1043 for <mailman-admin@babylon.socal-raves.org>;
1044 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1045\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1046 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1047 for <mailman-admin@babylon.socal-raves.org>;
1048 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1049\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1050 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1051 for <mailman-admin@babylon.socal-raves.org>;
1052 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1053
1054 def test_splitting_first_line_only_is_long(self):
1055 eq = self.ndiffAssertEqual
1056 hstr = """\
1057from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1058\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1059\tid 17k4h5-00034i-00
1060\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1061 h = Header(hstr, maxlinelen=78, header_name='Received',
1062 continuation_ws='\t')
1063 eq(h.encode(), """\
1064from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1065 helo=cthulhu.gerg.ca)
1066\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1067\tid 17k4h5-00034i-00
1068\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1069
1070 def test_long_8bit_header(self):
1071 eq = self.ndiffAssertEqual
1072 msg = Message()
1073 h = Header('Britische Regierung gibt', 'iso-8859-1',
1074 header_name='Subject')
1075 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001076 eq(h.encode(maxlinelen=76), """\
1077=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1078 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001079 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001080 eq(msg.as_string(maxheaderlen=76), """\
1081Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1082 =?iso-8859-1?q?hore-Windkraftprojekte?=
1083
1084""")
1085 eq(msg.as_string(maxheaderlen=0), """\
1086Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001087
1088""")
1089
1090 def test_long_8bit_header_no_charset(self):
1091 eq = self.ndiffAssertEqual
1092 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001093 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1094 'f\xfcr Offshore-Windkraftprojekte '
1095 '<a-very-long-address@example.com>')
1096 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001097 eq(msg.as_string(maxheaderlen=78), """\
1098Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1099 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1100
1101""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001102 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001103 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001104 header_name='Reply-To')
1105 eq(msg.as_string(maxheaderlen=78), """\
1106Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1107 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001108
1109""")
1110
1111 def test_long_to_header(self):
1112 eq = self.ndiffAssertEqual
1113 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001114 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001115 '"Someone Test #B" <someone@umich.edu>, '
1116 '"Someone Test #C" <someone@eecs.umich.edu>, '
1117 '"Someone Test #D" <someone@eecs.umich.edu>')
1118 msg = Message()
1119 msg['To'] = to
1120 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001121To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001122 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001123 "Someone Test #C" <someone@eecs.umich.edu>,
1124 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001125
1126''')
1127
1128 def test_long_line_after_append(self):
1129 eq = self.ndiffAssertEqual
1130 s = 'This is an example of string which has almost the limit of header length.'
1131 h = Header(s)
1132 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001133 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001134This is an example of string which has almost the limit of header length.
1135 Add another line.""")
1136
1137 def test_shorter_line_with_append(self):
1138 eq = self.ndiffAssertEqual
1139 s = 'This is a shorter line.'
1140 h = Header(s)
1141 h.append('Add another sentence. (Surprise?)')
1142 eq(h.encode(),
1143 'This is a shorter line. Add another sentence. (Surprise?)')
1144
1145 def test_long_field_name(self):
1146 eq = self.ndiffAssertEqual
1147 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001148 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1149 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1150 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1151 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001152 h = Header(gs, 'iso-8859-1', header_name=fn)
1153 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001154 eq(h.encode(maxlinelen=76), """\
1155=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1156 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1157 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1158 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001159
1160 def test_long_received_header(self):
1161 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1162 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1163 'Wed, 05 Mar 2003 18:10:18 -0700')
1164 msg = Message()
1165 msg['Received-1'] = Header(h, continuation_ws='\t')
1166 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001167 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001168 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001169Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1170 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001171 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001172Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1173 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001174 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001175
1176""")
1177
1178 def test_string_headerinst_eq(self):
1179 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1180 'tu-muenchen.de> (David Bremner\'s message of '
1181 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1182 msg = Message()
1183 msg['Received-1'] = Header(h, header_name='Received-1',
1184 continuation_ws='\t')
1185 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001186 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001187 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001188Received-1:\x20
1189 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1190 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1191Received-2:\x20
1192 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1193 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001194
1195""")
1196
1197 def test_long_unbreakable_lines_with_continuation(self):
1198 eq = self.ndiffAssertEqual
1199 msg = Message()
1200 t = """\
1201iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1202 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1203 msg['Face-1'] = t
1204 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001205 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001206 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001207 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001208 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001209Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001210 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001211 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001212Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001213 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001214 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001215Face-3:\x20
1216 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1217 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001218
1219""")
1220
1221 def test_another_long_multiline_header(self):
1222 eq = self.ndiffAssertEqual
1223 m = ('Received: from siimage.com '
1224 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001225 'Microsoft SMTPSVC(5.0.2195.4905); '
1226 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001227 msg = email.message_from_string(m)
1228 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001229Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1230 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001231
1232''')
1233
1234 def test_long_lines_with_different_header(self):
1235 eq = self.ndiffAssertEqual
1236 h = ('List-Unsubscribe: '
1237 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1238 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1239 '?subject=unsubscribe>')
1240 msg = Message()
1241 msg['List'] = h
1242 msg['List'] = Header(h, header_name='List')
1243 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001244List: List-Unsubscribe:
1245 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001246 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001247List: List-Unsubscribe:
1248 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001249 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001250
1251""")
1252
R. David Murray6f0022d2011-01-07 21:57:25 +00001253 def test_long_rfc2047_header_with_embedded_fws(self):
1254 h = Header(textwrap.dedent("""\
1255 We're going to pretend this header is in a non-ascii character set
1256 \tto see if line wrapping with encoded words and embedded
1257 folding white space works"""),
1258 charset='utf-8',
1259 header_name='Test')
1260 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1261 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1262 =?utf-8?q?cter_set?=
1263 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1264 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1265
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001266
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001267# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # A message whose body begins with a line starting with "From ".
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload('From the desk of A.A.A.:\n'
                            'Blah blah blah\n')
        self.msg = message

    def test_mangled_from(self):
        # mangle_from_=True: the body's "From " line gains a '>' prefix.
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        expected = ('From: aaa@bbb.org\n'
                    '\n'
                    '>From the desk of A.A.A.:\n'
                    'Blah blah blah\n')
        self.assertEqual(out.getvalue(), expected)

    def test_dont_mangle_from(self):
        # mangle_from_=False: the body is written out untouched.
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        expected = ('From: aaa@bbb.org\n'
                    '\n'
                    'From the desk of A.A.A.:\n'
                    'Blah blah blah\n')
        self.assertEqual(out.getvalue(), expected)

    def test_mangle_from_in_preamble_and_epilog(self):
        # The multipart source has one "From " line before the first boundary
        # and one after the closing boundary; exactly two '>From ' lines are
        # expected in the output.
        out = StringIO()
        generator = Generator(out, mangle_from_=True)
        msg = email.message_from_string(textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """))
        generator.flatten(msg)
        mangled = [line for line in out.getvalue().split('\n')
                   if line.startswith('>From ')]
        self.assertEqual(len(mangled), 2)

    def test_mangled_from_with_bad_bytes(self):
        # Bytes path: the body's "From " line contains non-ASCII bytes and is
        # still expected to be mangled, with those bytes passed through.
        source = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

            """).encode('utf-8')
        msg = email.message_from_bytes(source + b'From R\xc3\xb6lli\n')
        out = BytesIO()
        BytesGenerator(out, mangle_from_=True).flatten(msg)
        self.assertEqual(out.getvalue(), source + b'>From R\xc3\xb6lli\n')
1335
Ezio Melottib3aedd42010-11-20 19:04:17 +00001336
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001337# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        with open(findfile('audiotest.au', datadir), 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        # With no explicit subtype the audio data is expected to be
        # recognised as audio/basic.
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is ASCII text that decodes (base64) back to the
        # original bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        # An explicitly supplied subtype is used as given.
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A unique sentinel object; assertIs reports both operands on failure,
        # which assertTrue(x is missing) would not.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1381
1382
Ezio Melottib3aedd42010-11-20 19:04:17 +00001383
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001384# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Load the GIF fixture through the file-level openfile() helper.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        # With no explicit subtype the image data is expected to be
        # recognised as image/gif.
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is ASCII text that decodes (base64) back to the
        # original bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        # An explicitly supplied subtype is used as given.
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A unique sentinel object; assertIs reports both operands on failure,
        # which assertTrue(x is missing) would not.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1422
1423
Ezio Melottib3aedd42010-11-20 19:04:17 +00001424
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001425# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        # Binary data gets the generic octet-stream type and a base64 CTE.
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), bytesdata)

    def test_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        # Flattening must not have altered the original message ...
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        # ... and the re-parsed copy must show the same string and bytes
        # payloads.  Previously only the bytes payload of msg2 was checked.
        self.assertEqual(msg2.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001457
Ezio Melottib3aedd42010-11-20 19:04:17 +00001458
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001459# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    """Exercise the basic MIMEText class."""

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        msg = self._msg
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_param('charset'), 'us-ascii')
        # A unique sentinel lets us tell "failobj returned" apart from any
        # value that merely compares equal to it.
        missing = []
        self.assertIs(msg.get_param('foobar', missing), missing)
        self.assertIs(msg.get_param('charset', missing, header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        msg = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(msg.get_charset().input_charset, 'us-ascii')
        self.assertEqual(msg['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        msg = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(msg.get_charset().input_charset, 'us-ascii')
        self.assertEqual(msg['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        msg = MIMEText('hello there')
        # Compared with ==, not identity: get_charset() does not return a str.
        self.assertEqual(msg.get_charset(), 'us-ascii')
        self.assertEqual(msg['content-type'],
                         'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        msg = MIMEText(teststr, _charset='utf-8')
        self.assertEqual(msg.get_charset().output_charset, 'utf-8')
        self.assertEqual(msg['content-type'], 'text/plain; charset="utf-8"')
        self.assertEqual(msg.get_payload(decode=True),
                         teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1510
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001511
Ezio Melottib3aedd42010-11-20 19:04:17 +00001512
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001513# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    """Tests for building, flattening and parsing multipart/* messages."""

    def setUp(self):
        # Fixture: a multipart/mixed container holding a short text intro and
        # a GIF image, with From/To/Subject/Date headers set.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # Pick the UTC offset that was in effect at `now`: the standard-time
        # offset when the DST flag is 0, the DST offset otherwise.
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone / time.altzone count seconds *west* of UTC, so a
        # positive value is written with a '-' sign.
        sign = '-' if tzsecs > 0 else '+'
        # Format the magnitude as HHMM.  Working on abs() keeps a second sign
        # out of the digits for zones east of UTC, and divmod keeps the
        # minutes right for offsets that are not a whole number of hours.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def _make_outer(self):
        # Return a fresh multipart/mixed MIMEBase carrying the Subject, To and
        # From headers shared by the flattening tests below.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        return outer

    def test_hierarchy(self):
        m = self._msg
        self.assertTrue(m.is_multipart())
        self.assertEqual(m.get_content_type(), 'multipart/mixed')
        self.assertEqual(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The container must hand back the very objects that were attached.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        self.assertEqual(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._make_outer()
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._make_outer()
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        outer = self._make_outer()
        outer.set_boundary('BOUNDARY')
        outer.attach(MIMEText('hello world'))
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        outer = self._make_outer()
        outer.preamble = ''
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        outer = self._make_outer()
        outer.preamble = None
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._make_outer()
        outer.epilogue = None
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._make_outer()
        outer.epilogue = ''
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        outer = self._make_outer()
        outer.epilogue = '\n'
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        # Each alternative is a message/external-body wrapping one text/plain.
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the expected dump depends on the exact indentation
        # _structure() writes per nesting level -- confirm the leading
        # whitespace of each line against its real output.
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
 multipart/mixed
 multipart/alternative
 text/plain
 text/plain
 text/plain
 text/plain
""")

    def test_nested_with_same_boundary(self):
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the expected dump depends on the exact indentation
        # _structure() writes per nesting level -- confirm the leading
        # whitespace of each line against its real output.
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
 multipart/mixed
 multipart/alternative
 application/octet-stream
 application/octet-stream
 text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type must not trigger
        # multipart parsing: the body has to round-trip verbatim.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        self.assertEqual(msg.get_boundary(), ' XXXX')
        self.assertEqual(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001885
1886
Ezio Melottib3aedd42010-11-20 19:04:17 +00001887
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001888# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Tests for parsing badly formatted messages and the defects recorded."""

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        # Without a boundary the body cannot be split, so it stays a string.
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        Generator(s).flatten(msg)
        self.ndiffAssertEqual(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        msg = self._msgobj('msg_31.txt')
        self.ndiffAssertEqual(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        msg = self._msgobj('msg_35.txt')
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        # The leading continuation line yields no header and is reported as a
        # defect; the remaining lines become the body.
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        eq(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        eq(msg.defects[0].line, ' Line 1\n')
1999
2000
Ezio Melottib3aedd42010-11-20 19:04:17 +00002001
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002002# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """Tests for RFC 2047 header encoding and decoding."""

    def test_rfc2047_multiline(self):
        raw = ('Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz\n'
               ' foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(chunks)
        self.assertEqual(
            str(header),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Andr\xe9', 'iso-8859-1'),
                                  (b'Pirard <pirard@dom.ain>', None)])
        self.assertEqual(str(make_header(chunks)),
                         'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        raw = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None),
            (b'lazy dog', 'iso-8859-1')])
        self.assertEqual(str(make_header(chunks)),
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued to surrounding text are not decoded: the whole
        # header comes back as a single undecoded chunk.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm', None),
            (b'\xf6', 'iso-8859-1'),
            (b'rg', None),
            (b'\xe5', 'iso-8859-1'),
            (b'sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # only test complete bytes
        cases = (
            ('dm==', b'v'),
            ('dm=', b'v'),
            ('dm', b'v'),
            ('dmk=', b'vi'),
            ('dmk', b'vi'),
        )
        for encoded, expected in cases:
            decoded = decode_header(template % encoded)
            self.assertEqual(decoded, [(expected, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        # '=zz' is not a valid hex escape and must be passed through as-is.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(raw),
                         [(b'andr\xe9=zz', 'iso-8659-1')])
2066
Ezio Melottib3aedd42010-11-20 19:04:17 +00002067
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002068# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for the MIMEMessage class and message/* content types."""

    def setUp(self):
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def _check_default_types(self, filename):
        # Parse `filename` and check that both top-level parts report
        # message/rfc822, and their single enclosed messages text/plain, as
        # both default type and content type.
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_type_error(self):
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapper holds the enclosed message itself, not a copy.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        # A message/rfc822 part already holds its one enclosed message.
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        Generator(s).flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the indented lines of this expected body must match
        # msg_16.txt byte for byte -- confirm their leading whitespace against
        # the data file.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # Building the message by hand must flatten to exactly msg_21.txt.
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        Generator(sfp).flatten(msg)
        self.ndiffAssertEqual(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        self._check_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed the subparts must still report
        # message/rfc822, and flatten with an empty header block.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002367
Ezio Melottib3aedd42010-11-20 19:04:17 +00002368
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):
    """Round-trip tests: parse a sample message, then regenerate its text.

    Every test reads a sample file, parses it into a message object tree
    and, without modifying the tree, flattens it again with a Generator.
    The regenerated text must be identical to the text that was read.
    """

    # Line separator expected in parsed preambles, epilogues and payloads.
    linesep = '\n'

    def _msgobj(self, filename):
        """Return ``(msg, text)`` for the sample file *filename*.

        *text* is the full content read from the file and *msg* is the
        message object parsed from that text.
        """
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        """Assert that flattening *msg* reproduces *text* exactly.

        Header wrapping is disabled (``maxheaderlen=0``) so long headers
        are emitted as they were read.  *unixfrom* is passed through to
        ``Generator.flatten``.
        """
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        # This sample is regenerated with its Unix-From line included.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        # The message/rfc822 part wraps exactly one nested message.
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002531
2532
Ezio Melottib3aedd42010-11-20 19:04:17 +00002533
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002534# Test various other bits of the package's functionality
2535class TestMiscellaneous(TestEmailBase):
2536 def test_message_from_string(self):
2537 with openfile('msg_01.txt') as fp:
2538 text = fp.read()
2539 msg = email.message_from_string(text)
2540 s = StringIO()
2541 # Don't wrap/continue long headers since we're trying to test
2542 # idempotency.
2543 g = Generator(s, maxheaderlen=0)
2544 g.flatten(msg)
2545 self.assertEqual(text, s.getvalue())
2546
2547 def test_message_from_file(self):
2548 with openfile('msg_01.txt') as fp:
2549 text = fp.read()
2550 fp.seek(0)
2551 msg = email.message_from_file(fp)
2552 s = StringIO()
2553 # Don't wrap/continue long headers since we're trying to test
2554 # idempotency.
2555 g = Generator(s, maxheaderlen=0)
2556 g.flatten(msg)
2557 self.assertEqual(text, s.getvalue())
2558
2559 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002560 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002561 with openfile('msg_01.txt') as fp:
2562 text = fp.read()
2563
2564 # Create a subclass
2565 class MyMessage(Message):
2566 pass
2567
2568 msg = email.message_from_string(text, MyMessage)
2569 unless(isinstance(msg, MyMessage))
2570 # Try something more complicated
2571 with openfile('msg_02.txt') as fp:
2572 text = fp.read()
2573 msg = email.message_from_string(text, MyMessage)
2574 for subpart in msg.walk():
2575 unless(isinstance(subpart, MyMessage))
2576
2577 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002578 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002579 # Create a subclass
2580 class MyMessage(Message):
2581 pass
2582
2583 with openfile('msg_01.txt') as fp:
2584 msg = email.message_from_file(fp, MyMessage)
2585 unless(isinstance(msg, MyMessage))
2586 # Try something more complicated
2587 with openfile('msg_02.txt') as fp:
2588 msg = email.message_from_file(fp, MyMessage)
2589 for subpart in msg.walk():
2590 unless(isinstance(subpart, MyMessage))
2591
2592 def test__all__(self):
2593 module = __import__('email')
R David Murray1b6c7242012-03-16 22:43:05 -04002594 self.assertEqual(sorted(module.__all__), [
2595 'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2596 'generator', 'header', 'iterators', 'message',
2597 'message_from_binary_file', 'message_from_bytes',
2598 'message_from_file', 'message_from_string', 'mime', 'parser',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002599 'quoprimime', 'utils',
2600 ])
2601
2602 def test_formatdate(self):
2603 now = time.time()
2604 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2605 time.gmtime(now)[:6])
2606
2607 def test_formatdate_localtime(self):
2608 now = time.time()
2609 self.assertEqual(
2610 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2611 time.localtime(now)[:6])
2612
2613 def test_formatdate_usegmt(self):
2614 now = time.time()
2615 self.assertEqual(
2616 utils.formatdate(now, localtime=False),
2617 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2618 self.assertEqual(
2619 utils.formatdate(now, localtime=False, usegmt=True),
2620 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2621
2622 def test_parsedate_none(self):
2623 self.assertEqual(utils.parsedate(''), None)
2624
2625 def test_parsedate_compact(self):
2626 # The FWS after the comma is optional
2627 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2628 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2629
2630 def test_parsedate_no_dayofweek(self):
2631 eq = self.assertEqual
2632 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2633 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2634
2635 def test_parsedate_compact_no_dayofweek(self):
2636 eq = self.assertEqual
2637 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2638 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2639
R. David Murray4a62e892010-12-23 20:35:46 +00002640 def test_parsedate_no_space_before_positive_offset(self):
2641 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2642 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2643
2644 def test_parsedate_no_space_before_negative_offset(self):
2645 # Issue 1155362: we already handled '+' for this case.
2646 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2647 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2648
2649
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002650 def test_parsedate_acceptable_to_time_functions(self):
2651 eq = self.assertEqual
2652 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2653 t = int(time.mktime(timetup))
2654 eq(time.localtime(t)[:6], timetup[:6])
2655 eq(int(time.strftime('%Y', timetup)), 2003)
2656 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2657 t = int(time.mktime(timetup[:9]))
2658 eq(time.localtime(t)[:6], timetup[:6])
2659 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2660
Alexander Belopolskya07548e2012-06-21 20:34:09 -04002661 def test_mktime_tz(self):
2662 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2663 -1, -1, -1, 0)), 0)
2664 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2665 -1, -1, -1, 1234)), -1234)
2666
R. David Murray219d1c82010-08-25 00:45:55 +00002667 def test_parsedate_y2k(self):
2668 """Test for parsing a date with a two-digit year.
2669
2670 Parsing a date with a two-digit year should return the correct
2671 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2672 obsoletes RFC822) requires four-digit years.
2673
2674 """
2675 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2676 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2677 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2678 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2679
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002680 def test_parseaddr_empty(self):
2681 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2682 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2683
2684 def test_noquote_dump(self):
2685 self.assertEqual(
2686 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2687 'A Silly Person <person@dom.ain>')
2688
2689 def test_escape_dump(self):
2690 self.assertEqual(
2691 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2692 r'"A \(Very\) Silly Person" <person@dom.ain>')
2693 a = r'A \(Special\) Person'
2694 b = 'person@dom.ain'
2695 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2696
2697 def test_escape_backslashes(self):
2698 self.assertEqual(
2699 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2700 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2701 a = r'Arthur \Backslash\ Foobar'
2702 b = 'person@dom.ain'
2703 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2704
2705 def test_name_with_dot(self):
2706 x = 'John X. Doe <jxd@example.com>'
2707 y = '"John X. Doe" <jxd@example.com>'
2708 a, b = ('John X. Doe', 'jxd@example.com')
2709 self.assertEqual(utils.parseaddr(x), (a, b))
2710 self.assertEqual(utils.parseaddr(y), (a, b))
2711 # formataddr() quotes the name if there's a dot in it
2712 self.assertEqual(utils.formataddr((a, b)), y)
2713
R. David Murray5397e862010-10-02 15:58:26 +00002714 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2715 # issue 10005. Note that in the third test the second pair of
2716 # backslashes is not actually a quoted pair because it is not inside a
2717 # comment or quoted string: the address being parsed has a quoted
2718 # string containing a quoted backslash, followed by 'example' and two
2719 # backslashes, followed by another quoted string containing a space and
2720 # the word 'example'. parseaddr copies those two backslashes
2721 # literally. Per rfc5322 this is not technically correct since a \ may
2722 # not appear in an address outside of a quoted string. It is probably
2723 # a sensible Postel interpretation, though.
2724 eq = self.assertEqual
2725 eq(utils.parseaddr('""example" example"@example.com'),
2726 ('', '""example" example"@example.com'))
2727 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2728 ('', '"\\"example\\" example"@example.com'))
2729 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2730 ('', '"\\\\"example\\\\" example"@example.com'))
2731
R. David Murray63563cd2010-12-18 18:25:38 +00002732 def test_parseaddr_preserves_spaces_in_local_part(self):
2733 # issue 9286. A normal RFC5322 local part should not contain any
2734 # folding white space, but legacy local parts can (they are a sequence
2735 # of atoms, not dotatoms). On the other hand we strip whitespace from
2736 # before the @ and around dots, on the assumption that the whitespace
2737 # around the punctuation is a mistake in what would otherwise be
2738 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2739 self.assertEqual(('', "merwok wok@xample.com"),
2740 utils.parseaddr("merwok wok@xample.com"))
2741 self.assertEqual(('', "merwok wok@xample.com"),
2742 utils.parseaddr("merwok wok@xample.com"))
2743 self.assertEqual(('', "merwok wok@xample.com"),
2744 utils.parseaddr(" merwok wok @xample.com"))
2745 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2746 utils.parseaddr('merwok"wok" wok@xample.com'))
2747 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2748 utils.parseaddr('merwok. wok . wok@xample.com'))
2749
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002750 def test_multiline_from_comment(self):
2751 x = """\
2752Foo
2753\tBar <foo@example.com>"""
2754 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2755
2756 def test_quote_dump(self):
2757 self.assertEqual(
2758 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2759 r'"A Silly; Person" <person@dom.ain>')
2760
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002761 def test_charset_richcomparisons(self):
2762 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002763 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002764 cset1 = Charset()
2765 cset2 = Charset()
2766 eq(cset1, 'us-ascii')
2767 eq(cset1, 'US-ASCII')
2768 eq(cset1, 'Us-AsCiI')
2769 eq('us-ascii', cset1)
2770 eq('US-ASCII', cset1)
2771 eq('Us-AsCiI', cset1)
2772 ne(cset1, 'usascii')
2773 ne(cset1, 'USASCII')
2774 ne(cset1, 'UsAsCiI')
2775 ne('usascii', cset1)
2776 ne('USASCII', cset1)
2777 ne('UsAsCiI', cset1)
2778 eq(cset1, cset2)
2779 eq(cset2, cset1)
2780
2781 def test_getaddresses(self):
2782 eq = self.assertEqual
2783 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2784 'Bud Person <bperson@dom.ain>']),
2785 [('Al Person', 'aperson@dom.ain'),
2786 ('Bud Person', 'bperson@dom.ain')])
2787
2788 def test_getaddresses_nasty(self):
2789 eq = self.assertEqual
2790 eq(utils.getaddresses(['foo: ;']), [('', '')])
2791 eq(utils.getaddresses(
2792 ['[]*-- =~$']),
2793 [('', ''), ('', ''), ('', '*--')])
2794 eq(utils.getaddresses(
2795 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2796 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2797
2798 def test_getaddresses_embedded_comment(self):
2799 """Test proper handling of a nested comment"""
2800 eq = self.assertEqual
2801 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2802 eq(addrs[0][1], 'foo@bar.com')
2803
2804 def test_utils_quote_unquote(self):
2805 eq = self.assertEqual
2806 msg = Message()
2807 msg.add_header('content-disposition', 'attachment',
2808 filename='foo\\wacky"name')
2809 eq(msg.get_filename(), 'foo\\wacky"name')
2810
2811 def test_get_body_encoding_with_bogus_charset(self):
2812 charset = Charset('not a charset')
2813 self.assertEqual(charset.get_body_encoding(), 'base64')
2814
2815 def test_get_body_encoding_with_uppercase_charset(self):
2816 eq = self.assertEqual
2817 msg = Message()
2818 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2819 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2820 charsets = msg.get_charsets()
2821 eq(len(charsets), 1)
2822 eq(charsets[0], 'utf-8')
2823 charset = Charset(charsets[0])
2824 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002825 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002826 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2827 eq(msg.get_payload(decode=True), b'hello world')
2828 eq(msg['content-transfer-encoding'], 'base64')
2829 # Try another one
2830 msg = Message()
2831 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2832 charsets = msg.get_charsets()
2833 eq(len(charsets), 1)
2834 eq(charsets[0], 'us-ascii')
2835 charset = Charset(charsets[0])
2836 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2837 msg.set_payload('hello world', charset=charset)
2838 eq(msg.get_payload(), 'hello world')
2839 eq(msg['content-transfer-encoding'], '7bit')
2840
2841 def test_charsets_case_insensitive(self):
2842 lc = Charset('us-ascii')
2843 uc = Charset('US-ASCII')
2844 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2845
2846 def test_partial_falls_inside_message_delivery_status(self):
2847 eq = self.ndiffAssertEqual
2848 # The Parser interface provides chunks of data to FeedParser in 8192
2849 # byte gulps. SF bug #1076485 found one of those chunks inside
2850 # message/delivery-status header block, which triggered an
2851 # unreadline() of NeedMoreData.
2852 msg = self._msgobj('msg_43.txt')
2853 sfp = StringIO()
2854 iterators._structure(msg, sfp)
2855 eq(sfp.getvalue(), """\
2856multipart/report
2857 text/plain
2858 message/delivery-status
2859 text/plain
2860 text/plain
2861 text/plain
2862 text/plain
2863 text/plain
2864 text/plain
2865 text/plain
2866 text/plain
2867 text/plain
2868 text/plain
2869 text/plain
2870 text/plain
2871 text/plain
2872 text/plain
2873 text/plain
2874 text/plain
2875 text/plain
2876 text/plain
2877 text/plain
2878 text/plain
2879 text/plain
2880 text/plain
2881 text/plain
2882 text/plain
2883 text/plain
2884 text/plain
2885 text/rfc822-headers
2886""")
2887
R. David Murraya0b44b52010-12-02 21:47:19 +00002888 def test_make_msgid_domain(self):
2889 self.assertEqual(
2890 email.utils.make_msgid(domain='testdomain-string')[-19:],
2891 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002892
Ezio Melottib3aedd42010-11-20 19:04:17 +00002893
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002894# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for ``email.iterators`` and the feed parser's line buffer."""

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        it = iterators.typed_subpart_iterator(msg, 'text')
        # One payload per matching subpart, in iteration order.
        payloads = [subpart.get_payload() for subpart in it]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        it = iterators.typed_subpart_iterator(msg, 'text', 'plain')
        # One payload per matching subpart, in iteration order.
        payloads = [subpart.get_payload() for subpart in it]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # (text pushed, number of complete lines readable right afterwards)
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            while True:
                ol = bsf.readline()
                # NeedMoreData is a sentinel object: test identity, not
                # equality.
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: the lines read back must
        # concatenate to exactly what was pushed.
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
2981
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002982
Ezio Melottib3aedd42010-11-20 19:04:17 +00002983
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002984class TestParsers(TestEmailBase):
2985 def test_header_parser(self):
2986 eq = self.assertEqual
2987 # Parse only the headers of a complex multipart MIME document
2988 with openfile('msg_02.txt') as fp:
2989 msg = HeaderParser().parse(fp)
2990 eq(msg['from'], 'ppp-request@zzz.org')
2991 eq(msg['to'], 'ppp@zzz.org')
2992 eq(msg.get_content_type(), 'multipart/mixed')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002993 self.assertFalse(msg.is_multipart())
2994 self.assertTrue(isinstance(msg.get_payload(), str))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002995
2996 def test_whitespace_continuation(self):
2997 eq = self.assertEqual
2998 # This message contains a line after the Subject: header that has only
2999 # whitespace, but it is not empty!
3000 msg = email.message_from_string("""\
3001From: aperson@dom.ain
3002To: bperson@dom.ain
3003Subject: the next line has a space on it
3004\x20
3005Date: Mon, 8 Apr 2002 15:09:19 -0400
3006Message-ID: spam
3007
3008Here's the message body
3009""")
3010 eq(msg['subject'], 'the next line has a space on it\n ')
3011 eq(msg['message-id'], 'spam')
3012 eq(msg.get_payload(), "Here's the message body\n")
3013
3014 def test_whitespace_continuation_last_header(self):
3015 eq = self.assertEqual
3016 # Like the previous test, but the subject line is the last
3017 # header.
3018 msg = email.message_from_string("""\
3019From: aperson@dom.ain
3020To: bperson@dom.ain
3021Date: Mon, 8 Apr 2002 15:09:19 -0400
3022Message-ID: spam
3023Subject: the next line has a space on it
3024\x20
3025
3026Here's the message body
3027""")
3028 eq(msg['subject'], 'the next line has a space on it\n ')
3029 eq(msg['message-id'], 'spam')
3030 eq(msg.get_payload(), "Here's the message body\n")
3031
3032 def test_crlf_separation(self):
3033 eq = self.assertEqual
Guido van Rossum98297ee2007-11-06 21:34:58 +00003034 with openfile('msg_26.txt', newline='\n') as fp:
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003035 msg = Parser().parse(fp)
3036 eq(len(msg.get_payload()), 2)
3037 part1 = msg.get_payload(0)
3038 eq(part1.get_content_type(), 'text/plain')
3039 eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
3040 part2 = msg.get_payload(1)
3041 eq(part2.get_content_type(), 'application/riscos')
3042
R. David Murray8451c4b2010-10-23 22:19:56 +00003043 def test_crlf_flatten(self):
3044 # Using newline='\n' preserves the crlfs in this input file.
3045 with openfile('msg_26.txt', newline='\n') as fp:
3046 text = fp.read()
3047 msg = email.message_from_string(text)
3048 s = StringIO()
3049 g = Generator(s)
3050 g.flatten(msg, linesep='\r\n')
3051 self.assertEqual(s.getvalue(), text)
3052
3053 maxDiff = None
3054
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003055 def test_multipart_digest_with_extra_mime_headers(self):
3056 eq = self.assertEqual
3057 neq = self.ndiffAssertEqual
3058 with openfile('msg_28.txt') as fp:
3059 msg = email.message_from_file(fp)
3060 # Structure is:
3061 # multipart/digest
3062 # message/rfc822
3063 # text/plain
3064 # message/rfc822
3065 # text/plain
3066 eq(msg.is_multipart(), 1)
3067 eq(len(msg.get_payload()), 2)
3068 part1 = msg.get_payload(0)
3069 eq(part1.get_content_type(), 'message/rfc822')
3070 eq(part1.is_multipart(), 1)
3071 eq(len(part1.get_payload()), 1)
3072 part1a = part1.get_payload(0)
3073 eq(part1a.is_multipart(), 0)
3074 eq(part1a.get_content_type(), 'text/plain')
3075 neq(part1a.get_payload(), 'message 1\n')
3076 # next message/rfc822
3077 part2 = msg.get_payload(1)
3078 eq(part2.get_content_type(), 'message/rfc822')
3079 eq(part2.is_multipart(), 1)
3080 eq(len(part2.get_payload()), 1)
3081 part2a = part2.get_payload(0)
3082 eq(part2a.is_multipart(), 0)
3083 eq(part2a.get_content_type(), 'text/plain')
3084 neq(part2a.get_payload(), 'message 2\n')
3085
3086 def test_three_lines(self):
3087 # A bug report by Andrew McNamara
3088 lines = ['From: Andrew Person <aperson@dom.ain',
3089 'Subject: Test',
3090 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
3091 msg = email.message_from_string(NL.join(lines))
3092 self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')
3093
3094 def test_strip_line_feed_and_carriage_return_in_headers(self):
3095 eq = self.assertEqual
3096 # For [ 1002475 ] email message parser doesn't handle \r\n correctly
3097 value1 = 'text'
3098 value2 = 'more text'
3099 m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
3100 value1, value2)
3101 msg = email.message_from_string(m)
3102 eq(msg.get('Header'), value1)
3103 eq(msg.get('Next-Header'), value2)
3104
3105 def test_rfc2822_header_syntax(self):
3106 eq = self.assertEqual
3107 m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3108 msg = email.message_from_string(m)
3109 eq(len(msg), 3)
3110 eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
3111 eq(msg.get_payload(), 'body')
3112
3113 def test_rfc2822_space_not_allowed_in_header(self):
3114 eq = self.assertEqual
3115 m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3116 msg = email.message_from_string(m)
3117 eq(len(msg.keys()), 0)
3118
3119 def test_rfc2822_one_character_header(self):
3120 eq = self.assertEqual
3121 m = 'A: first header\nB: second header\nCC: third header\n\nbody'
3122 msg = email.message_from_string(m)
3123 headers = msg.keys()
3124 headers.sort()
3125 eq(headers, ['A', 'B', 'CC'])
3126 eq(msg.get_payload(), 'body')
3127
R. David Murray45e0e142010-06-16 02:19:40 +00003128 def test_CRLFLF_at_end_of_part(self):
3129 # issue 5610: feedparser should not eat two chars from body part ending
3130 # with "\r\n\n".
3131 m = (
3132 "From: foo@bar.com\n"
3133 "To: baz\n"
3134 "Mime-Version: 1.0\n"
3135 "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
3136 "\n"
3137 "--BOUNDARY\n"
3138 "Content-Type: text/plain\n"
3139 "\n"
3140 "body ending with CRLF newline\r\n"
3141 "\n"
3142 "--BOUNDARY--\n"
3143 )
3144 msg = email.message_from_string(m)
3145 self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003146
Ezio Melottib3aedd42010-11-20 19:04:17 +00003147
class Test8BitBytesHandling(unittest.TestCase):
    """Tests for messages whose source bytes contain non-ASCII (8bit) data.

    The tests parse byte input (message_from_bytes, BytesParser,
    BytesFeedParser) and check both the parsed values and the output of
    BytesGenerator, Generator and DecodedGenerator.
    """

    # In Python3 all input is string, but that doesn't work if the actual input
    # uses an 8bit transfer encoding.  To hack around that, in email 5.1 we
    # decode byte streams using the surrogateescape error handler, and
    # reconvert to binary at appropriate places if we detect surrogates.  This
    # doesn't allow us to transform headers with 8bit bytes (they get munged),
    # but it does allow us to parse and preserve them, and to decode body
    # parts that use an 8bit CTE.

    # Several tests compare whole messages; show the complete diff on failure.
    maxDiff = None

    # Template for single-part messages; the tests fill in the charset, the
    # content transfer encoding and the one body line.
    bodytest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: baz
        Mime-Version: 1.0
        Content-Type: text/plain; charset={charset}
        Content-Transfer-Encoding: {cte}

        {bodyline}
        """)

    def test_known_8bit_CTE(self):
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='8bit',
                                     bodyline='pöstal').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), "pöstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_unknown_8bit_CTE(self):
        m = self.bodytest_msg.format(charset='notavalidcharset',
                                     cte='8bit',
                                     bodyline='pöstal').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_8bit_in_quopri_body(self):
        # This is non-RFC compliant data...without 'decode' the library code
        # decodes the body using the charset from the headers, and because the
        # source byte really is utf-8 this works.  This is likely to fail
        # against real dirty data (ie: produce mojibake), but the data is
        # invalid anyway so it is as good a guess as any.  But this means that
        # this test just confirms the current behavior; that behavior is not
        # necessarily the best possible behavior.  With 'decode' it is
        # returning the raw bytes, so that test should be of correct behavior,
        # or at least produce the same result that email4 did.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='quoted-printable',
                                     bodyline='p=C3=B6stál').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
        # This is similar to the previous test, but proves that if the 8bit
        # byte is undecodable in the specified charset, it gets replaced
        # by the unicode 'unknown' character.  Again, this may or may not
        # be the ideal behavior.  Note that if decode=False none of the
        # decoders will get involved, so this is the only test we need
        # for this behavior.
        m = self.bodytest_msg.format(charset='ascii',
                                     cte='quoted-printable',
                                     bodyline='p=C3=B6stál').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_8bit_in_base64_body(self):
        # Sticking an 8bit byte in a base64 block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='base64',
                                     bodyline='cMO2c3RhbAá=').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(decode=True),
                         'cMO2c3RhbAá=\n'.encode('utf-8'))

    def test_8bit_in_uuencode_body(self):
        # Sticking an 8bit byte in a uuencode block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='uuencode',
                                     bodyline='<,.V<W1A; á ').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(decode=True),
                         '<,.V<W1A; á \n'.encode('utf-8'))

    # Pairs of (raw header source line, (name, value as printed by str(msg))).
    headertest_headers = (
        ('From: foo@bar.com', ('From', 'foo@bar.com')),
        ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
        ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
            '\tJean de Baddie',
            ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
                        'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
                        ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
        ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
        )
    headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
        '\nYes, they are flying.\n').encode('utf-8')

    def test_get_8bit_header(self):
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
        self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')

    def test_print_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertEqual(str(msg),
                         textwrap.dedent("""\
                            From: {}
                            To: {}
                            Subject: {}
                            From: {}

                            Yes, they are flying.
                            """).format(*[expected[1] for (_, expected) in
                                        self.headertest_headers]))

    def test_values_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([str(x) for x in msg.values()],
                             ['foo@bar.com',
                              'b\uFFFD\uFFFDz',
                              'Maintenant je vous pr\uFFFD\uFFFDsente mon '
                                  'coll\uFFFD\uFFFDgue, le pouf '
                                  'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                  '\tJean de Baddie',
                              "g\uFFFD\uFFFDst"])

    def test_items_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
                             [('From', 'foo@bar.com'),
                              ('To', 'b\uFFFD\uFFFDz'),
                              ('Subject', 'Maintenant je vous '
                                          'pr\uFFFD\uFFFDsente '
                                          'mon coll\uFFFD\uFFFDgue, le pouf '
                                          'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                          '\tJean de Baddie'),
                              ('From', 'g\uFFFD\uFFFDst')])

    def test_get_all_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([str(x) for x in msg.get_all('from')],
                             ['foo@bar.com',
                              'g\uFFFD\uFFFDst'])

    def test_get_content_type_with_8bit(self):
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/pl\xA7in; charset=utf-8
            """).encode('latin-1'))
        self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
        self.assertEqual(msg.get_content_maintype(), "text")
        self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")

    def test_get_params_with_8bit(self):
        msg = email.message_from_bytes(
            'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
        self.assertEqual(msg.get_params(header='x-header'),
            [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
        self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
        # XXX: someday you might be able to get 'b\xa7r', for now you can't.
        self.assertIsNone(msg.get_param('b\xa7r', header='x-header'))

    def test_get_rfc2231_params_with_8bit(self):
        # The parameter line is a folded continuation of Content-Type, so it
        # must start with whitespace.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
            ).encode('latin-1'))
        self.assertEqual(msg.get_param('title'),
            ('us-ascii', 'en', 'This is not f\uFFFDn'))

    def test_set_rfc2231_params_with_8bit(self):
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
            ).encode('latin-1'))
        msg.set_param('title', 'test')
        self.assertEqual(msg.get_param('title'), 'test')

    def test_del_rfc2231_params_with_8bit(self):
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
            ).encode('latin-1'))
        msg.del_param('title')
        self.assertIsNone(msg.get_param('title'))
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_payload_with_8bit_cte_header(self):
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Transfer-Encoding: b\xa7se64
            Content-Type: text/plain; charset=latin-1

            payload
            """).encode('latin-1'))
        self.assertEqual(msg.get_payload(), 'payload\n')
        self.assertEqual(msg.get_payload(decode=True), b'payload\n')

    # UTF-8 message with 8bit bytes in the headers and in the body.
    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')

    def test_bytes_generator(self):
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), self.non_latin_bin_msg)

    def test_bytes_generator_handles_None_body(self):
        # Issue 11019
        msg = email.message.Message()
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), b"\n")

    # Expected Generator (string) output for non_latin_bin_msg, with the
    # Subject header folded over three lines.
    non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
        From: foo@bar.com
        To: =?unknown-8bit?q?b=C3=A1z?=
        Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
         =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
         =?unknown-8bit?q?_Jean_de_Baddie?=
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: base64

        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
        """)

    def test_generator_handles_8bit(self):
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = StringIO()
        email.generator.Generator(out).flatten(msg)
        self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)

    def test_bytes_generator_with_unix_from(self):
        # The unixfrom contains a current date, so we can't check it
        # literally.  Just make sure the first word is 'From' and the
        # rest of the message matches the input.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
        lines = out.getvalue().split(b'\n')
        self.assertEqual(lines[0].split()[0], b'From')
        self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)

    # Same as the wrapped form, except that the first two Subject lines are
    # merged into one; the third (' ..._Jean_de_Baddie?=') stays as it is.
    non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
    non_latin_bin_msg_as7bit[2:4] = [
        'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
         'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)

    def test_message_from_binary_file(self):
        fn = 'test.msg'
        self.addCleanup(unlink, fn)
        with open(fn, 'wb') as testfile:
            testfile.write(self.non_latin_bin_msg)
        with open(fn, 'rb') as testfile:
            m = email.parser.BytesParser().parse(testfile)
        self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)

    # Latin-1 message with an 8bit body and ASCII-only headers.
    latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    # Expected str() rendering of latin_bin_msg.
    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)

    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
        m = email.message_from_bytes(self.latin_bin_msg)
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_decoded_generator_emits_unicode_body(self):
        m = email.message_from_bytes(self.latin_bin_msg)
        out = StringIO()
        email.generator.DecodedGenerator(out).flatten(m)
        # DecodedGenerator output contains an extra blank line compared
        # to the input message.  RDM: not sure if this is a bug or not,
        # but it is not specific to the 8bit->7bit conversion.
        self.assertEqual(out.getvalue(),
                         self.latin_bin_msg.decode('latin-1')+'\n')

    def test_bytes_feedparser(self):
        bfp = email.feedparser.BytesFeedParser()
        # Feed the message in 10-byte slices rather than in one piece.
        for i in range(0, len(self.latin_bin_msg), 10):
            bfp.feed(self.latin_bin_msg[i:i+10])
        m = bfp.close()
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_crlf_flatten(self):
        with openfile('msg_26.txt', 'rb') as fp:
            text = fp.read()
        msg = email.message_from_bytes(text)
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_8bit_multipart(self):
        # Issue 11605
        # Round trip: the flattened bytes must equal the parsed source.
        source = textwrap.dedent("""\
            Date: Fri, 18 Mar 2011 17:15:43 +0100
            To: foo@example.com
            From: foodwatch-Newsletter <bar@example.com>
            Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
            Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
            MIME-Version: 1.0
            Content-Type: multipart/alternative;
             boundary="b1_76a486bee62b0d200f33dc2ca08220ad"

            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/plain; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            Guten Tag, ,

            mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
            Nachrichten aus Japan.


            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/html; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
             "http://www.w3.org/TR/html4/loose.dtd">
            <html lang="de">
            <head>
             <title>foodwatch - Newsletter</title>
            </head>
            <body>
             <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
             die Nachrichten aus Japan.</p>
            </body>
            </html>
            --b1_76a486bee62b0d200f33dc2ca08220ad--

            """).encode('utf-8')
        msg = email.message_from_bytes(source)
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), source)

    def test_bytes_generator_b_encoding_linesep(self):
        # Issue 14062: b encoding was tacking on an extra \n.
        m = Message()
        # This has enough non-ascii that it should always end up b encoded.
        m['Subject'] = Header('žluťoučký kůň')
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(m, linesep='\r\n')
        self.assertEqual(
            s.getvalue(),
            b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

    def test_generator_b_encoding_linesep(self):
        # Since this broke in BytesGenerator, test Generator for completeness.
        m = Message()
        # This has enough non-ascii that it should always end up b encoded.
        m['Subject'] = Header('žluťoučký kůň')
        s = StringIO()
        g = email.generator.Generator(s)
        g.flatten(m, linesep='\r\n')
        self.assertEqual(
            s.getvalue(),
            'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3544
Ezio Melottib3aedd42010-11-20 19:04:17 +00003545
class BaseTestBytesGeneratorIdempotent:
    """Bytes-based helpers for the generator round-trip tests.

    Not a TestCase itself.  A concrete class has to provide ``linesep``,
    ``blinesep`` and ``normalize_linesep_regex`` as well as the unittest
    assertion methods used here.
    """

    maxDiff = None

    def _msgobj(self, filename):
        """Return (message, data) for the given test data file.

        The file is read as bytes and every match of
        ``normalize_linesep_regex`` is replaced by ``blinesep`` before the
        data is parsed.
        """
        with openfile(filename, 'rb') as stream:
            raw = stream.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        """Flatten msg with BytesGenerator and compare the result to data."""
        sink = BytesIO()
        # maxheaderlen=0 is passed so the generator does not re-wrap headers.
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, sink.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        """Compare two byte strings as lists of lines split on LF."""
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        first_lines = str1.split(b'\n')
        second_lines = str2.split(b'\n')
        self.assertListEqual(first_lines, second_lines)
3567
3568
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Round-trip variant that flatten()s with LF; CRLF pairs in the test
    # data are rewritten to the bytes separator first.
    linesep = '\n'
    blinesep = linesep.encode('ascii')
    normalize_linesep_regex = re.compile(br'\r\n')
3574
3575
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Round-trip variant that flatten()s with CRLF; every LF in the test
    # data that is not already preceded by CR is rewritten to CRLF first.
    linesep = '\r\n'
    blinesep = linesep.encode('ascii')
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
3581
Ezio Melottib3aedd42010-11-20 19:04:17 +00003582
class TestBase64(unittest.TestCase):
    """Unit tests for the helper functions in email.base64mime."""

    def test_len(self):
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Each started group of three input characters is expected to
            # take four encoded characters: 0, 4, 8, 12, 16, 20.
            bsize = 4 * ((size + 2) // 3)
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        # The assertions below expect str for every non-empty input, so for
        # empty input only emptiness is checked instead of pinning the result
        # to bytes.
        eq(len(base64mime.body_encode(b'')), 0)
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte.
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # 100 input bytes wrapped at 40 output characters: three full lines
        # and a padded remainder.
        wrapped = ['eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'] * 3
        wrapped.append('eHh4eCB4eHh4IA==')
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40),
           ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
        eq(he('hello\nworld', charset='iso-8859-2'),
           '=?iso-8859-2?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003633
3634
Ezio Melottib3aedd42010-11-20 19:04:17 +00003635
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003636class TestQuopri(unittest.TestCase):
def setUp(self):
    """Build the byte-value tables the check and length tests iterate over."""
    # Set of characters (as byte integers) that don't need to be encoded
    # in headers: ASCII letters, digits and a few punctuation marks.
    self.hlit = (list(range(ord('a'), ord('z') + 1)) +
                 list(range(ord('A'), ord('Z') + 1)) +
                 list(range(ord('0'), ord('9') + 1)) +
                 list(b'!*+-/'))
    # Set of characters (as byte integers) that do need to be encoded in
    # headers: everything else.
    self.hnon = [c for c in range(256) if c not in self.hlit]
    assert len(self.hlit) + len(self.hnon) == 256
    # Set of characters (as byte integers) that don't need to be encoded
    # in bodies: space through '~' except '=', plus tab.
    self.blit = [c for c in range(ord(' '), ord('~') + 1) if c != ord('=')]
    self.blit.append(ord('\t'))
    # Set of characters (as byte integers) that do need to be encoded in
    # bodies: everything else.
    self.bnon = [c for c in range(256) if c not in self.blit]
    assert len(self.blit) + len(self.bnon) == 256
3658
Guido van Rossum9604e662007-08-30 03:46:43 +00003659 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003660 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003661 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003662 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003663 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003664 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003665 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003666
Guido van Rossum9604e662007-08-30 03:46:43 +00003667 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003668 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003669 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003670 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003671 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003672 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003673 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003674
3675 def test_header_quopri_len(self):
3676 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003677 eq(quoprimime.header_length(b'hello'), 5)
3678 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003679 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003680 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003681 # =?xxx?q?...?= means 10 extra characters
3682 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003683 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3684 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003685 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003686 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003687 # =?xxx?q?...?= means 10 extra characters
3688 10)
3689 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003690 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003691 'expected length 1 for %r' % chr(c))
3692 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003693 # Space is special; it's encoded to _
3694 if c == ord(' '):
3695 continue
3696 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003697 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003698 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003699
3700 def test_body_quopri_len(self):
3701 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003702 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003703 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003704 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003705 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003706
3707 def test_quote_unquote_idempotent(self):
3708 for x in range(256):
3709 c = chr(x)
3710 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3711
def _test_header_encode(self, header, expected_encoded_header, charset=None):
    """Assert that header_encode(header) equals expected_encoded_header.

    charset is passed on positionally only when it is given, so that the
    default charset of header_encode() is used otherwise.
    """
    extra_args = () if charset is None else (charset,)
    self.assertEqual(quoprimime.header_encode(header, *extra_args),
                     expected_encoded_header)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003718
R David Murraycafd79d2011-03-23 15:25:55 -04003719 def test_header_encode_null(self):
3720 self._test_header_encode(b'', '')
3721
R David Murrayec1b5b82011-03-23 14:19:05 -04003722 def test_header_encode_one_word(self):
3723 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
3724
3725 def test_header_encode_two_lines(self):
3726 self._test_header_encode(b'hello\nworld',
3727 '=?iso-8859-1?q?hello=0Aworld?=')
3728
3729 def test_header_encode_non_ascii(self):
3730 self._test_header_encode(b'hello\xc7there',
3731 '=?iso-8859-1?q?hello=C7there?=')
3732
3733 def test_header_encode_alt_charset(self):
3734 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
3735 charset='iso-8859-2')
3736
def _test_header_decode(self, encoded_header, expected_decoded_header):
    """Assert that header_decode(encoded_header) gives the expected text."""
    self.assertEqual(quoprimime.header_decode(encoded_header),
                     expected_decoded_header)
3740
3741 def test_header_decode_null(self):
3742 self._test_header_decode('', '')
3743
3744 def test_header_decode_one_word(self):
3745 self._test_header_decode('hello', 'hello')
3746
3747 def test_header_decode_two_lines(self):
3748 self._test_header_decode('hello=0Aworld', 'hello\nworld')
3749
3750 def test_header_decode_non_ascii(self):
3751 self._test_header_decode('hello=C7there', 'hello\xc7there')
3752
def _test_decode(self, encoded, expected_decoded, eol=None):
    """Assert that decode(encoded) equals expected_decoded.

    eol is forwarded as a keyword only when it is given, so that the
    default of decode() is used otherwise.
    """
    options = {} if eol is None else {'eol': eol}
    self.assertEqual(quoprimime.decode(encoded, **options), expected_decoded)
3759
3760 def test_decode_null_word(self):
3761 self._test_decode('', '')
3762
3763 def test_decode_null_line_null_word(self):
3764 self._test_decode('\r\n', '\n')
3765
3766 def test_decode_one_word(self):
3767 self._test_decode('hello', 'hello')
3768
3769 def test_decode_one_word_eol(self):
3770 self._test_decode('hello', 'hello', eol='X')
3771
3772 def test_decode_one_line(self):
3773 self._test_decode('hello\r\n', 'hello\n')
3774
3775 def test_decode_one_line_lf(self):
3776 self._test_decode('hello\n', 'hello\n')
3777
R David Murraycafd79d2011-03-23 15:25:55 -04003778 def test_decode_one_line_cr(self):
3779 self._test_decode('hello\r', 'hello\n')
3780
3781 def test_decode_one_line_nl(self):
3782 self._test_decode('hello\n', 'helloX', eol='X')
3783
3784 def test_decode_one_line_crnl(self):
3785 self._test_decode('hello\r\n', 'helloX', eol='X')
3786
R David Murrayec1b5b82011-03-23 14:19:05 -04003787 def test_decode_one_line_one_word(self):
3788 self._test_decode('hello\r\nworld', 'hello\nworld')
3789
3790 def test_decode_one_line_one_word_eol(self):
3791 self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
3792
3793 def test_decode_two_lines(self):
3794 self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
3795
R David Murraycafd79d2011-03-23 15:25:55 -04003796 def test_decode_two_lines_eol(self):
3797 self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
3798
R David Murrayec1b5b82011-03-23 14:19:05 -04003799 def test_decode_one_long_line(self):
3800 self._test_decode('Spam' * 250, 'Spam' * 250)
3801
3802 def test_decode_one_space(self):
3803 self._test_decode(' ', '')
3804
3805 def test_decode_multiple_spaces(self):
3806 self._test_decode(' ' * 5, '')
3807
3808 def test_decode_one_line_trailing_spaces(self):
3809 self._test_decode('hello \r\n', 'hello\n')
3810
3811 def test_decode_two_lines_trailing_spaces(self):
3812 self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')
3813
3814 def test_decode_quoted_word(self):
3815 self._test_decode('=22quoted=20words=22', '"quoted words"')
3816
3817 def test_decode_uppercase_quoting(self):
3818 self._test_decode('ab=CD=EF', 'ab\xcd\xef')
3819
3820 def test_decode_lowercase_quoting(self):
3821 self._test_decode('ab=cd=ef', 'ab\xcd\xef')
3822
3823 def test_decode_soft_line_break(self):
3824 self._test_decode('soft line=\r\nbreak', 'soft linebreak')
3825
3826 def test_decode_false_quoting(self):
3827 self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
3828
def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
    """Assert that body_encode(body) equals expected_encoded_body.

    maxlinelen and eol are forwarded only when given, so that the defaults
    of body_encode() are exercised otherwise.  When the line separator is
    LF or CRLF, every encoded line is also checked against the line limit.
    """
    supplied = (('maxlinelen', maxlinelen), ('eol', eol))
    kwargs = {name: value for name, value in supplied if value is not None}
    # Fall back to body_encode's defaults for the checks below.
    line_limit = kwargs.get('maxlinelen', 76)
    separator = kwargs.get('eol', '\n')
    encoded_body = quoprimime.body_encode(body, **kwargs)
    self.assertEqual(encoded_body, expected_encoded_body)
    if separator in ('\n', '\r\n'):
        # We know how to split the result back into lines, so maxlinelen
        # can be checked.
        for line in encoded_body.splitlines():
            self.assertLessEqual(len(line), line_limit)
3848
3849 def test_encode_null(self):
3850 self._test_encode('', '')
3851
3852 def test_encode_null_lines(self):
3853 self._test_encode('\n\n', '\n\n')
3854
3855 def test_encode_one_line(self):
3856 self._test_encode('hello\n', 'hello\n')
3857
3858 def test_encode_one_line_crlf(self):
3859 self._test_encode('hello\r\n', 'hello\n')
3860
3861 def test_encode_one_line_eol(self):
3862 self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
3863
3864 def test_encode_one_space(self):
3865 self._test_encode(' ', '=20')
3866
3867 def test_encode_one_line_one_space(self):
3868 self._test_encode(' \n', '=20\n')
3869
# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
3874
3875 def test_encode_two_lines_one_space(self):
3876 self._test_encode(' \n \n', '=20\n=20\n')
3877
R David Murrayec1b5b82011-03-23 14:19:05 -04003878 def test_encode_one_word_trailing_spaces(self):
3879 self._test_encode('hello ', 'hello =20')
3880
3881 def test_encode_one_line_trailing_spaces(self):
3882 self._test_encode('hello \n', 'hello =20\n')
3883
3884 def test_encode_one_word_trailing_tab(self):
3885 self._test_encode('hello \t', 'hello =09')
3886
3887 def test_encode_one_line_trailing_tab(self):
3888 self._test_encode('hello \t\n', 'hello =09\n')
3889
3890 def test_encode_trailing_space_before_maxlinelen(self):
3891 self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
3892
R David Murrayb938c8c2011-03-24 12:19:26 -04003893 def test_encode_trailing_space_at_maxlinelen(self):
3894 self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
3895
R David Murrayec1b5b82011-03-23 14:19:05 -04003896 def test_encode_trailing_space_beyond_maxlinelen(self):
R David Murrayb938c8c2011-03-24 12:19:26 -04003897 self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
3898
3899 def test_encode_whitespace_lines(self):
3900 self._test_encode(' \n' * 5, '=20\n' * 5)
R David Murrayec1b5b82011-03-23 14:19:05 -04003901
3902 def test_encode_quoted_equals(self):
3903 self._test_encode('a = b', 'a =3D b')
3904
3905 def test_encode_one_long_string(self):
3906 self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
3907
3908 def test_encode_one_long_line(self):
3909 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
3910
3911 def test_encode_one_very_long_line(self):
3912 self._test_encode('x' * 200 + '\n',
3913 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
3914
3915 def test_encode_one_long_line(self):
3916 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
3917
3918 def test_encode_shortest_maxlinelen(self):
3919 self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003920
R David Murrayb938c8c2011-03-24 12:19:26 -04003921 def test_encode_maxlinelen_too_small(self):
3922 self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
3923
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003924 def test_encode(self):
3925 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003926 eq(quoprimime.body_encode(''), '')
3927 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003928 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00003929 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003930 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00003931 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003932xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
3933 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
3934x xxxx xxxx xxxx xxxx=20""")
3935 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00003936 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
3937 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003938xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
3939 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
3940x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00003941 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003942one line
3943
3944two line"""), """\
3945one line
3946
3947two line""")
3948
3949
Ezio Melottib3aedd42010-11-20 19:04:17 +00003950
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003951# Test the Charset class
class TestCharset(unittest.TestCase):
    """Tests for email.charset.Charset header and body encoding."""

    def tearDown(self):
        # test_body_encode registers a 'fake' charset; remove it again so it
        # cannot leak into other tests.  It is fine if it was never added.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        self.assertEqual(ascii_charset.header_encode('Hello World!'),
                         'Hello World!')
        # Test 8-bit idempotency with us-ascii: the text cannot be encoded
        # as us-ascii, so header_encode() must raise.
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        with self.assertRaises(UnicodeError):
            ascii_charset.header_encode(eight_bit)
        utf8_charset = Charset('utf-8')
        self.assertEqual(utf8_charset.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # Try a charset with QP body encoding
        latin1 = Charset('iso-8859-1')
        check('hello w=F6rld', latin1.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        utf8 = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', utf8.body_encode(b'hello world'))
        # Try a charset with None body encoding
        ascii_charset = Charset('us-ascii')
        check('hello world', ascii_charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake = Charset('fake')
        check('hello world', fake.body_encode('hello world'))

    def test_unicode_charset_name(self):
        # An ASCII charset name round-trips through str(); a name with a
        # non-ASCII character is rejected with CharsetError.
        ascii_charset = Charset('us-ascii')
        self.assertEqual(str(ascii_charset), 'us-ascii')
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
4006
4007
Ezio Melottib3aedd42010-11-20 19:04:17 +00004008
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004009# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    """Tests for email.header: Header, decode_header() and make_header()."""

    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # The appended chunk starts with a space of its own, so two spaces
        # are expected in the result.  The count is spelled out because the
        # difference from test_simple_surprise is exactly one space.
        h.append(' Goodbye World!')
        eq(h.encode(), 'Hello World!' + ' ' * 2 + 'Goodbye World!')

    def test_simple_surprise(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # No leading space in the appended chunk, yet the encoded header
        # still has one space between the two chunks.
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        # Every folded line must respect the requested maximum length.
        for line in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # With header_name given, the expected fold point moves earlier.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        # Decoding the folded header must give back the original text.
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        # Decoding the folded header must give back the original text.
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        eq = self.assertEqual
        h = Header()
        # The charset is given by name rather than as a Charset instance.
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        eq = self.assertEqual
        # For utf-8 the expected results use q encoding for the mostly-ASCII
        # text and b encoding for the all-CJK text.
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        # With errors='replace' the undecodable byte is replaced instead of
        # raising.
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_escaped_8bit_header(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        # The non-ASCII byte is carried through str as a surrogate escape.
        e = x.decode('ascii', 'surrogateescape')
        h = Header(e, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                         'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                         'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        h2 = email.header.make_header(email.header.decode_header(h))
        self.assertEqual(str(h2),
                         'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        h = Header('test')
        chunks = email.header.decode_header(h)
        # Mutating the returned list must not affect the Header itself.
        chunks.append(('ascii', 'test2'))
        self.assertEqual(str(h), 'test')

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        # The two adjacent koi8-r encoded words are expected to come back as
        # a single decoded chunk.
        eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        # The expected encoded word is labelled iso-2022-jp, not shift_jis.
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        msg = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(msg), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        # The value's own leading whitespace must survive flattening, in
        # addition to the single space written after the colon.  The counts
        # are explicit because this test is entirely about whitespace.
        leading_ws = ' ' * 3
        msg = Message()
        msg['SomeHeader'] = leading_ws + 'value with leading ws'
        self.assertEqual(str(msg),
                         'SomeHeader: ' + leading_ws +
                         'value with leading ws\n\n')
4361
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004362
Ezio Melottib3aedd42010-11-20 19:04:17 +00004363
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004364# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    """Tests for RFC 2231 header parameter encoding and decoding."""

    def test_get_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        # NOTE(review): the Received date below uses a space-padded day
        # ("Fri,  4 May") to match the msg_01.txt fixture -- confirm against
        # the fixture file.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_set_param_requote(self):
        msg = Message()
        msg.set_param('title', 'foo')
        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
        msg.set_param('title', 'bar', requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
        # tspecial is still quoted.
        msg.set_param('title', "(bar)bell", requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')

    def test_del_param(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        # NOTE(review): the Received date below uses a space-padded day
        # ("Fri,  4 May") to match the msg_01.txt fixture -- confirm against
        # the fixture file.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    def test_rfc2231_parse_rfc_quoting(self):
        # Extended segments written without surrounding double quotes.
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # Flattening must reproduce the input text exactly.
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_parse_extra_quoting(self):
        # Same as above, but the extended segments are also double-quoted.
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_no_language_or_charset(self):
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        # No segment is marked as encoded, so a plain string (not a
        # 3-tuple) is expected.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        # Segment 0 is not marked as encoded (no trailing '*'), so its
        # %-escapes are expected to be left as-is.
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # No segment is marked as encoded, so no %-escape is decoded.
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        # The expected value is lower-cased.
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        # An unknown charset name ('bogus') must not prevent decoding.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        # The byte that is invalid for the declared charset is expected to
        # show up as U+FFFD.
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        # A lone apostrophe must not be mistaken for the charset/language
        # delimiters.
        charset, language, s = msg.get_param('name')
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        # Only the first two apostrophes delimit charset and language; the
        # third belongs to the value.
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        # Without the extended marker the apostrophes are ordinary text.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
4709
4710
Ezio Melottib3aedd42010-11-20 19:04:17 +00004711
# Tests to ensure that signed parts of an email are completely preserved, as
# required by RFC1847 section 2.1. Note that these are incomplete, because the
# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):

    def _msg_and_obj(self, filename):
        # Return (original, msg): the raw text of the named test data file
        # and the Message object parsed from that text.
        with openfile(findfile(filename)) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message: the text between a
        # '--<boundary>' line and the next line starting with the same
        # '--<boundary>'.  re.S lets '.' span newlines, re.M anchors ^ and $
        # at line boundaries.
        repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        inmatch = repart.search(original)
        outmatch = repart.search(result)
        # Report a missing part as a test failure rather than letting
        # .group() raise AttributeError on None.
        self.assertIsNotNone(inmatch, 'no mime part found in original')
        self.assertIsNotNone(outmatch, 'no mime part found in result')
        self.assertEqual(outmatch.group(2), inmatch.group(2))

    def test_long_headers_as_string(self):
        # Round trip through Message.as_string() with default settings.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Round trip through as_string() with maxheaderlen=60; the first
        # part must still come out identical to the input.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Round trip through an explicit Generator writing to a StringIO.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4747
4748
Ezio Melottib3aedd42010-11-20 19:04:17 +00004749
def _testclasses():
    """Return this module's test case classes.

    A class qualifies when its module-level name starts with 'Test' and it
    is a subclass of unittest.TestCase.  Other objects that merely have a
    matching name are ignored.  The order follows dir(), i.e. sorted by
    name.
    """
    mod = sys.modules[__name__]
    candidates = (getattr(mod, name) for name in dir(mod)
                  if name.startswith('Test'))
    return [obj for obj in candidates
            if isinstance(obj, type) and issubclass(obj, unittest.TestCase)]
4753
4754
def suite():
    """Return a TestSuite with the tests of every class from _testclasses()."""
    # unittest.makeSuite() is deprecated; the default loader's
    # loadTestsFromTestCase() collects the same 'test*' methods.
    loader = unittest.defaultTestLoader
    all_tests = unittest.TestSuite()
    for testclass in _testclasses():
        all_tests.addTest(loader.loadTestsFromTestCase(testclass))
    return all_tests
4760
4761
def test_main():
    """Run each class returned by _testclasses() through run_unittest()."""
    # One run_unittest() call per class, in the order _testclasses() gives.
    for testclass in _testclasses():
        run_unittest(testclass)
4765
4766
Ezio Melottib3aedd42010-11-20 19:04:17 +00004767
if __name__ == '__main__':
    # When run as a script, hand the module-level suite() to unittest.
    unittest.main(defaultTest='suite')