blob: 605c1be105377ab1523c5172b5deef858b2169d1 [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R. David Murray96fd54e2010-10-08 15:55:28 +000039from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from email.test import __file__ as landmark
41
42
43NL = '\n'
44EMPTYSTRING = ''
45SPACE = ' '
46
47
Ezio Melottib3aedd42010-11-20 19:04:17 +000048
def openfile(filename, *args, **kws):
    """Open *filename* from the ``data`` directory next to the test package.

    Extra positional and keyword arguments are forwarded to ``open()``.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
52
53
Ezio Melottib3aedd42010-11-20 19:04:17 +000054
Guido van Rossum8b3febe2007-08-30 01:15:14 +000055# Base test class
class TestEmailBase(unittest.TestCase):
    # Shared helpers for the email test cases in this module.

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output."""
        if first != second:
            sfirst = str(first)
            ssecond = str(second)
            # Diff the repr() of each line so that whitespace and escape
            # differences are visible in the failure message.
            rfirst = [repr(line) for line in sfirst.splitlines()]
            rsecond = [repr(line) for line in ssecond.splitlines()]
            diff = difflib.ndiff(rfirst, rsecond)
            raise self.failureException(NL + NL.join(diff))

    def _msgobj(self, filename):
        """Parse the sample message *filename* and return the Message."""
        # openfile() already resolves the name against the data directory,
        # so the name is passed straight through, as the direct
        # openfile('msg_NN.txt') calls in the tests do.
        with openfile(filename) as fp:
            return email.message_from_file(fp)
70
71
Ezio Melottib3aedd42010-11-20 19:04:17 +000072
Guido van Rossum8b3febe2007-08-30 01:15:14 +000073# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    # Each test builds a Message (from scratch, from a string, or from one
    # of the sample files in the data directory) and checks one part of
    # the Message API.

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A header that is absent yields the supplied default.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # With a failobj, parts without a charset report that value.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        # openfile() resolves the name inside the data directory itself, so
        # the name is given directly, like the other openfile() calls here.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends a new field and lookups return the
            # first match, so drop the previous value first; otherwise only
            # the first spelling would ever be exercised.  Deleting a header
            # that is not present is harmless.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # msg_17.txt holds the expected DecodedGenerator output for msg_07.
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        for name in ('from', 'From', 'FROM', 'to', 'To', 'TO'):
            self.assertIn(name, msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        # With unixfrom=True the first line is the envelope "From " line and
        # the remainder must match the file.
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header is present, so the default lookup is None.
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A Content-Type value without a slash falls back to the default.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        # A Content-Type value without a slash falls back to the default.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        # An undecodable base64 payload is returned as the raw bytes.
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)
575
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000576# Test the email.encoders module
577class TestEncoders(unittest.TestCase):
R David Murray6d94bd42011-03-16 15:52:22 -0400578
579 def test_EncodersEncode_base64(self):
580 with openfile('PyBanner048.gif', 'rb') as fp:
581 bindata = fp.read()
582 mimed = email.mime.image.MIMEImage(bindata)
583 base64ed = mimed.get_payload()
584 # the transfer-encoded body lines should all be <=76 characters
585 lines = base64ed.split('\n')
586 self.assertLessEqual(max([ len(x) for x in lines ]), 76)
587
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000588 def test_encode_empty_payload(self):
589 eq = self.assertEqual
590 msg = Message()
591 msg.set_charset('us-ascii')
592 eq(msg['content-transfer-encoding'], '7bit')
593
594 def test_default_cte(self):
595 eq = self.assertEqual
Ezio Melottic303c122010-04-22 11:57:12 +0000596 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000597 msg = MIMEText('hello world')
598 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic303c122010-04-22 11:57:12 +0000599 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000600 msg = MIMEText('hello \xf8 world')
601 eq(msg['content-transfer-encoding'], '8bit')
602 # And now with a different charset
603 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
604 eq(msg['content-transfer-encoding'], 'quoted-printable')
605
R. David Murraye85200d2010-05-06 01:41:14 +0000606 def test_encode7or8bit(self):
607 # Make sure a charset whose input character set is 8bit but
608 # whose output character set is 7bit gets a transfer-encoding
609 # of 7bit.
610 eq = self.assertEqual
R. David Murray850fc852010-06-03 01:58:28 +0000611 msg = MIMEText('文', _charset='euc-jp')
R. David Murraye85200d2010-05-06 01:41:14 +0000612 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000613
Ezio Melottib3aedd42010-11-20 19:04:17 +0000614
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000615# Test long header wrapping
616class TestLongHeaders(TestEmailBase):
617 def test_split_long_continuation(self):
618 eq = self.ndiffAssertEqual
619 msg = email.message_from_string("""\
620Subject: bug demonstration
621\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
622\tmore text
623
624test
625""")
626 sfp = StringIO()
627 g = Generator(sfp)
628 g.flatten(msg)
629 eq(sfp.getvalue(), """\
630Subject: bug demonstration
631\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
632\tmore text
633
634test
635""")
636
637 def test_another_long_almost_unsplittable_header(self):
638 eq = self.ndiffAssertEqual
639 hstr = """\
640bug demonstration
641\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
642\tmore text"""
643 h = Header(hstr, continuation_ws='\t')
644 eq(h.encode(), """\
645bug demonstration
646\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
647\tmore text""")
648 h = Header(hstr.replace('\t', ' '))
649 eq(h.encode(), """\
650bug demonstration
651 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
652 more text""")
653
654 def test_long_nonstring(self):
655 eq = self.ndiffAssertEqual
656 g = Charset("iso-8859-1")
657 cz = Charset("iso-8859-2")
658 utf8 = Charset("utf-8")
659 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
660 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
661 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
662 b'bef\xf6rdert. ')
663 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
664 b'd\xf9vtipu.. ')
665 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
666 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
667 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
668 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
669 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
670 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
671 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
672 '\u3044\u307e\u3059\u3002')
673 h = Header(g_head, g, header_name='Subject')
674 h.append(cz_head, cz)
675 h.append(utf8_head, utf8)
676 msg = Message()
677 msg['Subject'] = h
678 sfp = StringIO()
679 g = Generator(sfp)
680 g.flatten(msg)
681 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000682Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
683 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
684 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
685 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
686 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
687 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
688 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
689 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
690 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
691 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
692 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000693
694""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000695 eq(h.encode(maxlinelen=76), """\
696=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
697 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
698 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
699 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
700 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
701 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
702 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
703 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
704 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
705 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
706 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000707
708 def test_long_header_encode(self):
709 eq = self.ndiffAssertEqual
710 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
711 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
712 header_name='X-Foobar-Spoink-Defrobnit')
713 eq(h.encode(), '''\
714wasnipoop; giraffes="very-long-necked-animals";
715 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
716
717 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
718 eq = self.ndiffAssertEqual
719 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
720 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
721 header_name='X-Foobar-Spoink-Defrobnit',
722 continuation_ws='\t')
723 eq(h.encode(), '''\
724wasnipoop; giraffes="very-long-necked-animals";
725 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
726
727 def test_long_header_encode_with_tab_continuation(self):
728 eq = self.ndiffAssertEqual
729 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
730 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
731 header_name='X-Foobar-Spoink-Defrobnit',
732 continuation_ws='\t')
733 eq(h.encode(), '''\
734wasnipoop; giraffes="very-long-necked-animals";
735\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
736
R David Murray3a6152f2011-03-14 21:13:03 -0400737 def test_header_encode_with_different_output_charset(self):
738 h = Header('文', 'euc-jp')
739 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
740
741 def test_long_header_encode_with_different_output_charset(self):
742 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
743 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
744 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
745 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
746 res = """\
747=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
748 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
749 self.assertEqual(h.encode(), res)
750
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000751 def test_header_splitter(self):
752 eq = self.ndiffAssertEqual
753 msg = MIMEText('')
754 # It'd be great if we could use add_header() here, but that doesn't
755 # guarantee an order of the parameters.
756 msg['X-Foobar-Spoink-Defrobnit'] = (
757 'wasnipoop; giraffes="very-long-necked-animals"; '
758 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
759 sfp = StringIO()
760 g = Generator(sfp)
761 g.flatten(msg)
762 eq(sfp.getvalue(), '''\
763Content-Type: text/plain; charset="us-ascii"
764MIME-Version: 1.0
765Content-Transfer-Encoding: 7bit
766X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
767 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
768
769''')
770
771 def test_no_semis_header_splitter(self):
772 eq = self.ndiffAssertEqual
773 msg = Message()
774 msg['From'] = 'test@dom.ain'
775 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
776 msg.set_payload('Test')
777 sfp = StringIO()
778 g = Generator(sfp)
779 g.flatten(msg)
780 eq(sfp.getvalue(), """\
781From: test@dom.ain
782References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
783 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
784
785Test""")
786
R David Murray7da4db12011-04-07 20:37:17 -0400787 def test_last_split_chunk_does_not_fit(self):
788 eq = self.ndiffAssertEqual
789 h = Header('Subject: the first part of this is short, but_the_second'
790 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
791 '_all_by_itself')
792 eq(h.encode(), """\
793Subject: the first part of this is short,
794 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
795
796 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
797 eq = self.ndiffAssertEqual
798 h = Header(', but_the_second'
799 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
800 '_all_by_itself')
801 eq(h.encode(), """\
802,
803 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
804
805 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
806 eq = self.ndiffAssertEqual
807 h = Header(', , but_the_second'
808 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
809 '_all_by_itself')
810 eq(h.encode(), """\
811, ,
812 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
813
814 def test_trailing_splitable_on_overlong_unsplitable(self):
815 eq = self.ndiffAssertEqual
816 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
817 'be_on_a_line_all_by_itself;')
818 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
819 "be_on_a_line_all_by_itself;")
820
821 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
822 eq = self.ndiffAssertEqual
823 h = Header('; '
824 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
825 'be_on_a_line_all_by_itself;')
826 eq(h.encode(), """\
827;
828 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
829
R David Murraye1292a22011-04-07 20:54:03 -0400830 def test_long_header_with_multiple_sequential_split_chars(self):
831 # Issue 11492
832
833 eq = self.ndiffAssertEqual
834 h = Header('This is a long line that has two whitespaces in a row. '
835 'This used to cause truncation of the header when folded')
836 eq(h.encode(), """\
837This is a long line that has two whitespaces in a row. This used to cause
838 truncation of the header when folded""")
839
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000840 def test_no_split_long_header(self):
841 eq = self.ndiffAssertEqual
842 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000843 h = Header(hstr)
844 # These come on two lines because Headers are really field value
845 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000846 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000847References:
848 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
849 h = Header('x' * 80)
850 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000851
852 def test_splitting_multiple_long_lines(self):
853 eq = self.ndiffAssertEqual
854 hstr = """\
855from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
856\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
857\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
858"""
859 h = Header(hstr, continuation_ws='\t')
860 eq(h.encode(), """\
861from babylon.socal-raves.org (localhost [127.0.0.1]);
862 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
863 for <mailman-admin@babylon.socal-raves.org>;
864 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
865\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
866 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
867 for <mailman-admin@babylon.socal-raves.org>;
868 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
869\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
870 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
871 for <mailman-admin@babylon.socal-raves.org>;
872 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
873
874 def test_splitting_first_line_only_is_long(self):
875 eq = self.ndiffAssertEqual
876 hstr = """\
877from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
878\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
879\tid 17k4h5-00034i-00
880\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
881 h = Header(hstr, maxlinelen=78, header_name='Received',
882 continuation_ws='\t')
883 eq(h.encode(), """\
884from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
885 helo=cthulhu.gerg.ca)
886\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
887\tid 17k4h5-00034i-00
888\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
889
890 def test_long_8bit_header(self):
891 eq = self.ndiffAssertEqual
892 msg = Message()
893 h = Header('Britische Regierung gibt', 'iso-8859-1',
894 header_name='Subject')
895 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +0000896 eq(h.encode(maxlinelen=76), """\
897=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
898 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000899 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +0000900 eq(msg.as_string(maxheaderlen=76), """\
901Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
902 =?iso-8859-1?q?hore-Windkraftprojekte?=
903
904""")
905 eq(msg.as_string(maxheaderlen=0), """\
906Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000907
908""")
909
910 def test_long_8bit_header_no_charset(self):
911 eq = self.ndiffAssertEqual
912 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +0000913 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
914 'f\xfcr Offshore-Windkraftprojekte '
915 '<a-very-long-address@example.com>')
916 msg['Reply-To'] = header_string
917 self.assertRaises(UnicodeEncodeError, msg.as_string)
918 msg = Message()
919 msg['Reply-To'] = Header(header_string, 'utf-8',
920 header_name='Reply-To')
921 eq(msg.as_string(maxheaderlen=78), """\
922Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
923 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000924
925""")
926
927 def test_long_to_header(self):
928 eq = self.ndiffAssertEqual
929 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
930 '<someone@eecs.umich.edu>,'
931 '"Someone Test #B" <someone@umich.edu>, '
932 '"Someone Test #C" <someone@eecs.umich.edu>, '
933 '"Someone Test #D" <someone@eecs.umich.edu>')
934 msg = Message()
935 msg['To'] = to
936 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +0000937To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000938 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +0000939 "Someone Test #C" <someone@eecs.umich.edu>,
940 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000941
942''')
943
944 def test_long_line_after_append(self):
945 eq = self.ndiffAssertEqual
946 s = 'This is an example of string which has almost the limit of header length.'
947 h = Header(s)
948 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +0000949 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000950This is an example of string which has almost the limit of header length.
951 Add another line.""")
952
953 def test_shorter_line_with_append(self):
954 eq = self.ndiffAssertEqual
955 s = 'This is a shorter line.'
956 h = Header(s)
957 h.append('Add another sentence. (Surprise?)')
958 eq(h.encode(),
959 'This is a shorter line. Add another sentence. (Surprise?)')
960
961 def test_long_field_name(self):
962 eq = self.ndiffAssertEqual
963 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +0000964 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
965 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
966 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
967 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000968 h = Header(gs, 'iso-8859-1', header_name=fn)
969 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +0000970 eq(h.encode(maxlinelen=76), """\
971=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
972 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
973 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
974 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000975
976 def test_long_received_header(self):
977 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
978 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
979 'Wed, 05 Mar 2003 18:10:18 -0700')
980 msg = Message()
981 msg['Received-1'] = Header(h, continuation_ws='\t')
982 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000983 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000984 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000985Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
986 Wed, 05 Mar 2003 18:10:18 -0700
987Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
988 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000989
990""")
991
992 def test_string_headerinst_eq(self):
993 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
994 'tu-muenchen.de> (David Bremner\'s message of '
995 '"Thu, 6 Mar 2003 13:58:21 +0100")')
996 msg = Message()
997 msg['Received-1'] = Header(h, header_name='Received-1',
998 continuation_ws='\t')
999 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001000 # XXX This should be splitting on spaces not commas.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001001 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +00001002Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
1003 6 Mar 2003 13:58:21 +0100\")
1004Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
1005 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001006
1007""")
1008
1009 def test_long_unbreakable_lines_with_continuation(self):
1010 eq = self.ndiffAssertEqual
1011 msg = Message()
1012 t = """\
1013iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1014 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1015 msg['Face-1'] = t
1016 msg['Face-2'] = Header(t, header_name='Face-2')
Barry Warsawbef9d212007-08-31 10:55:37 +00001017 # XXX This splitting is all wrong. It the first value line should be
1018 # snug against the field name.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001019 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001020Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001021 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001022 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001023Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001024 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001025 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
1026
1027""")
1028
1029 def test_another_long_multiline_header(self):
1030 eq = self.ndiffAssertEqual
1031 m = ('Received: from siimage.com '
1032 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001033 'Microsoft SMTPSVC(5.0.2195.4905); '
1034 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001035 msg = email.message_from_string(m)
1036 eq(msg.as_string(maxheaderlen=78), '''\
Barry Warsawbef9d212007-08-31 10:55:37 +00001037Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
1038 Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001039
1040''')
1041
1042 def test_long_lines_with_different_header(self):
1043 eq = self.ndiffAssertEqual
1044 h = ('List-Unsubscribe: '
1045 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1046 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1047 '?subject=unsubscribe>')
1048 msg = Message()
1049 msg['List'] = h
1050 msg['List'] = Header(h, header_name='List')
1051 eq(msg.as_string(maxheaderlen=78), """\
1052List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001053 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001054List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001055 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001056
1057""")
1058
    def test_long_rfc2047_header_with_embedded_fws(self):
        # Encode a three-line value as utf-8 encoded words.  The second and
        # third input lines start with whitespace (a tab, then spaces), i.e.
        # the value already contains embedded folding white space.
        # NOTE(review): both literals go through textwrap.dedent(), so the
        # indentation of their lines relative to each other is significant;
        # confirm it against the upstream file before touching them.
        h = Header(textwrap.dedent("""\
            We're going to pretend this header is in a non-ascii character set
            \tto see if line wrapping with encoded words and embedded
               folding white space works"""),
                   charset='utf-8',
                   header_name='Test')
        # A newline is appended to both sides of the comparison.
        self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
            =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
             =?utf-8?q?cter_set?=
             =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
             =?utf-8?q?_folding_white_space_works?=""")+'\n')
1071
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001072
Ezio Melottib3aedd42010-11-20 19:04:17 +00001073
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001074# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # The fixture is a message whose body starts with a "From " line; the
    # tests flatten it with mangle_from_ switched on and off.
    def setUp(self):
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload('From the desk of A.A.A.:\n'
                            'Blah blah blah\n')
        self.msg = message

    def test_mangled_from(self):
        # With mangle_from_=True the body line gains a leading '>'.
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        self.assertEqual(out.getvalue(),
                         'From: aaa@bbb.org\n'
                         '\n'
                         '>From the desk of A.A.A.:\n'
                         'Blah blah blah\n')

    def test_dont_mangle_from(self):
        # With mangle_from_=False the body is written unchanged.
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        self.assertEqual(out.getvalue(),
                         'From: aaa@bbb.org\n'
                         '\n'
                         'From the desk of A.A.A.:\n'
                         'Blah blah blah\n')
1105
1106
Ezio Melottib3aedd42010-11-20 19:04:17 +00001107
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001108# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        with open(findfile('audiotest.au', datadir), 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        # No subtype is passed in setUp; 'basic' is expected to be guessed.
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # Decoding the base64 payload must give back the original bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list is a sentinel that cannot be confused with any real
        # parameter value; assertIs reports both objects on failure.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1152
1153
Ezio Melottib3aedd42010-11-20 19:04:17 +00001154
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001155# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Read the GIF fixture as bytes and wrap it without naming a subtype.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        # No subtype is passed in setUp; 'gif' is expected to be guessed.
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # Decoding the base64 payload must give back the original bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list is a sentinel that cannot be confused with any real
        # parameter value; assertIs reports both objects on failure.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1193
1194
Ezio Melottib3aedd42010-11-20 19:04:17 +00001195
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001196# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    # Both tests wrap the same six high-bit bytes.
    def test_headers(self):
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(raw)
        # whitespace in the cte encoded block is RFC-irrelevant.
        encoded = part.get_payload().strip()
        self.assertEqual(encoded, '+vv8/f7/')
        self.assertEqual(part.get_payload(decode=True), raw)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001211
1212
Ezio Melottib3aedd42010-11-20 19:04:17 +00001213
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001214# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        # Default fixture: plain ASCII text, no explicit charset.
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list is a sentinel that cannot be confused with any real
        # parameter value; assertIs reports both objects on failure.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing,
                                          header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn shows the searched text when the check fails.
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        # Cyrillic text with an explicit utf-8 charset.
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1265
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001266
Ezio Melottib3aedd42010-11-20 19:04:17 +00001267
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001268# Test complicated multipart/* messages
1269class TestMultipart(TestEmailBase):
1270 def setUp(self):
1271 with openfile('PyBanner048.gif', 'rb') as fp:
1272 data = fp.read()
1273 container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
1274 image = MIMEImage(data, name='dingusfish.gif')
1275 image.add_header('content-disposition', 'attachment',
1276 filename='dingusfish.gif')
1277 intro = MIMEText('''\
1278Hi there,
1279
1280This is the dingus fish.
1281''')
1282 container.attach(intro)
1283 container.attach(image)
1284 container['From'] = 'Barry <barry@digicool.com>'
1285 container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
1286 container['Subject'] = 'Here is your dingus fish'
1287
1288 now = 987809702.54848599
1289 timetuple = time.localtime(now)
1290 if timetuple[-1] == 0:
1291 tzsecs = time.timezone
1292 else:
1293 tzsecs = time.altzone
1294 if tzsecs > 0:
1295 sign = '-'
1296 else:
1297 sign = '+'
1298 tzoffset = ' %s%04d' % (sign, tzsecs / 36)
1299 container['Date'] = time.strftime(
1300 '%a, %d %b %Y %H:%M:%S',
1301 time.localtime(now)) + tzoffset
1302 self._msg = container
1303 self._im = image
1304 self._txt = intro
1305
1306 def test_hierarchy(self):
1307 # convenience
1308 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001309 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001310 raises = self.assertRaises
1311 # tests
1312 m = self._msg
1313 unless(m.is_multipart())
1314 eq(m.get_content_type(), 'multipart/mixed')
1315 eq(len(m.get_payload()), 2)
1316 raises(IndexError, m.get_payload, 2)
1317 m0 = m.get_payload(0)
1318 m1 = m.get_payload(1)
1319 unless(m0 is self._txt)
1320 unless(m1 is self._im)
1321 eq(m.get_payload(), [m0, m1])
1322 unless(not m0.is_multipart())
1323 unless(not m1.is_multipart())
1324
1325 def test_empty_multipart_idempotent(self):
1326 text = """\
1327Content-Type: multipart/mixed; boundary="BOUNDARY"
1328MIME-Version: 1.0
1329Subject: A subject
1330To: aperson@dom.ain
1331From: bperson@dom.ain
1332
1333
1334--BOUNDARY
1335
1336
1337--BOUNDARY--
1338"""
1339 msg = Parser().parsestr(text)
1340 self.ndiffAssertEqual(text, msg.as_string())
1341
1342 def test_no_parts_in_a_multipart_with_none_epilogue(self):
1343 outer = MIMEBase('multipart', 'mixed')
1344 outer['Subject'] = 'A subject'
1345 outer['To'] = 'aperson@dom.ain'
1346 outer['From'] = 'bperson@dom.ain'
1347 outer.set_boundary('BOUNDARY')
1348 self.ndiffAssertEqual(outer.as_string(), '''\
1349Content-Type: multipart/mixed; boundary="BOUNDARY"
1350MIME-Version: 1.0
1351Subject: A subject
1352To: aperson@dom.ain
1353From: bperson@dom.ain
1354
1355--BOUNDARY
1356
1357--BOUNDARY--''')
1358
1359 def test_no_parts_in_a_multipart_with_empty_epilogue(self):
1360 outer = MIMEBase('multipart', 'mixed')
1361 outer['Subject'] = 'A subject'
1362 outer['To'] = 'aperson@dom.ain'
1363 outer['From'] = 'bperson@dom.ain'
1364 outer.preamble = ''
1365 outer.epilogue = ''
1366 outer.set_boundary('BOUNDARY')
1367 self.ndiffAssertEqual(outer.as_string(), '''\
1368Content-Type: multipart/mixed; boundary="BOUNDARY"
1369MIME-Version: 1.0
1370Subject: A subject
1371To: aperson@dom.ain
1372From: bperson@dom.ain
1373
1374
1375--BOUNDARY
1376
1377--BOUNDARY--
1378''')
1379
1380 def test_one_part_in_a_multipart(self):
1381 eq = self.ndiffAssertEqual
1382 outer = MIMEBase('multipart', 'mixed')
1383 outer['Subject'] = 'A subject'
1384 outer['To'] = 'aperson@dom.ain'
1385 outer['From'] = 'bperson@dom.ain'
1386 outer.set_boundary('BOUNDARY')
1387 msg = MIMEText('hello world')
1388 outer.attach(msg)
1389 eq(outer.as_string(), '''\
1390Content-Type: multipart/mixed; boundary="BOUNDARY"
1391MIME-Version: 1.0
1392Subject: A subject
1393To: aperson@dom.ain
1394From: bperson@dom.ain
1395
1396--BOUNDARY
1397Content-Type: text/plain; charset="us-ascii"
1398MIME-Version: 1.0
1399Content-Transfer-Encoding: 7bit
1400
1401hello world
1402--BOUNDARY--''')
1403
1404 def test_seq_parts_in_a_multipart_with_empty_preamble(self):
1405 eq = self.ndiffAssertEqual
1406 outer = MIMEBase('multipart', 'mixed')
1407 outer['Subject'] = 'A subject'
1408 outer['To'] = 'aperson@dom.ain'
1409 outer['From'] = 'bperson@dom.ain'
1410 outer.preamble = ''
1411 msg = MIMEText('hello world')
1412 outer.attach(msg)
1413 outer.set_boundary('BOUNDARY')
1414 eq(outer.as_string(), '''\
1415Content-Type: multipart/mixed; boundary="BOUNDARY"
1416MIME-Version: 1.0
1417Subject: A subject
1418To: aperson@dom.ain
1419From: bperson@dom.ain
1420
1421
1422--BOUNDARY
1423Content-Type: text/plain; charset="us-ascii"
1424MIME-Version: 1.0
1425Content-Transfer-Encoding: 7bit
1426
1427hello world
1428--BOUNDARY--''')
1429
1430
1431 def test_seq_parts_in_a_multipart_with_none_preamble(self):
1432 eq = self.ndiffAssertEqual
1433 outer = MIMEBase('multipart', 'mixed')
1434 outer['Subject'] = 'A subject'
1435 outer['To'] = 'aperson@dom.ain'
1436 outer['From'] = 'bperson@dom.ain'
1437 outer.preamble = None
1438 msg = MIMEText('hello world')
1439 outer.attach(msg)
1440 outer.set_boundary('BOUNDARY')
1441 eq(outer.as_string(), '''\
1442Content-Type: multipart/mixed; boundary="BOUNDARY"
1443MIME-Version: 1.0
1444Subject: A subject
1445To: aperson@dom.ain
1446From: bperson@dom.ain
1447
1448--BOUNDARY
1449Content-Type: text/plain; charset="us-ascii"
1450MIME-Version: 1.0
1451Content-Transfer-Encoding: 7bit
1452
1453hello world
1454--BOUNDARY--''')
1455
1456
1457 def test_seq_parts_in_a_multipart_with_none_epilogue(self):
1458 eq = self.ndiffAssertEqual
1459 outer = MIMEBase('multipart', 'mixed')
1460 outer['Subject'] = 'A subject'
1461 outer['To'] = 'aperson@dom.ain'
1462 outer['From'] = 'bperson@dom.ain'
1463 outer.epilogue = None
1464 msg = MIMEText('hello world')
1465 outer.attach(msg)
1466 outer.set_boundary('BOUNDARY')
1467 eq(outer.as_string(), '''\
1468Content-Type: multipart/mixed; boundary="BOUNDARY"
1469MIME-Version: 1.0
1470Subject: A subject
1471To: aperson@dom.ain
1472From: bperson@dom.ain
1473
1474--BOUNDARY
1475Content-Type: text/plain; charset="us-ascii"
1476MIME-Version: 1.0
1477Content-Transfer-Encoding: 7bit
1478
1479hello world
1480--BOUNDARY--''')
1481
1482
1483 def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
1484 eq = self.ndiffAssertEqual
1485 outer = MIMEBase('multipart', 'mixed')
1486 outer['Subject'] = 'A subject'
1487 outer['To'] = 'aperson@dom.ain'
1488 outer['From'] = 'bperson@dom.ain'
1489 outer.epilogue = ''
1490 msg = MIMEText('hello world')
1491 outer.attach(msg)
1492 outer.set_boundary('BOUNDARY')
1493 eq(outer.as_string(), '''\
1494Content-Type: multipart/mixed; boundary="BOUNDARY"
1495MIME-Version: 1.0
1496Subject: A subject
1497To: aperson@dom.ain
1498From: bperson@dom.ain
1499
1500--BOUNDARY
1501Content-Type: text/plain; charset="us-ascii"
1502MIME-Version: 1.0
1503Content-Transfer-Encoding: 7bit
1504
1505hello world
1506--BOUNDARY--
1507''')
1508
1509
1510 def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
1511 eq = self.ndiffAssertEqual
1512 outer = MIMEBase('multipart', 'mixed')
1513 outer['Subject'] = 'A subject'
1514 outer['To'] = 'aperson@dom.ain'
1515 outer['From'] = 'bperson@dom.ain'
1516 outer.epilogue = '\n'
1517 msg = MIMEText('hello world')
1518 outer.attach(msg)
1519 outer.set_boundary('BOUNDARY')
1520 eq(outer.as_string(), '''\
1521Content-Type: multipart/mixed; boundary="BOUNDARY"
1522MIME-Version: 1.0
1523Subject: A subject
1524To: aperson@dom.ain
1525From: bperson@dom.ain
1526
1527--BOUNDARY
1528Content-Type: text/plain; charset="us-ascii"
1529MIME-Version: 1.0
1530Content-Transfer-Encoding: 7bit
1531
1532hello world
1533--BOUNDARY--
1534
1535''')
1536
def test_message_external_body(self):
    """Check the part tree parsed from msg_36.txt.

    The second top-level part must be a multipart/alternative holding two
    message/external-body parts, each wrapping a single text/plain message.
    """
    check = self.assertEqual
    top = self._msgobj('msg_36.txt')
    check(len(top.get_payload()), 2)
    alternative = top.get_payload(1)
    check(alternative.get_content_type(), 'multipart/alternative')
    check(len(alternative.get_payload()), 2)
    for external in alternative.get_payload():
        check(external.get_content_type(), 'message/external-body')
        check(len(external.get_payload()), 1)
        wrapped = external.get_payload(0)
        check(wrapped.get_content_type(), 'text/plain')
1549
def test_double_boundary(self):
    """msg_37.txt must parse into exactly three parts."""
    # msg_37.txt is a multipart that contains two dash-boundary's in a
    # row.  Our interpretation of RFC 2046 calls for ignoring the second
    # and subsequent boundaries.
    parsed = self._msgobj('msg_37.txt')
    part_count = len(parsed.get_payload())
    self.assertEqual(part_count, 3)
1556
def test_nested_inner_contains_outer_boundary(self):
    """Compare the structure dump of msg_38.txt with the expected tree."""
    eq = self.ndiffAssertEqual
    # msg_38.txt has an inner part that contains outer boundaries.  My
    # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
    # these are illegal and should be interpreted as unterminated inner
    # parts.
    msg = self._msgobj('msg_38.txt')
    sfp = StringIO()
    iterators._structure(msg, sfp)
    # NOTE(review): _structure() indents each nesting level by four spaces;
    # the nesting depths below were reconstructed after the indentation was
    # lost -- confirm against msg_38.txt.
    eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")
1575
def test_nested_with_same_boundary(self):
    """Compare the structure dump of msg_39.txt with the expected tree."""
    eq = self.ndiffAssertEqual
    # msg 39.txt is similarly evil in that it's got inner parts that use
    # the same boundary as outer parts.  Again, I believe the way this is
    # parsed is closest to the spirit of RFC 2046
    msg = self._msgobj('msg_39.txt')
    sfp = StringIO()
    iterators._structure(msg, sfp)
    # NOTE(review): _structure() indents each nesting level by four spaces;
    # the nesting depths below were reconstructed after the indentation was
    # lost -- confirm against msg_39.txt.
    eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")
1592
def test_boundary_in_non_multipart(self):
    """msg_40.txt (text/html with a boundary parameter) flattens verbatim."""
    parsed = self._msgobj('msg_40.txt')
    expected = '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
'''
    self.assertEqual(parsed.as_string(), expected)
1607
1608 def test_boundary_with_leading_space(self):
1609 eq = self.assertEqual
1610 msg = email.message_from_string('''\
1611MIME-Version: 1.0
1612Content-Type: multipart/mixed; boundary=" XXXX"
1613
1614-- XXXX
1615Content-Type: text/plain
1616
1617
1618-- XXXX
1619Content-Type: text/plain
1620
1621-- XXXX--
1622''')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001623 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001624 eq(msg.get_boundary(), ' XXXX')
1625 eq(len(msg.get_payload()), 2)
1626
1627 def test_boundary_without_trailing_newline(self):
1628 m = Parser().parsestr("""\
1629Content-Type: multipart/mixed; boundary="===============0012394164=="
1630MIME-Version: 1.0
1631
1632--===============0012394164==
1633Content-Type: image/file1.jpg
1634MIME-Version: 1.0
1635Content-Transfer-Encoding: base64
1636
1637YXNkZg==
1638--===============0012394164==--""")
Ezio Melottib3aedd42010-11-20 19:04:17 +00001639 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001640
1641
Ezio Melottib3aedd42010-11-20 19:04:17 +00001642
# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Check how the parser copes with badly formatted messages.

    Type checks use assertIsInstance so that a failure reports the actual
    object instead of a bare "False is not true".
    """

    def test_parse_missing_minor_type(self):
        # msg_14.txt must be reported as text/plain on every accessor.
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        # The payload stays a plain string and two defects are recorded.
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # The whole body of msg_31.txt is kept as one string payload.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        # Flattening msg_35.txt must produce the text below, which has a
        # blank line between the headers and the body.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        # A leading continuation line is dropped from the message and
        # recorded as a defect that carries the offending line.
        eq = self.assertEqual
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        eq(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        eq(msg.defects[0].line, ' Line 1\n')
1754
1755
Ezio Melottib3aedd42010-11-20 19:04:17 +00001756
# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """RFC 2047 encoded-word handling by decode_header() and make_header()."""

    def test_rfc2047_multiline(self):
        # Decode a folded header, rebuild it and re-encode it at 76 columns.
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        rebuilt = make_header(chunks)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(rebuilt.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Andr\xe9', 'iso-8859-1'),
                                  (b'Pirard <pirard@dom.ain>', None)])
        rebuilt = str(make_header(chunks))
        self.assertEqual(rebuilt, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        raw = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None),
            (b'lazy dog', 'iso-8859-1')])
        rebuilt = str(make_header(chunks))
        self.assertEqual(rebuilt,
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued to surrounding text are left undecoded.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        expected = [
            (b'Sm', None),
            (b'\xf6', 'iso-8859-1'),
            (b'rg', None),
            (b'\xe5', 'iso-8859-1'),
            (b'sbord', None),
        ]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # only test complete bytes
        cases = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        ]
        for encoded, decoded in cases:
            chunks = decode_header(template % encoded)
            self.assertEqual(chunks, [(decoded, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        expected = [(b'andr\xe9=zz', 'iso-8659-1')]
        self.assertEqual(decode_header(raw), expected)
1821
Ezio Melottib3aedd42010-11-20 19:04:17 +00001822
# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for MIMEMessage and for message/* and multipart containers.

    Type and identity checks use assertIsInstance/assertIs so that failures
    report the offending object.
    """

    def setUp(self):
        # Keep the raw text of msg_11.txt available on the instance.
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def _assert_digest_default_types(self, filename):
        """Check default/content types of the two containers in *filename*.

        Both top-level parts must report message/rfc822 and the message
        each one wraps must report text/plain, for the default type as
        well as the effective content type.
        """
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_type_error(self):
        # A plain string is rejected with TypeError.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapped message is stored by reference, not copied.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        # Attaching a second message to a MIMEMessage must fail.
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the indented lines below are assumed to start with
        # two spaces -- confirm against msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A hand-built multipart with preamble and epilogue must flatten
        # to exactly the text stored in msg_21.txt.
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline; the expected output still
        # places the first boundary on its own line.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        self._assert_digest_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._assert_digest_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # Without explicit headers the subparts must still report
        # message/rfc822, and the flattened text drops those headers.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002122
Ezio Melottib3aedd42010-11-20 19:04:17 +00002123
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):
    """Parse sample messages and check they regenerate byte-for-byte.

    Each test reads a data file, parses it, and flattens the untouched
    object tree again; the output must equal the original text.
    """

    # Line separator expected in parsed payloads, preambles and epilogues.
    linesep = '\n'

    def _msgobj(self, filename):
        """Return ``(message, text)`` for the named data file."""
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        """Flatten *msg* without header wrapping and compare it to *text*."""
        eq = self.ndiffAssertEqual
        s = StringIO()
        # maxheaderlen=0 disables header refolding, which would otherwise
        # change the text.
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        eq(msg.preamble, None)
        eq(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_params(), None)
        eq(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        # This sample is regenerated with its Unix-From line included.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002286
2287
Ezio Melottib3aedd42010-11-20 19:04:17 +00002288
# Test various other bits of the package's functionality
2290class TestMiscellaneous(TestEmailBase):
def test_message_from_string(self):
    """message_from_string() output flattens back to the source text."""
    with openfile('msg_01.txt') as source:
        original = source.read()
    parsed = email.message_from_string(original)
    sink = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(sink, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, sink.getvalue())
2301
def test_message_from_file(self):
    """message_from_file() output flattens back to the file's text."""
    with openfile('msg_01.txt') as source:
        original = source.read()
        # Rewind so the parser sees the file from the start again.
        source.seek(0)
        parsed = email.message_from_file(source)
    sink = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(sink, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, sink.getvalue())
2313
def test_message_from_string_with_class(self):
    """message_from_string() builds every part from the given class."""
    with openfile('msg_01.txt') as fp:
        text = fp.read()

    # Create a subclass
    class MyMessage(Message):
        pass

    msg = email.message_from_string(text, MyMessage)
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        text = fp.read()
    msg = email.message_from_string(text, MyMessage)
    # walk() yields the root and every nested part.
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2331
def test_message_from_file_with_class(self):
    """message_from_file() builds every part from the given class."""
    # Create a subclass
    class MyMessage(Message):
        pass

    with openfile('msg_01.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    # walk() yields the root and every nested part.
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2346
2347 def test__all__(self):
2348 module = __import__('email')
2349 # Can't use sorted() here due to Python 2.3 compatibility
2350 all = module.__all__[:]
2351 all.sort()
2352 self.assertEqual(all, [
2353 'base64mime', 'charset', 'encoders', 'errors', 'generator',
R. David Murray96fd54e2010-10-08 15:55:28 +00002354 'header', 'iterators', 'message', 'message_from_binary_file',
2355 'message_from_bytes', 'message_from_file',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002356 'message_from_string', 'mime', 'parser',
2357 'quoprimime', 'utils',
2358 ])
2359
2360 def test_formatdate(self):
2361 now = time.time()
2362 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2363 time.gmtime(now)[:6])
2364
2365 def test_formatdate_localtime(self):
2366 now = time.time()
2367 self.assertEqual(
2368 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2369 time.localtime(now)[:6])
2370
2371 def test_formatdate_usegmt(self):
2372 now = time.time()
2373 self.assertEqual(
2374 utils.formatdate(now, localtime=False),
2375 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2376 self.assertEqual(
2377 utils.formatdate(now, localtime=False, usegmt=True),
2378 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2379
2380 def test_parsedate_none(self):
2381 self.assertEqual(utils.parsedate(''), None)
2382
2383 def test_parsedate_compact(self):
2384 # The FWS after the comma is optional
2385 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2386 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2387
2388 def test_parsedate_no_dayofweek(self):
2389 eq = self.assertEqual
2390 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2391 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2392
2393 def test_parsedate_compact_no_dayofweek(self):
2394 eq = self.assertEqual
2395 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2396 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2397
R. David Murray4a62e892010-12-23 20:35:46 +00002398 def test_parsedate_no_space_before_positive_offset(self):
2399 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2400 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2401
2402 def test_parsedate_no_space_before_negative_offset(self):
2403 # Issue 1155362: we already handled '+' for this case.
2404 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2405 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2406
2407
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002408 def test_parsedate_acceptable_to_time_functions(self):
2409 eq = self.assertEqual
2410 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2411 t = int(time.mktime(timetup))
2412 eq(time.localtime(t)[:6], timetup[:6])
2413 eq(int(time.strftime('%Y', timetup)), 2003)
2414 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2415 t = int(time.mktime(timetup[:9]))
2416 eq(time.localtime(t)[:6], timetup[:6])
2417 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2418
R. David Murray219d1c82010-08-25 00:45:55 +00002419 def test_parsedate_y2k(self):
2420 """Test for parsing a date with a two-digit year.
2421
2422 Parsing a date with a two-digit year should return the correct
2423 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2424 obsoletes RFC822) requires four-digit years.
2425
2426 """
2427 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2428 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2429 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2430 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2431
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002432 def test_parseaddr_empty(self):
2433 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2434 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2435
2436 def test_noquote_dump(self):
2437 self.assertEqual(
2438 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2439 'A Silly Person <person@dom.ain>')
2440
2441 def test_escape_dump(self):
2442 self.assertEqual(
2443 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2444 r'"A \(Very\) Silly Person" <person@dom.ain>')
2445 a = r'A \(Special\) Person'
2446 b = 'person@dom.ain'
2447 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2448
2449 def test_escape_backslashes(self):
2450 self.assertEqual(
2451 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2452 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2453 a = r'Arthur \Backslash\ Foobar'
2454 b = 'person@dom.ain'
2455 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2456
2457 def test_name_with_dot(self):
2458 x = 'John X. Doe <jxd@example.com>'
2459 y = '"John X. Doe" <jxd@example.com>'
2460 a, b = ('John X. Doe', 'jxd@example.com')
2461 self.assertEqual(utils.parseaddr(x), (a, b))
2462 self.assertEqual(utils.parseaddr(y), (a, b))
2463 # formataddr() quotes the name if there's a dot in it
2464 self.assertEqual(utils.formataddr((a, b)), y)
2465
R. David Murray5397e862010-10-02 15:58:26 +00002466 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2467 # issue 10005. Note that in the third test the second pair of
2468 # backslashes is not actually a quoted pair because it is not inside a
2469 # comment or quoted string: the address being parsed has a quoted
2470 # string containing a quoted backslash, followed by 'example' and two
2471 # backslashes, followed by another quoted string containing a space and
2472 # the word 'example'. parseaddr copies those two backslashes
2473 # literally. Per rfc5322 this is not technically correct since a \ may
2474 # not appear in an address outside of a quoted string. It is probably
2475 # a sensible Postel interpretation, though.
2476 eq = self.assertEqual
2477 eq(utils.parseaddr('""example" example"@example.com'),
2478 ('', '""example" example"@example.com'))
2479 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2480 ('', '"\\"example\\" example"@example.com'))
2481 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2482 ('', '"\\\\"example\\\\" example"@example.com'))
2483
R. David Murray63563cd2010-12-18 18:25:38 +00002484 def test_parseaddr_preserves_spaces_in_local_part(self):
2485 # issue 9286. A normal RFC5322 local part should not contain any
2486 # folding white space, but legacy local parts can (they are a sequence
2487 # of atoms, not dotatoms). On the other hand we strip whitespace from
2488 # before the @ and around dots, on the assumption that the whitespace
2489 # around the punctuation is a mistake in what would otherwise be
2490 # an RFC5322 local part. Leading whitespace is, as usual, stripped as well.
# Each assertion passes the expected (name, address) pair first and the
# parseaddr() result second.
# NOTE(review): the first two assertions read identically here; the
# whitespace inside these address literals is what the test is about,
# so confirm the exact spacing of every literal against the real file.
2491 self.assertEqual(('', "merwok wok@xample.com"),
2492 utils.parseaddr("merwok wok@xample.com"))
2493 self.assertEqual(('', "merwok wok@xample.com"),
2494 utils.parseaddr("merwok wok@xample.com"))
2495 self.assertEqual(('', "merwok wok@xample.com"),
2496 utils.parseaddr(" merwok wok @xample.com"))
2497 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2498 utils.parseaddr('merwok"wok" wok@xample.com'))
2499 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2500 utils.parseaddr('merwok. wok . wok@xample.com'))
2501
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002502 def test_multiline_from_comment(self):
2503 x = """\
2504Foo
2505\tBar <foo@example.com>"""
2506 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2507
2508 def test_quote_dump(self):
2509 self.assertEqual(
2510 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2511 r'"A Silly; Person" <person@dom.ain>')
2512
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002513 def test_charset_richcomparisons(self):
2514 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002515 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002516 cset1 = Charset()
2517 cset2 = Charset()
2518 eq(cset1, 'us-ascii')
2519 eq(cset1, 'US-ASCII')
2520 eq(cset1, 'Us-AsCiI')
2521 eq('us-ascii', cset1)
2522 eq('US-ASCII', cset1)
2523 eq('Us-AsCiI', cset1)
2524 ne(cset1, 'usascii')
2525 ne(cset1, 'USASCII')
2526 ne(cset1, 'UsAsCiI')
2527 ne('usascii', cset1)
2528 ne('USASCII', cset1)
2529 ne('UsAsCiI', cset1)
2530 eq(cset1, cset2)
2531 eq(cset2, cset1)
2532
2533 def test_getaddresses(self):
2534 eq = self.assertEqual
2535 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2536 'Bud Person <bperson@dom.ain>']),
2537 [('Al Person', 'aperson@dom.ain'),
2538 ('Bud Person', 'bperson@dom.ain')])
2539
2540 def test_getaddresses_nasty(self):
2541 eq = self.assertEqual
2542 eq(utils.getaddresses(['foo: ;']), [('', '')])
2543 eq(utils.getaddresses(
2544 ['[]*-- =~$']),
2545 [('', ''), ('', ''), ('', '*--')])
2546 eq(utils.getaddresses(
2547 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2548 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2549
2550 def test_getaddresses_embedded_comment(self):
2551 """Test proper handling of a nested comment"""
2552 eq = self.assertEqual
2553 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2554 eq(addrs[0][1], 'foo@bar.com')
2555
2556 def test_utils_quote_unquote(self):
2557 eq = self.assertEqual
2558 msg = Message()
2559 msg.add_header('content-disposition', 'attachment',
2560 filename='foo\\wacky"name')
2561 eq(msg.get_filename(), 'foo\\wacky"name')
2562
2563 def test_get_body_encoding_with_bogus_charset(self):
2564 charset = Charset('not a charset')
2565 self.assertEqual(charset.get_body_encoding(), 'base64')
2566
2567 def test_get_body_encoding_with_uppercase_charset(self):
2568 eq = self.assertEqual
2569 msg = Message()
2570 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2571 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2572 charsets = msg.get_charsets()
2573 eq(len(charsets), 1)
2574 eq(charsets[0], 'utf-8')
2575 charset = Charset(charsets[0])
2576 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002577 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002578 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2579 eq(msg.get_payload(decode=True), b'hello world')
2580 eq(msg['content-transfer-encoding'], 'base64')
2581 # Try another one
2582 msg = Message()
2583 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2584 charsets = msg.get_charsets()
2585 eq(len(charsets), 1)
2586 eq(charsets[0], 'us-ascii')
2587 charset = Charset(charsets[0])
2588 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2589 msg.set_payload('hello world', charset=charset)
2590 eq(msg.get_payload(), 'hello world')
2591 eq(msg['content-transfer-encoding'], '7bit')
2592
2593 def test_charsets_case_insensitive(self):
2594 lc = Charset('us-ascii')
2595 uc = Charset('US-ASCII')
2596 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2597
# Parses msg_43.txt and compares the structure dump written by
# iterators._structure() with the expected listing below.
# NOTE(review): the indentation of the expected listing presumably
# encodes nesting depth -- confirm the leading whitespace of every
# line inside the triple-quoted string against the real file.
2598 def test_partial_falls_inside_message_delivery_status(self):
2599 eq = self.ndiffAssertEqual
2600 # The Parser interface provides chunks of data to FeedParser in 8192
2601 # byte gulps. SF bug #1076485 found one of those chunks inside
2602 # message/delivery-status header block, which triggered an
2603 # unreadline() of NeedMoreData.
2604 msg = self._msgobj('msg_43.txt')
2605 sfp = StringIO()
2606 iterators._structure(msg, sfp)
2607 eq(sfp.getvalue(), """\
2608multipart/report
2609 text/plain
2610 message/delivery-status
2611 text/plain
2612 text/plain
2613 text/plain
2614 text/plain
2615 text/plain
2616 text/plain
2617 text/plain
2618 text/plain
2619 text/plain
2620 text/plain
2621 text/plain
2622 text/plain
2623 text/plain
2624 text/plain
2625 text/plain
2626 text/plain
2627 text/plain
2628 text/plain
2629 text/plain
2630 text/plain
2631 text/plain
2632 text/plain
2633 text/plain
2634 text/plain
2635 text/plain
2636 text/plain
2637 text/rfc822-headers
2638""")
2639
R. David Murraya0b44b52010-12-02 21:47:19 +00002640 def test_make_msgid_domain(self):
2641 self.assertEqual(
2642 email.utils.make_msgid(domain='testdomain-string')[-19:],
2643 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002644
Ezio Melottib3aedd42010-11-20 19:04:17 +00002645
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002646# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Tests for the helpers in email.iterators, plus a regression test
    # for the feed parser's line buffer.

    def test_body_line_iterator(self):
        # Joining the lines from body_line_iterator() must reproduce the
        # body text.
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        # Asking for maintype 'text' must yield both text parts of
        # msg_04.txt.
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        payloads = [part.get_payload()
                    for part in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        # Asking for text/plain must yield the single part of msg_03.txt.
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        payloads = [
            part.get_payload()
            for part in iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
        line endings.  A CR ending one push() followed by a LF starting
        the next push() added an empty line.
        '''
        # Each entry pairs a chunk to push with the number of complete
        # lines that must become readable right after that push.
        chunks = [
            ("a\r \n",  2),
            ("b",       0),
            ("c\n",     1),
            ("",        0),
            ("d\r\n",   1),
            ("e\r",     0),
            ("\nf",     1),
            ("\r\n",    1),
            ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        collected = []
        expected_total = 0
        for chunk, expected_lines in chunks:
            bsf.push(chunk)
            expected_total += expected_lines
            lines_read = 0
            while True:
                line = bsf.readline()
                # NeedMoreData is a sentinel object, so it is tested by
                # identity rather than by equality.
                if line is NeedMoreData:
                    break
                collected.append(line)
                lines_read += 1
            # assertEqual shows both values on failure, which
            # assertTrue(a == b) cannot do.
            self.assertEqual(expected_lines, lines_read)
        self.assertEqual(len(collected), expected_total)
        self.assertEqual(''.join(chunk for chunk, _ in chunks),
                         ''.join(collected))
2733
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002734
Ezio Melottib3aedd42010-11-20 19:04:17 +00002735
class TestParsers(TestEmailBase):
    # Tests for Parser, HeaderParser and message_from_string()/
    # message_from_file() on well-formed and slightly broken input.

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body is left unparsed: a single string, not subparts.
        self.assertFalse(msg.is_multipart())
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        # Parts separated by CRLF line endings are split correctly and
        # the CRLFs inside a part body are kept.
        eq = self.assertEqual
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        out = StringIO()
        Generator(out).flatten(msg, linesep='\r\n')
        self.assertEqual(out.getvalue(), text)

    # Class-wide: do not truncate the diff shown for failed comparisons.
    maxDiff = None

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        eq(part1.is_multipart(), 1)
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        eq(part1a.is_multipart(), 0)
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        eq(part2.is_multipart(), 1)
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        eq(part2a.is_multipart(), 0)
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        # Unusual but printable characters are accepted in header names.
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        # The first line has a space before any colon, so no headers
        # at all are expected from this input.
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() accepts any iterable, so the check does not depend on
        # keys() handing back a mutable list.
        headers = sorted(msg.keys())
        eq(headers, ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002898
Ezio Melottib3aedd42010-11-20 19:04:17 +00002899
R. David Murray96fd54e2010-10-08 15:55:28 +00002900class Test8BitBytesHandling(unittest.TestCase):
2901 # In Python3 all input is string, but that doesn't work if the actual input
2902 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
2903 # decode byte streams using the surrogateescape error handler, and
2904 # reconvert to binary at appropriate places if we detect surrogates. This
2905 # doesn't allow us to transform headers with 8bit bytes (they get munged),
2906 # but it does allow us to parse and preserve them, and to decode body
2907 # parts that use an 8bit CTE.
2908
2909 bodytest_msg = textwrap.dedent("""\
2910 From: foo@bar.com
2911 To: baz
2912 Mime-Version: 1.0
2913 Content-Type: text/plain; charset={charset}
2914 Content-Transfer-Encoding: {cte}
2915
2916 {bodyline}
2917 """)
2918
2919 def test_known_8bit_CTE(self):
2920 m = self.bodytest_msg.format(charset='utf-8',
2921 cte='8bit',
2922 bodyline='pöstal').encode('utf-8')
2923 msg = email.message_from_bytes(m)
2924 self.assertEqual(msg.get_payload(), "pöstal\n")
2925 self.assertEqual(msg.get_payload(decode=True),
2926 "pöstal\n".encode('utf-8'))
2927
2928 def test_unknown_8bit_CTE(self):
2929 m = self.bodytest_msg.format(charset='notavalidcharset',
2930 cte='8bit',
2931 bodyline='pöstal').encode('utf-8')
2932 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00002933 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00002934 self.assertEqual(msg.get_payload(decode=True),
2935 "pöstal\n".encode('utf-8'))
2936
2937 def test_8bit_in_quopri_body(self):
2938 # This is non-RFC compliant data...without 'decode' the library code
2939 # decodes the body using the charset from the headers, and because the
2940 # source byte really is utf-8 this works. This is likely to fail
2941 # against real dirty data (ie: produce mojibake), but the data is
2942 # invalid anyway so it is as good a guess as any. But this means that
2943 # this test just confirms the current behavior; that behavior is not
2944 # necessarily the best possible behavior. With 'decode' it is
2945 # returning the raw bytes, so that test should be of correct behavior,
2946 # or at least produce the same result that email4 did.
2947 m = self.bodytest_msg.format(charset='utf-8',
2948 cte='quoted-printable',
2949 bodyline='p=C3=B6stál').encode('utf-8')
2950 msg = email.message_from_bytes(m)
2951 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
2952 self.assertEqual(msg.get_payload(decode=True),
2953 'pöstál\n'.encode('utf-8'))
2954
2955 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
2956 # This is similar to the previous test, but proves that if the 8bit
2957 # byte is undecodeable in the specified charset, it gets replaced
2958 # by the unicode 'unknown' character. Again, this may or may not
2959 # be the ideal behavior. Note that if decode=False none of the
2960 # decoders will get involved, so this is the only test we need
2961 # for this behavior.
2962 m = self.bodytest_msg.format(charset='ascii',
2963 cte='quoted-printable',
2964 bodyline='p=C3=B6stál').encode('utf-8')
2965 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00002966 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00002967 self.assertEqual(msg.get_payload(decode=True),
2968 'pöstál\n'.encode('utf-8'))
2969
2970 def test_8bit_in_base64_body(self):
2971 # Sticking an 8bit byte in a base64 block makes it undecodable by
2972 # normal means, so the block is returned undecoded, but as bytes.
2973 m = self.bodytest_msg.format(charset='utf-8',
2974 cte='base64',
2975 bodyline='cMO2c3RhbAá=').encode('utf-8')
2976 msg = email.message_from_bytes(m)
2977 self.assertEqual(msg.get_payload(decode=True),
2978 'cMO2c3RhbAá=\n'.encode('utf-8'))
2979
2980 def test_8bit_in_uuencode_body(self):
2981 # Sticking an 8bit byte in a uuencode block makes it undecodable by
2982 # normal means, so the block is returned undecoded, but as bytes.
# The body line is substituted into self.bodytest_msg, encoded as utf-8
# and parsed; get_payload(decode=True) must return the same bytes.
# NOTE(review): the spaces inside the bodyline literal are significant
# to this test -- confirm their exact count against the real file.
2983 m = self.bodytest_msg.format(charset='utf-8',
2984 cte='uuencode',
2985 bodyline='<,.V<W1A; á ').encode('utf-8')
2986 msg = email.message_from_bytes(m)
2987 self.assertEqual(msg.get_payload(decode=True),
2988 '<,.V<W1A; á \n'.encode('utf-8'))
2989
2990
R. David Murray92532142011-01-07 23:25:30 +00002991 headertest_headers = (
2992 ('From: foo@bar.com', ('From', 'foo@bar.com')),
2993 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
2994 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
2995 '\tJean de Baddie',
2996 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
2997 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
2998 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
2999 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3000 )
3001 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3002 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003003
3004 def test_get_8bit_header(self):
3005 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003006 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3007 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003008
3009 def test_print_8bit_headers(self):
3010 msg = email.message_from_bytes(self.headertest_msg)
3011 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003012 textwrap.dedent("""\
3013 From: {}
3014 To: {}
3015 Subject: {}
3016 From: {}
3017
3018 Yes, they are flying.
3019 """).format(*[expected[1] for (_, expected) in
3020 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003021
3022 def test_values_with_8bit_headers(self):
3023 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003024 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003025 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003026 'b\uFFFD\uFFFDz',
3027 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3028 'coll\uFFFD\uFFFDgue, le pouf '
3029 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003030 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003031 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003032
3033 def test_items_with_8bit_headers(self):
3034 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003035 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003036 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003037 ('To', 'b\uFFFD\uFFFDz'),
3038 ('Subject', 'Maintenant je vous '
3039 'pr\uFFFD\uFFFDsente '
3040 'mon coll\uFFFD\uFFFDgue, le pouf '
3041 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3042 '\tJean de Baddie'),
3043 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003044
3045 def test_get_all_with_8bit_headers(self):
3046 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003047 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003048 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003049 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003050
R David Murraya2150232011-03-16 21:11:23 -04003051 def test_get_content_type_with_8bit(self):
3052 msg = email.message_from_bytes(textwrap.dedent("""\
3053 Content-Type: text/pl\xA7in; charset=utf-8
3054 """).encode('latin-1'))
3055 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3056 self.assertEqual(msg.get_content_maintype(), "text")
3057 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3058
3059 def test_get_params_with_8bit(self):
3060 msg = email.message_from_bytes(
3061 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3062 self.assertEqual(msg.get_params(header='x-header'),
3063 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3064 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3065 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3066 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3067
# Parses a Content-Type header whose title* parameter value contains an
# 8bit byte and expects get_param('title') to return the
# (charset, language, value) triple with that byte replaced by U+FFFD.
# NOTE(review): the title* line presumably has to be indented deeper
# than the Content-Type line so that it is read as a continuation of
# that header -- confirm the literal's leading whitespace.
3068 def test_get_rfc2231_params_with_8bit(self):
3069 msg = email.message_from_bytes(textwrap.dedent("""\
3070 Content-Type: text/plain; charset=us-ascii;
3071 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3072 ).encode('latin-1'))
3073 self.assertEqual(msg.get_param('title'),
3074 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3075
# Replaces a title* parameter whose value contains an 8bit byte via
# set_param() and expects get_param('title') to return the new value.
# NOTE(review): the title* line presumably has to be indented deeper
# than the Content-Type line so that it is read as a continuation of
# that header -- confirm the literal's leading whitespace.
3076 def test_set_rfc2231_params_with_8bit(self):
3077 msg = email.message_from_bytes(textwrap.dedent("""\
3078 Content-Type: text/plain; charset=us-ascii;
3079 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3080 ).encode('latin-1'))
3081 msg.set_param('title', 'test')
3082 self.assertEqual(msg.get_param('title'), 'test')
3083
# Removes a title* parameter whose value contains an 8bit byte via
# del_param(); afterwards the parameter must be gone while the main
# content type is still reported as 'text'.
# NOTE(review): the title* line presumably has to be indented deeper
# than the Content-Type line so that it is read as a continuation of
# that header -- confirm the literal's leading whitespace.
3084 def test_del_rfc2231_params_with_8bit(self):
3085 msg = email.message_from_bytes(textwrap.dedent("""\
3086 Content-Type: text/plain; charset=us-ascii;
3087 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3088 ).encode('latin-1'))
3089 msg.del_param('title')
3090 self.assertEqual(msg.get_param('title'), None)
3091 self.assertEqual(msg.get_content_maintype(), 'text')
3092
3093 def test_get_payload_with_8bit_cte_header(self):
3094 msg = email.message_from_bytes(textwrap.dedent("""\
3095 Content-Transfer-Encoding: b\xa7se64
3096 Content-Type: text/plain; charset=latin-1
3097
3098 payload
3099 """).encode('latin-1'))
3100 self.assertEqual(msg.get_payload(), 'payload\n')
3101 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3102
R. David Murray96fd54e2010-10-08 15:55:28 +00003103 non_latin_bin_msg = textwrap.dedent("""\
3104 From: foo@bar.com
3105 To: báz
3106 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3107 \tJean de Baddie
3108 Mime-Version: 1.0
3109 Content-Type: text/plain; charset="utf-8"
3110 Content-Transfer-Encoding: 8bit
3111
3112 Да, они летят.
3113 """).encode('utf-8')
3114
3115 def test_bytes_generator(self):
3116 msg = email.message_from_bytes(self.non_latin_bin_msg)
3117 out = BytesIO()
3118 email.generator.BytesGenerator(out).flatten(msg)
3119 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3120
R. David Murray7372a072011-01-26 21:21:32 +00003121 def test_bytes_generator_handles_None_body(self):
3122 #Issue 11019
3123 msg = email.message.Message()
3124 out = BytesIO()
3125 email.generator.BytesGenerator(out).flatten(msg)
3126 self.assertEqual(out.getvalue(), b"\n")
3127
R. David Murray92532142011-01-07 23:25:30 +00003128 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003129 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003130 To: =?unknown-8bit?q?b=C3=A1z?=
3131 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3132 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3133 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003134 Mime-Version: 1.0
3135 Content-Type: text/plain; charset="utf-8"
3136 Content-Transfer-Encoding: base64
3137
3138 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3139 """)
3140
3141 def test_generator_handles_8bit(self):
3142 msg = email.message_from_bytes(self.non_latin_bin_msg)
3143 out = StringIO()
3144 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003145 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003146
3147 def test_bytes_generator_with_unix_from(self):
3148 # The unixfrom contains a current date, so we can't check it
3149 # literally. Just make sure the first word is 'From' and the
3150 # rest of the message matches the input.
3151 msg = email.message_from_bytes(self.non_latin_bin_msg)
3152 out = BytesIO()
3153 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3154 lines = out.getvalue().split(b'\n')
3155 self.assertEqual(lines[0].split()[0], b'From')
3156 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3157
# Variant of the wrapped expectation with one longer Subject line: split the
# wrapped text into lines, replace the entries at indexes 2 and 3 with a
# single line, and join the list back into a string.
non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
non_latin_bin_msg_as7bit[2:4] = [
    'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
    'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3163
def test_message_from_binary_file(self):
    # Round-trip through a real file: write the raw bytes, parse them back
    # with BytesParser from a binary handle, and compare the string form.
    path = 'test.msg'
    self.addCleanup(unlink, path)
    with open(path, 'wb') as sink:
        sink.write(self.non_latin_bin_msg)
    with open(path, 'rb') as source:
        parsed = email.parser.BytesParser().parse(source)
    self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)
3172
# Message declaring charset latin-1 with an 8bit body, stored as latin-1
# encoded bytes for the bytes-based entry points.
latin_bin_msg = textwrap.dedent("""\
    From: foo@bar.com
    To: Dinsdale
    Subject: Nudge nudge, wink, wink
    Mime-Version: 1.0
    Content-Type: text/plain; charset="latin-1"
    Content-Transfer-Encoding: 8bit

    oh là là, know what I mean, know what I mean?
    """).encode('latin-1')

# String form the tests compare against: same headers, but the charset is
# spelled iso-8859-1 and the body is quoted-printable.
latin_bin_msg_as7bit = textwrap.dedent("""\
    From: foo@bar.com
    To: Dinsdale
    Subject: Nudge nudge, wink, wink
    Mime-Version: 1.0
    Content-Type: text/plain; charset="iso-8859-1"
    Content-Transfer-Encoding: quoted-printable

    oh l=E0 l=E0, know what I mean, know what I mean?
    """)
3194
def test_string_generator_reencodes_to_quopri_when_appropriate(self):
    # str() of the bytes-parsed latin-1 message must match the
    # quoted-printable expectation.
    parsed = email.message_from_bytes(self.latin_bin_msg)
    rendered = str(parsed)
    self.assertEqual(rendered, self.latin_bin_msg_as7bit)
3198
def test_decoded_generator_emits_unicode_body(self):
    parsed = email.message_from_bytes(self.latin_bin_msg)
    sink = StringIO()
    email.generator.DecodedGenerator(sink).flatten(parsed)
    # DecodedGenerator output contains an extra blank line compared to the
    # input message.  RDM: not sure if this is a bug or not, but it is not
    # specific to the 8bit->7bit conversion.
    expected = self.latin_bin_msg.decode('latin-1') + '\n'
    self.assertEqual(sink.getvalue(), expected)
3208
def test_bytes_feedparser(self):
    # Feed the raw message to BytesFeedParser in 10-byte slices and check
    # the string form of the resulting message.
    raw = self.latin_bin_msg
    step = 10
    feeder = email.feedparser.BytesFeedParser()
    for start in range(0, len(raw), step):
        feeder.feed(raw[start:start + step])
    parsed = feeder.close()
    self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)
3215
def test_crlf_flatten(self):
    # Parsing msg_26.txt and flattening it with linesep='\r\n' must
    # reproduce the file contents exactly.
    with openfile('msg_26.txt', 'rb') as fp:
        original = fp.read()
    parsed = email.message_from_bytes(original)
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue(), original)
R David Murrayc5c14722011-04-06 08:13:02 -04003224
def test_8bit_multipart(self):
    # Issue 11605
    # Parse a multipart/alternative message whose parts are declared 8bit
    # utf-8, flatten it with BytesGenerator, and require the output to be
    # byte-identical to the input.
    # NOTE(review): every line inside the literal shows the same indentation
    # in the reviewed copy; the 'boundary=' line presumably needs extra
    # leading whitespace to continue the Content-Type header -- confirm.
    source = textwrap.dedent("""\
        Date: Fri, 18 Mar 2011 17:15:43 +0100
        To: foo@example.com
        From: foodwatch-Newsletter <bar@example.com>
        Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
        Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
        MIME-Version: 1.0
        Content-Type: multipart/alternative;
        boundary="b1_76a486bee62b0d200f33dc2ca08220ad"

        --b1_76a486bee62b0d200f33dc2ca08220ad
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Guten Tag, ,

        mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
        Nachrichten aus Japan.


        --b1_76a486bee62b0d200f33dc2ca08220ad
        Content-Type: text/html; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
        "http://www.w3.org/TR/html4/loose.dtd">
        <html lang="de">
        <head>
        <title>foodwatch - Newsletter</title>
        </head>
        <body>
        <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
        die Nachrichten aus Japan.</p>
        </body>
        </html>
        --b1_76a486bee62b0d200f33dc2ca08220ad--

        """).encode('utf-8')
    msg = email.message_from_bytes(source)
    s = BytesIO()
    g = email.generator.BytesGenerator(s)
    g.flatten(msg)
    self.assertEqual(s.getvalue(), source)
3270
# unittest's maxDiff set to None: failure diffs are shown without truncation.
maxDiff = None
3272
Ezio Melottib3aedd42010-11-20 19:04:17 +00003273
class BaseTestBytesGeneratorIdempotent:
    """Helpers for idempotency tests driven through BytesGenerator.

    Subclasses provide ``linesep`` (str), ``blinesep`` (bytes) and
    ``normalize_linesep_regex`` (a compiled bytes pattern).
    """

    maxDiff = None

    def _msgobj(self, filename):
        """Return ``(message, data)`` for a data file.

        The file is read as bytes, every match of
        ``normalize_linesep_regex`` is replaced by ``blinesep``, and the
        normalized bytes are both parsed and returned.
        """
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        """Flatten *msg* with header wrapping disabled and compare to *data*."""
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, sink.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        """Compare two byte strings as lists of lines split on b'\\n'."""
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        first_lines = str1.split(b'\n')
        second_lines = str2.split(b'\n')
        self.assertListEqual(first_lines, second_lines)
3295
3296
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    """Idempotency tests flattened with '\\n' as the line separator."""

    # CRLF pairs in the reference data are rewritten to blinesep.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3302
3303
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    """Idempotency tests flattened with '\\r\\n' as the line separator."""

    # Only a '\n' not already preceded by '\r' is rewritten to blinesep.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3309
Ezio Melottib3aedd42010-11-20 19:04:17 +00003310
class TestBase64(unittest.TestCase):
    """Tests for the base64 helpers in email.base64mime."""

    def test_len(self):
        eq = self.assertEqual
        # header_length() must agree with the size of the actual encoding.
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Expected encoded length for each input size in the range.
            if size == 0:
                bsize = 0
            elif size <= 3:
                bsize = 4
            elif size <= 6:
                bsize = 8
            elif size <= 9:
                bsize = 12
            elif size <= 12:
                bsize = 16
            else:
                bsize = 20
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        eq(base64mime.body_encode(b''), b'')
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
        # Previously this line repeated the default-charset newline check
        # above; combine the charset option with multi-line input instead.
        eq(he('hello\nworld', charset='iso-8859-2'),
           '=?iso-8859-2?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003361
3362
Ezio Melottib3aedd42010-11-20 19:04:17 +00003363
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003364class TestQuopri(unittest.TestCase):
3365 def setUp(self):
3366 # Set of characters (as byte integers) that don't need to be encoded
3367 # in headers.
3368 self.hlit = list(chain(
3369 range(ord('a'), ord('z') + 1),
3370 range(ord('A'), ord('Z') + 1),
3371 range(ord('0'), ord('9') + 1),
Guido van Rossum9604e662007-08-30 03:46:43 +00003372 (c for c in b'!*+-/')))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003373 # Set of characters (as byte integers) that do need to be encoded in
3374 # headers.
3375 self.hnon = [c for c in range(256) if c not in self.hlit]
3376 assert len(self.hlit) + len(self.hnon) == 256
3377 # Set of characters (as byte integers) that don't need to be encoded
3378 # in bodies.
3379 self.blit = list(range(ord(' '), ord('~') + 1))
3380 self.blit.append(ord('\t'))
3381 self.blit.remove(ord('='))
3382 # Set of characters (as byte integers) that do need to be encoded in
3383 # bodies.
3384 self.bnon = [c for c in range(256) if c not in self.blit]
3385 assert len(self.blit) + len(self.bnon) == 256
3386
Guido van Rossum9604e662007-08-30 03:46:43 +00003387 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003388 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003389 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003390 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003391 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003392 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003393 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003394
Guido van Rossum9604e662007-08-30 03:46:43 +00003395 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003396 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003397 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003398 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003399 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003400 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003401 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003402
3403 def test_header_quopri_len(self):
3404 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003405 eq(quoprimime.header_length(b'hello'), 5)
3406 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003407 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003408 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003409 # =?xxx?q?...?= means 10 extra characters
3410 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003411 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3412 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003413 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003414 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003415 # =?xxx?q?...?= means 10 extra characters
3416 10)
3417 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003418 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003419 'expected length 1 for %r' % chr(c))
3420 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003421 # Space is special; it's encoded to _
3422 if c == ord(' '):
3423 continue
3424 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003425 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003426 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003427
3428 def test_body_quopri_len(self):
3429 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003430 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003431 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003432 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003433 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003434
3435 def test_quote_unquote_idempotent(self):
3436 for x in range(256):
3437 c = chr(x)
3438 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3439
R David Murrayec1b5b82011-03-23 14:19:05 -04003440 def _test_header_encode(self, header, expected_encoded_header, charset=None):
3441 if charset is None:
3442 encoded_header = quoprimime.header_encode(header)
3443 else:
3444 encoded_header = quoprimime.header_encode(header, charset)
3445 self.assertEqual(encoded_header, expected_encoded_header)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003446
R David Murraycafd79d2011-03-23 15:25:55 -04003447 def test_header_encode_null(self):
3448 self._test_header_encode(b'', '')
3449
R David Murrayec1b5b82011-03-23 14:19:05 -04003450 def test_header_encode_one_word(self):
3451 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
3452
3453 def test_header_encode_two_lines(self):
3454 self._test_header_encode(b'hello\nworld',
3455 '=?iso-8859-1?q?hello=0Aworld?=')
3456
3457 def test_header_encode_non_ascii(self):
3458 self._test_header_encode(b'hello\xc7there',
3459 '=?iso-8859-1?q?hello=C7there?=')
3460
3461 def test_header_encode_alt_charset(self):
3462 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
3463 charset='iso-8859-2')
3464
3465 def _test_header_decode(self, encoded_header, expected_decoded_header):
3466 decoded_header = quoprimime.header_decode(encoded_header)
3467 self.assertEqual(decoded_header, expected_decoded_header)
3468
3469 def test_header_decode_null(self):
3470 self._test_header_decode('', '')
3471
3472 def test_header_decode_one_word(self):
3473 self._test_header_decode('hello', 'hello')
3474
3475 def test_header_decode_two_lines(self):
3476 self._test_header_decode('hello=0Aworld', 'hello\nworld')
3477
3478 def test_header_decode_non_ascii(self):
3479 self._test_header_decode('hello=C7there', 'hello\xc7there')
3480
3481 def _test_decode(self, encoded, expected_decoded, eol=None):
3482 if eol is None:
3483 decoded = quoprimime.decode(encoded)
3484 else:
3485 decoded = quoprimime.decode(encoded, eol=eol)
3486 self.assertEqual(decoded, expected_decoded)
3487
3488 def test_decode_null_word(self):
3489 self._test_decode('', '')
3490
3491 def test_decode_null_line_null_word(self):
3492 self._test_decode('\r\n', '\n')
3493
3494 def test_decode_one_word(self):
3495 self._test_decode('hello', 'hello')
3496
3497 def test_decode_one_word_eol(self):
3498 self._test_decode('hello', 'hello', eol='X')
3499
3500 def test_decode_one_line(self):
3501 self._test_decode('hello\r\n', 'hello\n')
3502
3503 def test_decode_one_line_lf(self):
3504 self._test_decode('hello\n', 'hello\n')
3505
R David Murraycafd79d2011-03-23 15:25:55 -04003506 def test_decode_one_line_cr(self):
3507 self._test_decode('hello\r', 'hello\n')
3508
3509 def test_decode_one_line_nl(self):
3510 self._test_decode('hello\n', 'helloX', eol='X')
3511
3512 def test_decode_one_line_crnl(self):
3513 self._test_decode('hello\r\n', 'helloX', eol='X')
3514
R David Murrayec1b5b82011-03-23 14:19:05 -04003515 def test_decode_one_line_one_word(self):
3516 self._test_decode('hello\r\nworld', 'hello\nworld')
3517
3518 def test_decode_one_line_one_word_eol(self):
3519 self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
3520
3521 def test_decode_two_lines(self):
3522 self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
3523
R David Murraycafd79d2011-03-23 15:25:55 -04003524 def test_decode_two_lines_eol(self):
3525 self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
3526
R David Murrayec1b5b82011-03-23 14:19:05 -04003527 def test_decode_one_long_line(self):
3528 self._test_decode('Spam' * 250, 'Spam' * 250)
3529
3530 def test_decode_one_space(self):
3531 self._test_decode(' ', '')
3532
3533 def test_decode_multiple_spaces(self):
3534 self._test_decode(' ' * 5, '')
3535
3536 def test_decode_one_line_trailing_spaces(self):
3537 self._test_decode('hello \r\n', 'hello\n')
3538
3539 def test_decode_two_lines_trailing_spaces(self):
3540 self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')
3541
3542 def test_decode_quoted_word(self):
3543 self._test_decode('=22quoted=20words=22', '"quoted words"')
3544
3545 def test_decode_uppercase_quoting(self):
3546 self._test_decode('ab=CD=EF', 'ab\xcd\xef')
3547
3548 def test_decode_lowercase_quoting(self):
3549 self._test_decode('ab=cd=ef', 'ab\xcd\xef')
3550
3551 def test_decode_soft_line_break(self):
3552 self._test_decode('soft line=\r\nbreak', 'soft linebreak')
3553
3554 def test_decode_false_quoting(self):
3555 self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
3556
3557 def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
3558 kwargs = {}
3559 if maxlinelen is None:
3560 # Use body_encode's default.
3561 maxlinelen = 76
3562 else:
3563 kwargs['maxlinelen'] = maxlinelen
3564 if eol is None:
3565 # Use body_encode's default.
3566 eol = '\n'
3567 else:
3568 kwargs['eol'] = eol
3569 encoded_body = quoprimime.body_encode(body, **kwargs)
3570 self.assertEqual(encoded_body, expected_encoded_body)
3571 if eol == '\n' or eol == '\r\n':
3572 # We know how to split the result back into lines, so maxlinelen
3573 # can be checked.
3574 for line in encoded_body.splitlines():
3575 self.assertLessEqual(len(line), maxlinelen)
3576
3577 def test_encode_null(self):
3578 self._test_encode('', '')
3579
3580 def test_encode_null_lines(self):
3581 self._test_encode('\n\n', '\n\n')
3582
3583 def test_encode_one_line(self):
3584 self._test_encode('hello\n', 'hello\n')
3585
3586 def test_encode_one_line_crlf(self):
3587 self._test_encode('hello\r\n', 'hello\n')
3588
3589 def test_encode_one_line_eol(self):
3590 self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
3591
3592 def test_encode_one_space(self):
3593 self._test_encode(' ', '=20')
3594
3595 def test_encode_one_line_one_space(self):
3596 self._test_encode(' \n', '=20\n')
3597
R David Murrayb938c8c2011-03-24 12:19:26 -04003598# XXX: body_encode() expect strings, but uses ord(char) from these strings
3599# to index into a 256-entry list. For code points above 255, this will fail.
3600# Should there be a check for 8-bit only ord() values in body, or at least
3601# a comment about the expected input?
3602
3603 def test_encode_two_lines_one_space(self):
3604 self._test_encode(' \n \n', '=20\n=20\n')
3605
R David Murrayec1b5b82011-03-23 14:19:05 -04003606 def test_encode_one_word_trailing_spaces(self):
3607 self._test_encode('hello ', 'hello =20')
3608
3609 def test_encode_one_line_trailing_spaces(self):
3610 self._test_encode('hello \n', 'hello =20\n')
3611
3612 def test_encode_one_word_trailing_tab(self):
3613 self._test_encode('hello \t', 'hello =09')
3614
3615 def test_encode_one_line_trailing_tab(self):
3616 self._test_encode('hello \t\n', 'hello =09\n')
3617
3618 def test_encode_trailing_space_before_maxlinelen(self):
3619 self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
3620
R David Murrayb938c8c2011-03-24 12:19:26 -04003621 def test_encode_trailing_space_at_maxlinelen(self):
3622 self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
3623
R David Murrayec1b5b82011-03-23 14:19:05 -04003624 def test_encode_trailing_space_beyond_maxlinelen(self):
R David Murrayb938c8c2011-03-24 12:19:26 -04003625 self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
3626
3627 def test_encode_whitespace_lines(self):
3628 self._test_encode(' \n' * 5, '=20\n' * 5)
R David Murrayec1b5b82011-03-23 14:19:05 -04003629
3630 def test_encode_quoted_equals(self):
3631 self._test_encode('a = b', 'a =3D b')
3632
3633 def test_encode_one_long_string(self):
3634 self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
3635
3636 def test_encode_one_long_line(self):
3637 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
3638
3639 def test_encode_one_very_long_line(self):
3640 self._test_encode('x' * 200 + '\n',
3641 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
3642
3643 def test_encode_one_long_line(self):
3644 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
3645
3646 def test_encode_shortest_maxlinelen(self):
3647 self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003648
R David Murrayb938c8c2011-03-24 12:19:26 -04003649 def test_encode_maxlinelen_too_small(self):
3650 self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
3651
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003652 def test_encode(self):
3653 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003654 eq(quoprimime.body_encode(''), '')
3655 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003656 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00003657 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003658 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00003659 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003660xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
3661 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
3662x xxxx xxxx xxxx xxxx=20""")
3663 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00003664 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
3665 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003666xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
3667 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
3668x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00003669 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003670one line
3671
3672two line"""), """\
3673one line
3674
3675two line""")
3676
3677
Ezio Melottib3aedd42010-11-20 19:04:17 +00003678
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003679# Test the Charset class
class TestCharset(unittest.TestCase):
    """Tests for header and body encoding through email.charset.Charset."""

    def tearDown(self):
        # Remove the 'fake' charset registered by test_body_encode, if any.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        check = self.assertEqual
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        check(ascii_charset.header_encode('Hello World!'), 'Hello World!')
        # Test 8-bit idempotency with us-ascii
        s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_charset.header_encode, s)
        utf8_charset = Charset('utf-8')
        check(utf8_charset.header_encode(s),
              '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # Try a charset with QP body encoding
        latin1 = Charset('iso-8859-1')
        check('hello w=F6rld', latin1.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        utf8 = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', utf8.body_encode(b'hello world'))
        # Try a charset with None body encoding
        ascii_charset = Charset('us-ascii')
        check('hello world', ascii_charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake = Charset('fake')
        check('hello world', fake.body_encode('hello world'))

    def test_unicode_charset_name(self):
        named = Charset('us-ascii')
        self.assertEqual(str(named), 'us-ascii')
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3733 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3734
3735
Ezio Melottib3aedd42010-11-20 19:04:17 +00003736
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003737# Test multilingual MIME headers.
3738class TestHeader(TestEmailBase):
def test_simple(self):
    # Appending text that starts with a space keeps a single separator.
    check = self.ndiffAssertEqual
    header = Header('Hello World!')
    check(header.encode(), 'Hello World!')
    header.append(' Goodbye World!')
    check(header.encode(), 'Hello World! Goodbye World!')
3745
def test_simple_surprise(self):
    # Appending text without a leading space still yields a separating space.
    check = self.ndiffAssertEqual
    header = Header('Hello World!')
    check(header.encode(), 'Hello World!')
    header.append('Goodbye World!')
    check(header.encode(), 'Hello World! Goodbye World!')
3752
3753 def test_header_needs_no_decoding(self):
3754 h = 'no decoding needed'
3755 self.assertEqual(decode_header(h), [(h, None)])
3756
3757 def test_long(self):
3758 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
3759 maxlinelen=76)
3760 for l in h.encode(splitchars=' ').split('\n '):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003761 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003762
def test_multilingual(self):
    # Build one header from three chunks in three charsets (iso-8859-1,
    # iso-8859-2, utf-8) and check its encoded form, the result of
    # decode_header(), its str() form, and a make_header() round trip.
    eq = self.ndiffAssertEqual
    g = Charset("iso-8859-1")
    cz = Charset("iso-8859-2")
    utf8 = Charset("utf-8")
    # The first two chunks are bytes in their respective charsets; the
    # third is a str.
    g_head = (b'Die Mieter treten hier ein werden mit einem '
              b'Foerderband komfortabel den Korridor entlang, '
              b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
              b'gegen die rotierenden Klingen bef\xf6rdert. ')
    cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
               b'd\xf9vtipu.. ')
    utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                 '\u3044\u307e\u3059\u3002')
    h = Header(g_head, g)
    h.append(cz_head, cz)
    h.append(utf8_head, utf8)
    enc = h.encode(maxlinelen=76)
    # Expected folding: q-encoded words for the two iso-8859 chunks and
    # b-encoded words for the utf-8 chunk.
    eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
    # Decoding must yield exactly one (bytes, charset) pair per chunk.
    decoded = decode_header(enc)
    eq(len(decoded), 3)
    eq(decoded[0], (g_head, 'iso-8859-1'))
    eq(decoded[1], (cz_head, 'iso-8859-2'))
    eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
    # str(h) is compared against the whole text, spelled here as UTF-8
    # bytes and decoded.
    ustr = str(h)
    eq(ustr,
       (b'Die Mieter treten hier ein werden mit einem Foerderband '
        b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
        b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
        b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
        b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
        b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
        b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
        b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
        b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
        b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
        b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
        b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
        b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
        b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
        b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
        b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
        ).decode('utf-8'))
    # Test make_header()
    newh = make_header(decode_header(enc))
    eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003825
3826 def test_empty_header_encode(self):
3827 h = Header()
3828 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00003829
def test_header_ctor_default_args(self):
    # A default-constructed Header compares equal to '' and, after one
    # iso-8859-1 chunk is appended, to that chunk's text.
    check = self.ndiffAssertEqual
    header = Header()
    check(header, '')
    latin1 = Charset('iso-8859-1')
    header.append('foo', latin1)
    check(header, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003836
3837 def test_explicit_maxlinelen(self):
3838 eq = self.ndiffAssertEqual
3839 hstr = ('A very long line that must get split to something other '
3840 'than at the 76th character boundary to test the non-default '
3841 'behavior')
3842 h = Header(hstr)
3843 eq(h.encode(), '''\
3844A very long line that must get split to something other than at the 76th
3845 character boundary to test the non-default behavior''')
3846 eq(str(h), hstr)
3847 h = Header(hstr, header_name='Subject')
3848 eq(h.encode(), '''\
3849A very long line that must get split to something other than at the
3850 76th character boundary to test the non-default behavior''')
3851 eq(str(h), hstr)
3852 h = Header(hstr, maxlinelen=1024, header_name='Subject')
3853 eq(h.encode(), hstr)
3854 eq(str(h), hstr)
3855
Guido van Rossum9604e662007-08-30 03:46:43 +00003856 def test_quopri_splittable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003857 eq = self.ndiffAssertEqual
3858 h = Header(charset='iso-8859-1', maxlinelen=20)
Guido van Rossum9604e662007-08-30 03:46:43 +00003859 x = 'xxxx ' * 20
3860 h.append(x)
3861 s = h.encode()
3862 eq(s, """\
3863=?iso-8859-1?q?xxx?=
3864 =?iso-8859-1?q?x_?=
3865 =?iso-8859-1?q?xx?=
3866 =?iso-8859-1?q?xx?=
3867 =?iso-8859-1?q?_x?=
3868 =?iso-8859-1?q?xx?=
3869 =?iso-8859-1?q?x_?=
3870 =?iso-8859-1?q?xx?=
3871 =?iso-8859-1?q?xx?=
3872 =?iso-8859-1?q?_x?=
3873 =?iso-8859-1?q?xx?=
3874 =?iso-8859-1?q?x_?=
3875 =?iso-8859-1?q?xx?=
3876 =?iso-8859-1?q?xx?=
3877 =?iso-8859-1?q?_x?=
3878 =?iso-8859-1?q?xx?=
3879 =?iso-8859-1?q?x_?=
3880 =?iso-8859-1?q?xx?=
3881 =?iso-8859-1?q?xx?=
3882 =?iso-8859-1?q?_x?=
3883 =?iso-8859-1?q?xx?=
3884 =?iso-8859-1?q?x_?=
3885 =?iso-8859-1?q?xx?=
3886 =?iso-8859-1?q?xx?=
3887 =?iso-8859-1?q?_x?=
3888 =?iso-8859-1?q?xx?=
3889 =?iso-8859-1?q?x_?=
3890 =?iso-8859-1?q?xx?=
3891 =?iso-8859-1?q?xx?=
3892 =?iso-8859-1?q?_x?=
3893 =?iso-8859-1?q?xx?=
3894 =?iso-8859-1?q?x_?=
3895 =?iso-8859-1?q?xx?=
3896 =?iso-8859-1?q?xx?=
3897 =?iso-8859-1?q?_x?=
3898 =?iso-8859-1?q?xx?=
3899 =?iso-8859-1?q?x_?=
3900 =?iso-8859-1?q?xx?=
3901 =?iso-8859-1?q?xx?=
3902 =?iso-8859-1?q?_x?=
3903 =?iso-8859-1?q?xx?=
3904 =?iso-8859-1?q?x_?=
3905 =?iso-8859-1?q?xx?=
3906 =?iso-8859-1?q?xx?=
3907 =?iso-8859-1?q?_x?=
3908 =?iso-8859-1?q?xx?=
3909 =?iso-8859-1?q?x_?=
3910 =?iso-8859-1?q?xx?=
3911 =?iso-8859-1?q?xx?=
3912 =?iso-8859-1?q?_?=""")
3913 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003914 h = Header(charset='iso-8859-1', maxlinelen=40)
3915 h.append('xxxx ' * 20)
Guido van Rossum9604e662007-08-30 03:46:43 +00003916 s = h.encode()
3917 eq(s, """\
3918=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
3919 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
3920 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
3921 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
3922 =?iso-8859-1?q?_xxxx_xxxx_?=""")
3923 eq(x, str(make_header(decode_header(s))))
3924
3925 def test_base64_splittable(self):
3926 eq = self.ndiffAssertEqual
3927 h = Header(charset='koi8-r', maxlinelen=20)
3928 x = 'xxxx ' * 20
3929 h.append(x)
3930 s = h.encode()
3931 eq(s, """\
3932=?koi8-r?b?eHh4?=
3933 =?koi8-r?b?eCB4?=
3934 =?koi8-r?b?eHh4?=
3935 =?koi8-r?b?IHh4?=
3936 =?koi8-r?b?eHgg?=
3937 =?koi8-r?b?eHh4?=
3938 =?koi8-r?b?eCB4?=
3939 =?koi8-r?b?eHh4?=
3940 =?koi8-r?b?IHh4?=
3941 =?koi8-r?b?eHgg?=
3942 =?koi8-r?b?eHh4?=
3943 =?koi8-r?b?eCB4?=
3944 =?koi8-r?b?eHh4?=
3945 =?koi8-r?b?IHh4?=
3946 =?koi8-r?b?eHgg?=
3947 =?koi8-r?b?eHh4?=
3948 =?koi8-r?b?eCB4?=
3949 =?koi8-r?b?eHh4?=
3950 =?koi8-r?b?IHh4?=
3951 =?koi8-r?b?eHgg?=
3952 =?koi8-r?b?eHh4?=
3953 =?koi8-r?b?eCB4?=
3954 =?koi8-r?b?eHh4?=
3955 =?koi8-r?b?IHh4?=
3956 =?koi8-r?b?eHgg?=
3957 =?koi8-r?b?eHh4?=
3958 =?koi8-r?b?eCB4?=
3959 =?koi8-r?b?eHh4?=
3960 =?koi8-r?b?IHh4?=
3961 =?koi8-r?b?eHgg?=
3962 =?koi8-r?b?eHh4?=
3963 =?koi8-r?b?eCB4?=
3964 =?koi8-r?b?eHh4?=
3965 =?koi8-r?b?IA==?=""")
3966 eq(x, str(make_header(decode_header(s))))
3967 h = Header(charset='koi8-r', maxlinelen=40)
3968 h.append(x)
3969 s = h.encode()
3970 eq(s, """\
3971=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
3972 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
3973 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
3974 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
3975 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
3976 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
3977 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003978
3979 def test_us_ascii_header(self):
3980 eq = self.assertEqual
3981 s = 'hello'
3982 x = decode_header(s)
3983 eq(x, [('hello', None)])
3984 h = make_header(x)
3985 eq(s, h.encode())
3986
3987 def test_string_charset(self):
3988 eq = self.assertEqual
3989 h = Header()
3990 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00003991 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003992
3993## def test_unicode_error(self):
3994## raises = self.assertRaises
3995## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
3996## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
3997## h = Header()
3998## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
3999## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
4000## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
4001
4002 def test_utf8_shortest(self):
4003 eq = self.assertEqual
4004 h = Header('p\xf6stal', 'utf-8')
4005 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
4006 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
4007 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
4008
4009 def test_bad_8bit_header(self):
4010 raises = self.assertRaises
4011 eq = self.assertEqual
4012 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4013 raises(UnicodeError, Header, x)
4014 h = Header()
4015 raises(UnicodeError, h.append, x)
4016 e = x.decode('utf-8', 'replace')
4017 eq(str(Header(x, errors='replace')), e)
4018 h.append(x, errors='replace')
4019 eq(str(h), e)
4020
R David Murray041015c2011-03-25 15:10:55 -04004021 def test_escaped_8bit_header(self):
4022 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4023 x = x.decode('ascii', 'surrogateescape')
4024 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4025 self.assertEqual(str(h),
4026 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4027 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4028
4029 def test_modify_returned_list_does_not_change_header(self):
4030 h = Header('test')
4031 chunks = email.header.decode_header(h)
4032 chunks.append(('ascii', 'test2'))
4033 self.assertEqual(str(h), 'test')
4034
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004035 def test_encoded_adjacent_nonencoded(self):
4036 eq = self.assertEqual
4037 h = Header()
4038 h.append('hello', 'iso-8859-1')
4039 h.append('world')
4040 s = h.encode()
4041 eq(s, '=?iso-8859-1?q?hello?= world')
4042 h = make_header(decode_header(s))
4043 eq(h.encode(), s)
4044
4045 def test_whitespace_eater(self):
4046 eq = self.assertEqual
4047 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
4048 parts = decode_header(s)
4049 eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
4050 hdr = make_header(parts)
4051 eq(hdr.encode(),
4052 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
4053
4054 def test_broken_base64_header(self):
4055 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00004056 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004057 raises(errors.HeaderParseError, decode_header, s)
4058
R. David Murray477efb32011-01-05 01:39:32 +00004059 def test_shift_jis_charset(self):
4060 h = Header('文', charset='shift_jis')
4061 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
4062
R David Murrayde912762011-03-16 18:26:23 -04004063 def test_flatten_header_with_no_value(self):
4064 # Issue 11401 (regression from email 4.x) Note that the space after
4065 # the header doesn't reflect the input, but this is also the way
4066 # email 4.x behaved. At some point it would be nice to fix that.
4067 msg = email.message_from_string("EmptyHeader:")
4068 self.assertEqual(str(msg), "EmptyHeader: \n\n")
4069
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004070
Ezio Melottib3aedd42010-11-20 19:04:17 +00004071
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004072# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    # get_param() returns a (charset, language, value) tuple for parameters
    # that used the RFC 2231 extended syntax and a plain string otherwise;
    # several tests below check which of the two comes back.

    def test_get_param(self):
        msg = self._msgobj('msg_29.txt')
        self.assertEqual(
            msg.get_param('title'),
            ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        # unquote=False keeps the surrounding double quotes on the value.
        self.assertEqual(
            msg.get_param('title', unquote=False),
            ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        # Without a language the middle tuple element is the empty string.
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        # NOTE(review): this literal is whitespace sensitive and has to match
        # the headers of msg_01.txt exactly -- confirm against the data file.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_del_param(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        # Only 'foo' is removed; 'title' must survive in the flattened output.
        msg.del_param('foo', header='Content-Type')
        # NOTE(review): this literal is whitespace sensitive and has to match
        # the headers of msg_01.txt exactly -- confirm against the data file.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        msg = self._msgobj('msg_32.txt')
        self.assertEqual(msg.get_content_charset(), 'us-ascii')

    def test_rfc2231_parse_rfc_quoting(self):
        # Encoded segments written without surrounding quotes.
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # The parsed message must flatten back to the exact input text.
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_parse_extra_quoting(self):
        # Same as above, but the encoded segments are wrapped in quotes.
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # The parsed message must flatten back to the exact input text.
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_no_language_or_charset(self):
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        # Continuations without the extended marker give a plain string.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        # Segment 0 is not marked as encoded, so its %20 escapes stay as-is.
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # No segment is marked as encoded, so no percent escape is decoded.
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        # An unknown charset name ('bogus') must not prevent decoding.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_filename(self):
        # The undecodable trailing %E2 is expected to become U+FFFD.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        # A lone apostrophe must not be taken for a charset/language prefix.
        self.assertIsNone(charset)
        self.assertIsNone(language)
        self.assertEqual(s, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        # Only the first two apostrophes delimit charset and language; the
        # third belongs to the value.
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(s, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        # Not marked as encoded: the whole text is the value, as a string.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(s, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(s, 'My Document For You')
4407
4408
Ezio Melottib3aedd42010-11-20 19:04:17 +00004409
R. David Murraya8f480f2010-01-16 18:30:03 +00004410# Tests to ensure that signed parts of an email are completely preserved, as
4411# required by RFC1847 section 2.1. Note that these are incomplete, because the
4412# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):

    # Captures the first MIME part: the text between a '--<boundary>' line
    # and the next line that is exactly that same '--<boundary>' marker.
    # Compiled once here rather than on every comparison.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return the raw text of a data file together with the message
        # object parsed from that text.
        with openfile(findfile(filename)) as fp:
            original = fp.read()
        return original, email.message_from_string(original)

    def _signed_parts_eq(self, original, result):
        # Assert that the first MIME part is identical in both message texts.
        extracted = []
        for label, text in (('original', original), ('result', result)):
            found = self._first_part_re.search(text)
            # Fail with a readable message instead of an AttributeError on
            # None when a text contains no recognisable part.
            self.assertIsNotNone(
                found, 'no MIME part found in the %s message' % label)
            extracted.append(found.group(2))
        inpart, outpart = extracted
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(original, msg.as_string())

    def test_long_headers_as_string_maxheaderlen(self):
        # Re-wrapping headers at 60 columns must not touch the signed part.
        original, msg = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(original, msg.as_string(maxheaderlen=60))

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        out = StringIO()
        Generator(out).flatten(msg)
        self._signed_parts_eq(original, out.getvalue())
4445
4446
Ezio Melottib3aedd42010-11-20 19:04:17 +00004447
def _testclasses():
    # Return every attribute of this module whose name starts with 'Test',
    # in the order dir() lists them.
    this_module = sys.modules[__name__]
    found = []
    for attr_name in dir(this_module):
        if attr_name.startswith('Test'):
            found.append(getattr(this_module, attr_name))
    return found
4451
4452
def suite():
    # Build one TestSuite holding the tests of every Test* class found by
    # _testclasses().
    # unittest.makeSuite() is deprecated and absent from recent Python
    # releases; the default loader is the supported way to do the same thing.
    load_class = unittest.defaultTestLoader.loadTestsFromTestCase
    # Named all_tests so the local does not shadow this function's own name.
    all_tests = unittest.TestSuite()
    for testclass in _testclasses():
        all_tests.addTest(load_class(testclass))
    return all_tests
4458
4459
def test_main():
    # Hand each discovered test class to run_unittest, one call per class.
    discovered = _testclasses()
    for case_class in discovered:
        run_unittest(case_class)
4463
4464
Ezio Melottib3aedd42010-11-20 19:04:17 +00004465
# When run as a script, let unittest resolve the module-level name 'suite'
# and run whatever it returns.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')