blob: 5222baba419b9254d76c5e773e43436c2d60bedb [file] [log] [blame]
Benjamin Petersonffeda292010-01-09 18:48:46 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
6import sys
7import time
8import base64
9import difflib
10import unittest
11import warnings
12
13from io import StringIO
14from itertools import chain
15
16import email
17
18from email.charset import Charset
19from email.header import Header, decode_header, make_header
20from email.parser import Parser, HeaderParser
21from email.generator import Generator, DecodedGenerator
22from email.message import Message
23from email.mime.application import MIMEApplication
24from email.mime.audio import MIMEAudio
25from email.mime.text import MIMEText
26from email.mime.image import MIMEImage
27from email.mime.base import MIMEBase
28from email.mime.message import MIMEMessage
29from email.mime.multipart import MIMEMultipart
30from email import utils
31from email import errors
32from email import encoders
33from email import iterators
34from email import base64mime
35from email import quoprimime
36
Benjamin Petersonee8712c2008-05-20 21:35:26 +000037from test.support import findfile, run_unittest
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038from email.test import __file__ as landmark
39
40
41NL = '\n'
42EMPTYSTRING = ''
43SPACE = ' '
44
45
Ezio Melotti19f2aeb2010-11-21 01:30:29 +000046
Guido van Rossum8b3febe2007-08-30 01:15:14 +000047def openfile(filename, *args, **kws):
48 path = os.path.join(os.path.dirname(landmark), 'data', filename)
49 return open(path, *args, **kws)
50
51
Ezio Melotti19f2aeb2010-11-21 01:30:29 +000052
Guido van Rossum8b3febe2007-08-30 01:15:14 +000053# Base test class
54class TestEmailBase(unittest.TestCase):
55 def ndiffAssertEqual(self, first, second):
Georg Brandlab91fde2009-08-13 08:51:18 +000056 """Like assertEqual except use ndiff for readable output."""
Guido van Rossum8b3febe2007-08-30 01:15:14 +000057 if first != second:
58 sfirst = str(first)
59 ssecond = str(second)
60 rfirst = [repr(line) for line in sfirst.splitlines()]
61 rsecond = [repr(line) for line in ssecond.splitlines()]
62 diff = difflib.ndiff(rfirst, rsecond)
63 raise self.failureException(NL + NL.join(diff))
64
65 def _msgobj(self, filename):
66 with openfile(findfile(filename)) as fp:
67 return email.message_from_file(fp)
68
69
Ezio Melotti19f2aeb2010-11-21 01:30:29 +000070
Guido van Rossum8b3febe2007-08-30 01:15:14 +000071# Test various aspects of the Message class's API
72class TestMessageAPI(TestEmailBase):
73 def test_get_all(self):
74 eq = self.assertEqual
75 msg = self._msgobj('msg_20.txt')
76 eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
77 eq(msg.get_all('xx', 'n/a'), 'n/a')
78
79 def test_getset_charset(self):
80 eq = self.assertEqual
81 msg = Message()
82 eq(msg.get_charset(), None)
83 charset = Charset('iso-8859-1')
84 msg.set_charset(charset)
85 eq(msg['mime-version'], '1.0')
86 eq(msg.get_content_type(), 'text/plain')
87 eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
88 eq(msg.get_param('charset'), 'iso-8859-1')
89 eq(msg['content-transfer-encoding'], 'quoted-printable')
90 eq(msg.get_charset().input_charset, 'iso-8859-1')
91 # Remove the charset
92 msg.set_charset(None)
93 eq(msg.get_charset(), None)
94 eq(msg['content-type'], 'text/plain')
95 # Try adding a charset when there's already MIME headers present
96 msg = Message()
97 msg['MIME-Version'] = '2.0'
98 msg['Content-Type'] = 'text/x-weird'
99 msg['Content-Transfer-Encoding'] = 'quinted-puntable'
100 msg.set_charset(charset)
101 eq(msg['mime-version'], '2.0')
102 eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
103 eq(msg['content-transfer-encoding'], 'quinted-puntable')
104
105 def test_set_charset_from_string(self):
106 eq = self.assertEqual
107 msg = Message()
108 msg.set_charset('us-ascii')
109 eq(msg.get_charset().input_charset, 'us-ascii')
110 eq(msg['content-type'], 'text/plain; charset="us-ascii"')
111
112 def test_set_payload_with_charset(self):
113 msg = Message()
114 charset = Charset('iso-8859-1')
115 msg.set_payload('This is a string payload', charset)
116 self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
117
118 def test_get_charsets(self):
119 eq = self.assertEqual
120
121 msg = self._msgobj('msg_08.txt')
122 charsets = msg.get_charsets()
123 eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])
124
125 msg = self._msgobj('msg_09.txt')
126 charsets = msg.get_charsets('dingbat')
127 eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
128 'koi8-r'])
129
130 msg = self._msgobj('msg_12.txt')
131 charsets = msg.get_charsets()
132 eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
133 'iso-8859-3', 'us-ascii', 'koi8-r'])
134
135 def test_get_filename(self):
136 eq = self.assertEqual
137
138 msg = self._msgobj('msg_04.txt')
139 filenames = [p.get_filename() for p in msg.get_payload()]
140 eq(filenames, ['msg.txt', 'msg.txt'])
141
142 msg = self._msgobj('msg_07.txt')
143 subpart = msg.get_payload(1)
144 eq(subpart.get_filename(), 'dingusfish.gif')
145
146 def test_get_filename_with_name_parameter(self):
147 eq = self.assertEqual
148
149 msg = self._msgobj('msg_44.txt')
150 filenames = [p.get_filename() for p in msg.get_payload()]
151 eq(filenames, ['msg.txt', 'msg.txt'])
152
153 def test_get_boundary(self):
154 eq = self.assertEqual
155 msg = self._msgobj('msg_07.txt')
156 # No quotes!
157 eq(msg.get_boundary(), 'BOUNDARY')
158
159 def test_set_boundary(self):
160 eq = self.assertEqual
161 # This one has no existing boundary parameter, but the Content-Type:
162 # header appears fifth.
163 msg = self._msgobj('msg_01.txt')
164 msg.set_boundary('BOUNDARY')
165 header, value = msg.items()[4]
166 eq(header.lower(), 'content-type')
167 eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
168 # This one has a Content-Type: header, with a boundary, stuck in the
169 # middle of its headers. Make sure the order is preserved; it should
170 # be fifth.
171 msg = self._msgobj('msg_04.txt')
172 msg.set_boundary('BOUNDARY')
173 header, value = msg.items()[4]
174 eq(header.lower(), 'content-type')
175 eq(value, 'multipart/mixed; boundary="BOUNDARY"')
176 # And this one has no Content-Type: header at all.
177 msg = self._msgobj('msg_03.txt')
178 self.assertRaises(errors.HeaderParseError,
179 msg.set_boundary, 'BOUNDARY')
180
R. David Murray27c19142010-12-21 18:11:01 +0000181 def test_make_boundary(self):
182 msg = MIMEMultipart('form-data')
183 # Note that when the boundary gets created is an implementation
184 # detail and might change.
185 self.assertEqual(msg.items()[0][1], 'multipart/form-data')
186 # Trigger creation of boundary
187 msg.as_string()
188 self.assertEqual(msg.items()[0][1][:33],
189 'multipart/form-data; boundary="==')
190 # XXX: there ought to be tests of the uniqueness of the boundary, too.
191
R. David Murrayd0a04ff2010-02-21 04:48:18 +0000192 def test_message_rfc822_only(self):
193 # Issue 7970: message/rfc822 not in multipart parsed by
194 # HeaderParser caused an exception when flattened.
195 fp = openfile(findfile('msg_46.txt'))
196 msgdata = fp.read()
197 parser = HeaderParser()
198 msg = parser.parsestr(msgdata)
199 out = StringIO()
200 gen = Generator(out, True, 0)
201 gen.flatten(msg, False)
202 self.assertEqual(out.getvalue(), msgdata)
203
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000204 def test_get_decoded_payload(self):
205 eq = self.assertEqual
206 msg = self._msgobj('msg_10.txt')
207 # The outer message is a multipart
208 eq(msg.get_payload(decode=True), None)
209 # Subpart 1 is 7bit encoded
210 eq(msg.get_payload(0).get_payload(decode=True),
211 b'This is a 7bit encoded message.\n')
212 # Subpart 2 is quopri
213 eq(msg.get_payload(1).get_payload(decode=True),
214 b'\xa1This is a Quoted Printable encoded message!\n')
215 # Subpart 3 is base64
216 eq(msg.get_payload(2).get_payload(decode=True),
217 b'This is a Base64 encoded message.')
218 # Subpart 4 has no Content-Transfer-Encoding: header.
219 eq(msg.get_payload(3).get_payload(decode=True),
220 b'This has no Content-Transfer-Encoding: header.\n')
221
222 def test_get_decoded_uu_payload(self):
223 eq = self.assertEqual
224 msg = Message()
225 msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
226 for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
227 msg['content-transfer-encoding'] = cte
228 eq(msg.get_payload(decode=True), b'hello world')
229 # Now try some bogus data
230 msg.set_payload('foo')
231 eq(msg.get_payload(decode=True), b'foo')
232
233 def test_decoded_generator(self):
234 eq = self.assertEqual
235 msg = self._msgobj('msg_07.txt')
236 with openfile('msg_17.txt') as fp:
237 text = fp.read()
238 s = StringIO()
239 g = DecodedGenerator(s)
240 g.flatten(msg)
241 eq(s.getvalue(), text)
242
243 def test__contains__(self):
244 msg = Message()
245 msg['From'] = 'Me'
246 msg['to'] = 'You'
247 # Check for case insensitivity
Georg Brandlab91fde2009-08-13 08:51:18 +0000248 self.assertTrue('from' in msg)
249 self.assertTrue('From' in msg)
250 self.assertTrue('FROM' in msg)
251 self.assertTrue('to' in msg)
252 self.assertTrue('To' in msg)
253 self.assertTrue('TO' in msg)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000254
255 def test_as_string(self):
256 eq = self.ndiffAssertEqual
257 msg = self._msgobj('msg_01.txt')
258 with openfile('msg_01.txt') as fp:
259 text = fp.read()
260 eq(text, str(msg))
261 fullrepr = msg.as_string(unixfrom=True)
262 lines = fullrepr.split('\n')
Georg Brandlab91fde2009-08-13 08:51:18 +0000263 self.assertTrue(lines[0].startswith('From '))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000264 eq(text, NL.join(lines[1:]))
265
266 def test_bad_param(self):
267 msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
268 self.assertEqual(msg.get_param('baz'), '')
269
270 def test_missing_filename(self):
271 msg = email.message_from_string("From: foo\n")
272 self.assertEqual(msg.get_filename(), None)
273
274 def test_bogus_filename(self):
275 msg = email.message_from_string(
276 "Content-Disposition: blarg; filename\n")
277 self.assertEqual(msg.get_filename(), '')
278
279 def test_missing_boundary(self):
280 msg = email.message_from_string("From: foo\n")
281 self.assertEqual(msg.get_boundary(), None)
282
283 def test_get_params(self):
284 eq = self.assertEqual
285 msg = email.message_from_string(
286 'X-Header: foo=one; bar=two; baz=three\n')
287 eq(msg.get_params(header='x-header'),
288 [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
289 msg = email.message_from_string(
290 'X-Header: foo; bar=one; baz=two\n')
291 eq(msg.get_params(header='x-header'),
292 [('foo', ''), ('bar', 'one'), ('baz', 'two')])
293 eq(msg.get_params(), None)
294 msg = email.message_from_string(
295 'X-Header: foo; bar="one"; baz=two\n')
296 eq(msg.get_params(header='x-header'),
297 [('foo', ''), ('bar', 'one'), ('baz', 'two')])
298
299 def test_get_param_liberal(self):
300 msg = Message()
301 msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
302 self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')
303
304 def test_get_param(self):
305 eq = self.assertEqual
306 msg = email.message_from_string(
307 "X-Header: foo=one; bar=two; baz=three\n")
308 eq(msg.get_param('bar', header='x-header'), 'two')
309 eq(msg.get_param('quuz', header='x-header'), None)
310 eq(msg.get_param('quuz'), None)
311 msg = email.message_from_string(
312 'X-Header: foo; bar="one"; baz=two\n')
313 eq(msg.get_param('foo', header='x-header'), '')
314 eq(msg.get_param('bar', header='x-header'), 'one')
315 eq(msg.get_param('baz', header='x-header'), 'two')
316 # XXX: We are not RFC-2045 compliant! We cannot parse:
317 # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
318 # msg.get_param("weird")
319 # yet.
320
321 def test_get_param_funky_continuation_lines(self):
322 msg = self._msgobj('msg_22.txt')
323 self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')
324
325 def test_get_param_with_semis_in_quotes(self):
326 msg = email.message_from_string(
327 'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
328 self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
329 self.assertEqual(msg.get_param('name', unquote=False),
330 '"Jim&amp;&amp;Jill"')
331
R. David Murray84ee3102010-04-14 19:05:38 +0000332 def test_get_param_with_quotes(self):
333 msg = email.message_from_string(
334 'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
335 self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
336 msg = email.message_from_string(
337 "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
338 self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
339
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000340 def test_field_containment(self):
Georg Brandlab91fde2009-08-13 08:51:18 +0000341 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000342 msg = email.message_from_string('Header: exists')
343 unless('header' in msg)
344 unless('Header' in msg)
345 unless('HEADER' in msg)
Georg Brandlab91fde2009-08-13 08:51:18 +0000346 self.assertFalse('headerx' in msg)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000347
348 def test_set_param(self):
349 eq = self.assertEqual
350 msg = Message()
351 msg.set_param('charset', 'iso-2022-jp')
352 eq(msg.get_param('charset'), 'iso-2022-jp')
353 msg.set_param('importance', 'high value')
354 eq(msg.get_param('importance'), 'high value')
355 eq(msg.get_param('importance', unquote=False), '"high value"')
356 eq(msg.get_params(), [('text/plain', ''),
357 ('charset', 'iso-2022-jp'),
358 ('importance', 'high value')])
359 eq(msg.get_params(unquote=False), [('text/plain', ''),
360 ('charset', '"iso-2022-jp"'),
361 ('importance', '"high value"')])
362 msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
363 eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')
364
365 def test_del_param(self):
366 eq = self.assertEqual
367 msg = self._msgobj('msg_05.txt')
368 eq(msg.get_params(),
369 [('multipart/report', ''), ('report-type', 'delivery-status'),
370 ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
371 old_val = msg.get_param("report-type")
372 msg.del_param("report-type")
373 eq(msg.get_params(),
374 [('multipart/report', ''),
375 ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
376 msg.set_param("report-type", old_val)
377 eq(msg.get_params(),
378 [('multipart/report', ''),
379 ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
380 ('report-type', old_val)])
381
382 def test_del_param_on_other_header(self):
383 msg = Message()
384 msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
385 msg.del_param('filename', 'content-disposition')
386 self.assertEqual(msg['content-disposition'], 'attachment')
387
388 def test_set_type(self):
389 eq = self.assertEqual
390 msg = Message()
391 self.assertRaises(ValueError, msg.set_type, 'text')
392 msg.set_type('text/plain')
393 eq(msg['content-type'], 'text/plain')
394 msg.set_param('charset', 'us-ascii')
395 eq(msg['content-type'], 'text/plain; charset="us-ascii"')
396 msg.set_type('text/html')
397 eq(msg['content-type'], 'text/html; charset="us-ascii"')
398
399 def test_set_type_on_other_header(self):
400 msg = Message()
401 msg['X-Content-Type'] = 'text/plain'
402 msg.set_type('application/octet-stream', 'X-Content-Type')
403 self.assertEqual(msg['x-content-type'], 'application/octet-stream')
404
405 def test_get_content_type_missing(self):
406 msg = Message()
407 self.assertEqual(msg.get_content_type(), 'text/plain')
408
409 def test_get_content_type_missing_with_default_type(self):
410 msg = Message()
411 msg.set_default_type('message/rfc822')
412 self.assertEqual(msg.get_content_type(), 'message/rfc822')
413
414 def test_get_content_type_from_message_implicit(self):
415 msg = self._msgobj('msg_30.txt')
416 self.assertEqual(msg.get_payload(0).get_content_type(),
417 'message/rfc822')
418
419 def test_get_content_type_from_message_explicit(self):
420 msg = self._msgobj('msg_28.txt')
421 self.assertEqual(msg.get_payload(0).get_content_type(),
422 'message/rfc822')
423
424 def test_get_content_type_from_message_text_plain_implicit(self):
425 msg = self._msgobj('msg_03.txt')
426 self.assertEqual(msg.get_content_type(), 'text/plain')
427
428 def test_get_content_type_from_message_text_plain_explicit(self):
429 msg = self._msgobj('msg_01.txt')
430 self.assertEqual(msg.get_content_type(), 'text/plain')
431
432 def test_get_content_maintype_missing(self):
433 msg = Message()
434 self.assertEqual(msg.get_content_maintype(), 'text')
435
436 def test_get_content_maintype_missing_with_default_type(self):
437 msg = Message()
438 msg.set_default_type('message/rfc822')
439 self.assertEqual(msg.get_content_maintype(), 'message')
440
441 def test_get_content_maintype_from_message_implicit(self):
442 msg = self._msgobj('msg_30.txt')
443 self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
444
445 def test_get_content_maintype_from_message_explicit(self):
446 msg = self._msgobj('msg_28.txt')
447 self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
448
449 def test_get_content_maintype_from_message_text_plain_implicit(self):
450 msg = self._msgobj('msg_03.txt')
451 self.assertEqual(msg.get_content_maintype(), 'text')
452
453 def test_get_content_maintype_from_message_text_plain_explicit(self):
454 msg = self._msgobj('msg_01.txt')
455 self.assertEqual(msg.get_content_maintype(), 'text')
456
457 def test_get_content_subtype_missing(self):
458 msg = Message()
459 self.assertEqual(msg.get_content_subtype(), 'plain')
460
461 def test_get_content_subtype_missing_with_default_type(self):
462 msg = Message()
463 msg.set_default_type('message/rfc822')
464 self.assertEqual(msg.get_content_subtype(), 'rfc822')
465
466 def test_get_content_subtype_from_message_implicit(self):
467 msg = self._msgobj('msg_30.txt')
468 self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
469
470 def test_get_content_subtype_from_message_explicit(self):
471 msg = self._msgobj('msg_28.txt')
472 self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
473
474 def test_get_content_subtype_from_message_text_plain_implicit(self):
475 msg = self._msgobj('msg_03.txt')
476 self.assertEqual(msg.get_content_subtype(), 'plain')
477
478 def test_get_content_subtype_from_message_text_plain_explicit(self):
479 msg = self._msgobj('msg_01.txt')
480 self.assertEqual(msg.get_content_subtype(), 'plain')
481
482 def test_get_content_maintype_error(self):
483 msg = Message()
484 msg['Content-Type'] = 'no-slash-in-this-string'
485 self.assertEqual(msg.get_content_maintype(), 'text')
486
487 def test_get_content_subtype_error(self):
488 msg = Message()
489 msg['Content-Type'] = 'no-slash-in-this-string'
490 self.assertEqual(msg.get_content_subtype(), 'plain')
491
492 def test_replace_header(self):
493 eq = self.assertEqual
494 msg = Message()
495 msg.add_header('First', 'One')
496 msg.add_header('Second', 'Two')
497 msg.add_header('Third', 'Three')
498 eq(msg.keys(), ['First', 'Second', 'Third'])
499 eq(msg.values(), ['One', 'Two', 'Three'])
500 msg.replace_header('Second', 'Twenty')
501 eq(msg.keys(), ['First', 'Second', 'Third'])
502 eq(msg.values(), ['One', 'Twenty', 'Three'])
503 msg.add_header('First', 'Eleven')
504 msg.replace_header('First', 'One Hundred')
505 eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
506 eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
507 self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')
508
509 def test_broken_base64_payload(self):
510 x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
511 msg = Message()
512 msg['content-type'] = 'audio/x-midi'
513 msg['content-transfer-encoding'] = 'base64'
514 msg.set_payload(x)
515 self.assertEqual(msg.get_payload(decode=True),
Guido van Rossum9604e662007-08-30 03:46:43 +0000516 bytes(x, 'raw-unicode-escape'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000517
R. David Murrayccb9d052010-12-13 23:57:01 +0000518 # Issue 1078919
519 def test_ascii_add_header(self):
520 msg = Message()
521 msg.add_header('Content-Disposition', 'attachment',
522 filename='bud.gif')
523 self.assertEqual('attachment; filename="bud.gif"',
524 msg['Content-Disposition'])
525
526 def test_noascii_add_header(self):
527 msg = Message()
528 msg.add_header('Content-Disposition', 'attachment',
529 filename="Fußballer.ppt")
530 self.assertEqual(
531 'attachment; filename*="utf-8\'\'Fu%C3%9Fballer.ppt"',
532 msg['Content-Disposition'])
533
534 def test_nonascii_add_header_via_triple(self):
535 msg = Message()
536 msg.add_header('Content-Disposition', 'attachment',
537 filename=('iso-8859-1', '', 'Fußballer.ppt'))
538 self.assertEqual(
539 'attachment; filename*="iso-8859-1\'\'Fu%DFballer.ppt"',
540 msg['Content-Disposition'])
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000541
Ezio Melotti19f2aeb2010-11-21 01:30:29 +0000542
R. David Murray389af002011-01-09 02:48:04 +0000543 # Issue 5871: reject an attempt to embed a header inside a header value
544 # (header injection attack).
545 def test_embeded_header_via_Header_rejected(self):
546 msg = Message()
547 msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
548 self.assertRaises(errors.HeaderParseError, msg.as_string)
549
550 def test_embeded_header_via_string_rejected(self):
551 msg = Message()
552 msg['Dummy'] = 'dummy\nX-Injected-Header: test'
553 self.assertRaises(errors.HeaderParseError, msg.as_string)
554
555
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000556# Test the email.encoders module
557class TestEncoders(unittest.TestCase):
558 def test_encode_empty_payload(self):
559 eq = self.assertEqual
560 msg = Message()
561 msg.set_charset('us-ascii')
562 eq(msg['content-transfer-encoding'], '7bit')
563
564 def test_default_cte(self):
565 eq = self.assertEqual
Ezio Melottic30bb7d2010-04-22 11:58:06 +0000566 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000567 msg = MIMEText('hello world')
568 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic30bb7d2010-04-22 11:58:06 +0000569 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000570 msg = MIMEText('hello \xf8 world')
571 eq(msg['content-transfer-encoding'], '8bit')
572 # And now with a different charset
573 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
574 eq(msg['content-transfer-encoding'], 'quoted-printable')
575
R. David Murrayf870d872010-05-06 01:53:03 +0000576 def test_encode7or8bit(self):
577 # Make sure a charset whose input character set is 8bit but
578 # whose output character set is 7bit gets a transfer-encoding
579 # of 7bit.
580 eq = self.assertEqual
R. David Murrayd2d08c62010-06-03 02:05:47 +0000581 msg = MIMEText('æ–‡', _charset='euc-jp')
R. David Murrayf870d872010-05-06 01:53:03 +0000582 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000583
Ezio Melotti19f2aeb2010-11-21 01:30:29 +0000584
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000585# Test long header wrapping
586class TestLongHeaders(TestEmailBase):
587 def test_split_long_continuation(self):
588 eq = self.ndiffAssertEqual
589 msg = email.message_from_string("""\
590Subject: bug demonstration
591\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
592\tmore text
593
594test
595""")
596 sfp = StringIO()
597 g = Generator(sfp)
598 g.flatten(msg)
599 eq(sfp.getvalue(), """\
600Subject: bug demonstration
601\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
602\tmore text
603
604test
605""")
606
607 def test_another_long_almost_unsplittable_header(self):
608 eq = self.ndiffAssertEqual
609 hstr = """\
610bug demonstration
611\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
612\tmore text"""
613 h = Header(hstr, continuation_ws='\t')
614 eq(h.encode(), """\
615bug demonstration
616\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
617\tmore text""")
618 h = Header(hstr.replace('\t', ' '))
619 eq(h.encode(), """\
620bug demonstration
621 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
622 more text""")
623
624 def test_long_nonstring(self):
625 eq = self.ndiffAssertEqual
626 g = Charset("iso-8859-1")
627 cz = Charset("iso-8859-2")
628 utf8 = Charset("utf-8")
629 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
630 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
631 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
632 b'bef\xf6rdert. ')
633 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
634 b'd\xf9vtipu.. ')
635 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
636 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
637 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
638 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
639 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
640 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
641 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
642 '\u3044\u307e\u3059\u3002')
643 h = Header(g_head, g, header_name='Subject')
644 h.append(cz_head, cz)
645 h.append(utf8_head, utf8)
646 msg = Message()
647 msg['Subject'] = h
648 sfp = StringIO()
649 g = Generator(sfp)
650 g.flatten(msg)
651 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000652Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
653 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
654 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
655 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
656 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
657 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
658 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
659 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
660 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
661 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
662 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000663
664""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000665 eq(h.encode(maxlinelen=76), """\
666=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
667 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
668 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
669 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
670 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
671 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
672 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
673 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
674 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
675 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
676 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000677
678 def test_long_header_encode(self):
679 eq = self.ndiffAssertEqual
680 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
681 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
682 header_name='X-Foobar-Spoink-Defrobnit')
683 eq(h.encode(), '''\
684wasnipoop; giraffes="very-long-necked-animals";
685 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
686
687 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
688 eq = self.ndiffAssertEqual
689 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
690 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
691 header_name='X-Foobar-Spoink-Defrobnit',
692 continuation_ws='\t')
693 eq(h.encode(), '''\
694wasnipoop; giraffes="very-long-necked-animals";
695 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
696
697 def test_long_header_encode_with_tab_continuation(self):
698 eq = self.ndiffAssertEqual
699 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
700 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
701 header_name='X-Foobar-Spoink-Defrobnit',
702 continuation_ws='\t')
703 eq(h.encode(), '''\
704wasnipoop; giraffes="very-long-necked-animals";
705\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
706
707 def test_header_splitter(self):
708 eq = self.ndiffAssertEqual
709 msg = MIMEText('')
710 # It'd be great if we could use add_header() here, but that doesn't
711 # guarantee an order of the parameters.
712 msg['X-Foobar-Spoink-Defrobnit'] = (
713 'wasnipoop; giraffes="very-long-necked-animals"; '
714 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
715 sfp = StringIO()
716 g = Generator(sfp)
717 g.flatten(msg)
718 eq(sfp.getvalue(), '''\
719Content-Type: text/plain; charset="us-ascii"
720MIME-Version: 1.0
721Content-Transfer-Encoding: 7bit
722X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
723 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
724
725''')
726
727 def test_no_semis_header_splitter(self):
728 eq = self.ndiffAssertEqual
729 msg = Message()
730 msg['From'] = 'test@dom.ain'
731 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
732 msg.set_payload('Test')
733 sfp = StringIO()
734 g = Generator(sfp)
735 g.flatten(msg)
736 eq(sfp.getvalue(), """\
737From: test@dom.ain
738References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
739 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
740
741Test""")
742
743 def test_no_split_long_header(self):
744 eq = self.ndiffAssertEqual
745 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000746 h = Header(hstr)
747 # These come on two lines because Headers are really field value
748 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000749 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000750References:
751 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
752 h = Header('x' * 80)
753 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000754
755 def test_splitting_multiple_long_lines(self):
756 eq = self.ndiffAssertEqual
757 hstr = """\
758from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
759\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
760\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
761"""
762 h = Header(hstr, continuation_ws='\t')
763 eq(h.encode(), """\
764from babylon.socal-raves.org (localhost [127.0.0.1]);
765 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
766 for <mailman-admin@babylon.socal-raves.org>;
767 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
768\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
769 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
770 for <mailman-admin@babylon.socal-raves.org>;
771 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
772\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
773 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
774 for <mailman-admin@babylon.socal-raves.org>;
775 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
776
777 def test_splitting_first_line_only_is_long(self):
778 eq = self.ndiffAssertEqual
779 hstr = """\
780from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
781\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
782\tid 17k4h5-00034i-00
783\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
784 h = Header(hstr, maxlinelen=78, header_name='Received',
785 continuation_ws='\t')
786 eq(h.encode(), """\
787from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
788 helo=cthulhu.gerg.ca)
789\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
790\tid 17k4h5-00034i-00
791\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
792
793 def test_long_8bit_header(self):
794 eq = self.ndiffAssertEqual
795 msg = Message()
796 h = Header('Britische Regierung gibt', 'iso-8859-1',
797 header_name='Subject')
798 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +0000799 eq(h.encode(maxlinelen=76), """\
800=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
801 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000802 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +0000803 eq(msg.as_string(maxheaderlen=76), """\
804Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
805 =?iso-8859-1?q?hore-Windkraftprojekte?=
806
807""")
808 eq(msg.as_string(maxheaderlen=0), """\
809Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000810
811""")
812
813 def test_long_8bit_header_no_charset(self):
814 eq = self.ndiffAssertEqual
815 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +0000816 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
817 'f\xfcr Offshore-Windkraftprojekte '
818 '<a-very-long-address@example.com>')
819 msg['Reply-To'] = header_string
820 self.assertRaises(UnicodeEncodeError, msg.as_string)
821 msg = Message()
822 msg['Reply-To'] = Header(header_string, 'utf-8',
823 header_name='Reply-To')
824 eq(msg.as_string(maxheaderlen=78), """\
825Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
826 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000827
828""")
829
830 def test_long_to_header(self):
831 eq = self.ndiffAssertEqual
832 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
833 '<someone@eecs.umich.edu>,'
834 '"Someone Test #B" <someone@umich.edu>, '
835 '"Someone Test #C" <someone@eecs.umich.edu>, '
836 '"Someone Test #D" <someone@eecs.umich.edu>')
837 msg = Message()
838 msg['To'] = to
839 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +0000840To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000841 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +0000842 "Someone Test #C" <someone@eecs.umich.edu>,
843 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000844
845''')
846
847 def test_long_line_after_append(self):
848 eq = self.ndiffAssertEqual
849 s = 'This is an example of string which has almost the limit of header length.'
850 h = Header(s)
851 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +0000852 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000853This is an example of string which has almost the limit of header length.
854 Add another line.""")
855
856 def test_shorter_line_with_append(self):
857 eq = self.ndiffAssertEqual
858 s = 'This is a shorter line.'
859 h = Header(s)
860 h.append('Add another sentence. (Surprise?)')
861 eq(h.encode(),
862 'This is a shorter line. Add another sentence. (Surprise?)')
863
864 def test_long_field_name(self):
865 eq = self.ndiffAssertEqual
866 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +0000867 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
868 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
869 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
870 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000871 h = Header(gs, 'iso-8859-1', header_name=fn)
872 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +0000873 eq(h.encode(maxlinelen=76), """\
874=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
875 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
876 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
877 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000878
879 def test_long_received_header(self):
880 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
881 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
882 'Wed, 05 Mar 2003 18:10:18 -0700')
883 msg = Message()
884 msg['Received-1'] = Header(h, continuation_ws='\t')
885 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000886 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000887 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000888Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
889 Wed, 05 Mar 2003 18:10:18 -0700
890Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
891 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000892
893""")
894
895 def test_string_headerinst_eq(self):
896 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
897 'tu-muenchen.de> (David Bremner\'s message of '
898 '"Thu, 6 Mar 2003 13:58:21 +0100")')
899 msg = Message()
900 msg['Received-1'] = Header(h, header_name='Received-1',
901 continuation_ws='\t')
902 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000903 # XXX This should be splitting on spaces not commas.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000904 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000905Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
906 6 Mar 2003 13:58:21 +0100\")
907Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
908 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000909
910""")
911
912 def test_long_unbreakable_lines_with_continuation(self):
913 eq = self.ndiffAssertEqual
914 msg = Message()
915 t = """\
916iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
917 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
918 msg['Face-1'] = t
919 msg['Face-2'] = Header(t, header_name='Face-2')
Barry Warsawbef9d212007-08-31 10:55:37 +0000920 # XXX This splitting is all wrong. It the first value line should be
921 # snug against the field name.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000922 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +0000923Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000924 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000925 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +0000926Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +0000927 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000928 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
929
930""")
931
932 def test_another_long_multiline_header(self):
933 eq = self.ndiffAssertEqual
934 m = ('Received: from siimage.com '
935 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +0000936 'Microsoft SMTPSVC(5.0.2195.4905); '
937 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000938 msg = email.message_from_string(m)
939 eq(msg.as_string(maxheaderlen=78), '''\
Barry Warsawbef9d212007-08-31 10:55:37 +0000940Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
941 Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000942
943''')
944
945 def test_long_lines_with_different_header(self):
946 eq = self.ndiffAssertEqual
947 h = ('List-Unsubscribe: '
948 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
949 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
950 '?subject=unsubscribe>')
951 msg = Message()
952 msg['List'] = h
953 msg['List'] = Header(h, header_name='List')
954 eq(msg.as_string(maxheaderlen=78), """\
955List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000956 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000957List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000958 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000959
960""")
961
962
Ezio Melotti19f2aeb2010-11-21 01:30:29 +0000963
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000964# Test mangling of "From " lines in the body of a message
965class TestFromMangling(unittest.TestCase):
966 def setUp(self):
967 self.msg = Message()
968 self.msg['From'] = 'aaa@bbb.org'
969 self.msg.set_payload("""\
970From the desk of A.A.A.:
971Blah blah blah
972""")
973
974 def test_mangled_from(self):
975 s = StringIO()
976 g = Generator(s, mangle_from_=True)
977 g.flatten(self.msg)
978 self.assertEqual(s.getvalue(), """\
979From: aaa@bbb.org
980
981>From the desk of A.A.A.:
982Blah blah blah
983""")
984
985 def test_dont_mangle_from(self):
986 s = StringIO()
987 g = Generator(s, mangle_from_=False)
988 g.flatten(self.msg)
989 self.assertEqual(s.getvalue(), """\
990From: aaa@bbb.org
991
992From the desk of A.A.A.:
993Blah blah blah
994""")
995
996
Ezio Melotti19f2aeb2010-11-21 01:30:29 +0000997
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000998# Test the basic MIMEAudio class
999class TestMIMEAudio(unittest.TestCase):
1000 def setUp(self):
1001 # Make sure we pick up the audiotest.au that lives in email/test/data.
1002 # In Python, there's an audiotest.au living in Lib/test but that isn't
1003 # included in some binary distros that don't include the test
1004 # package. The trailing empty string on the .join() is significant
1005 # since findfile() will do a dirname().
1006 datadir = os.path.join(os.path.dirname(landmark), 'data', '')
1007 with open(findfile('audiotest.au', datadir), 'rb') as fp:
1008 self._audiodata = fp.read()
1009 self._au = MIMEAudio(self._audiodata)
1010
1011 def test_guess_minor_type(self):
1012 self.assertEqual(self._au.get_content_type(), 'audio/basic')
1013
1014 def test_encoding(self):
1015 payload = self._au.get_payload()
R. David Murray99147c42010-06-04 16:15:34 +00001016 self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
1017 self._audiodata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001018
1019 def test_checkSetMinor(self):
1020 au = MIMEAudio(self._audiodata, 'fish')
1021 self.assertEqual(au.get_content_type(), 'audio/fish')
1022
1023 def test_add_header(self):
1024 eq = self.assertEqual
Georg Brandlab91fde2009-08-13 08:51:18 +00001025 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001026 self._au.add_header('Content-Disposition', 'attachment',
1027 filename='audiotest.au')
1028 eq(self._au['content-disposition'],
1029 'attachment; filename="audiotest.au"')
1030 eq(self._au.get_params(header='content-disposition'),
1031 [('attachment', ''), ('filename', 'audiotest.au')])
1032 eq(self._au.get_param('filename', header='content-disposition'),
1033 'audiotest.au')
1034 missing = []
1035 eq(self._au.get_param('attachment', header='content-disposition'), '')
1036 unless(self._au.get_param('foo', failobj=missing,
1037 header='content-disposition') is missing)
1038 # Try some missing stuff
1039 unless(self._au.get_param('foobar', missing) is missing)
1040 unless(self._au.get_param('attachment', missing,
1041 header='foobar') is missing)
1042
1043
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001044
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001045# Test the basic MIMEImage class
1046class TestMIMEImage(unittest.TestCase):
1047 def setUp(self):
1048 with openfile('PyBanner048.gif', 'rb') as fp:
1049 self._imgdata = fp.read()
1050 self._im = MIMEImage(self._imgdata)
1051
1052 def test_guess_minor_type(self):
1053 self.assertEqual(self._im.get_content_type(), 'image/gif')
1054
1055 def test_encoding(self):
1056 payload = self._im.get_payload()
R. David Murray99147c42010-06-04 16:15:34 +00001057 self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
1058 self._imgdata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001059
1060 def test_checkSetMinor(self):
1061 im = MIMEImage(self._imgdata, 'fish')
1062 self.assertEqual(im.get_content_type(), 'image/fish')
1063
1064 def test_add_header(self):
1065 eq = self.assertEqual
Georg Brandlab91fde2009-08-13 08:51:18 +00001066 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001067 self._im.add_header('Content-Disposition', 'attachment',
1068 filename='dingusfish.gif')
1069 eq(self._im['content-disposition'],
1070 'attachment; filename="dingusfish.gif"')
1071 eq(self._im.get_params(header='content-disposition'),
1072 [('attachment', ''), ('filename', 'dingusfish.gif')])
1073 eq(self._im.get_param('filename', header='content-disposition'),
1074 'dingusfish.gif')
1075 missing = []
1076 eq(self._im.get_param('attachment', header='content-disposition'), '')
1077 unless(self._im.get_param('foo', failobj=missing,
1078 header='content-disposition') is missing)
1079 # Try some missing stuff
1080 unless(self._im.get_param('foobar', missing) is missing)
1081 unless(self._im.get_param('attachment', missing,
1082 header='foobar') is missing)
1083
1084
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001085
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001086# Test the basic MIMEApplication class
1087class TestMIMEApplication(unittest.TestCase):
1088 def test_headers(self):
1089 eq = self.assertEqual
Barry Warsaw8b2af272007-08-31 03:04:26 +00001090 msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001091 eq(msg.get_content_type(), 'application/octet-stream')
1092 eq(msg['content-transfer-encoding'], 'base64')
1093
1094 def test_body(self):
1095 eq = self.assertEqual
Barry Warsaw8c571042007-08-30 19:17:18 +00001096 bytes = b'\xfa\xfb\xfc\xfd\xfe\xff'
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001097 msg = MIMEApplication(bytes)
R. David Murray99147c42010-06-04 16:15:34 +00001098 eq(msg.get_payload(), '+vv8/f7/')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001099 eq(msg.get_payload(decode=True), bytes)
1100
1101
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001102
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001103# Test the basic MIMEText class
1104class TestMIMEText(unittest.TestCase):
1105 def setUp(self):
1106 self._msg = MIMEText('hello there')
1107
1108 def test_types(self):
1109 eq = self.assertEqual
Georg Brandlab91fde2009-08-13 08:51:18 +00001110 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001111 eq(self._msg.get_content_type(), 'text/plain')
1112 eq(self._msg.get_param('charset'), 'us-ascii')
1113 missing = []
1114 unless(self._msg.get_param('foobar', missing) is missing)
1115 unless(self._msg.get_param('charset', missing, header='foobar')
1116 is missing)
1117
1118 def test_payload(self):
1119 self.assertEqual(self._msg.get_payload(), 'hello there')
Georg Brandlab91fde2009-08-13 08:51:18 +00001120 self.assertTrue(not self._msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001121
1122 def test_charset(self):
1123 eq = self.assertEqual
1124 msg = MIMEText('hello there', _charset='us-ascii')
1125 eq(msg.get_charset().input_charset, 'us-ascii')
1126 eq(msg['content-type'], 'text/plain; charset="us-ascii"')
1127
R. David Murrayd2d08c62010-06-03 02:05:47 +00001128 def test_7bit_input(self):
1129 eq = self.assertEqual
1130 msg = MIMEText('hello there', _charset='us-ascii')
1131 eq(msg.get_charset().input_charset, 'us-ascii')
1132 eq(msg['content-type'], 'text/plain; charset="us-ascii"')
1133
1134 def test_7bit_input_no_charset(self):
1135 eq = self.assertEqual
1136 msg = MIMEText('hello there')
1137 eq(msg.get_charset(), 'us-ascii')
1138 eq(msg['content-type'], 'text/plain; charset="us-ascii"')
1139 self.assertTrue('hello there' in msg.as_string())
1140
1141 def test_utf8_input(self):
1142 teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
1143 eq = self.assertEqual
1144 msg = MIMEText(teststr, _charset='utf-8')
1145 eq(msg.get_charset().output_charset, 'utf-8')
1146 eq(msg['content-type'], 'text/plain; charset="utf-8"')
1147 eq(msg.get_payload(decode=True), teststr.encode('utf-8'))
1148
1149 @unittest.skip("can't fix because of backward compat in email5, "
1150 "will fix in email6")
1151 def test_utf8_input_no_charset(self):
1152 teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
1153 self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1154
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001155
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001156
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001157# Test complicated multipart/* messages
1158class TestMultipart(TestEmailBase):
1159 def setUp(self):
1160 with openfile('PyBanner048.gif', 'rb') as fp:
1161 data = fp.read()
1162 container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
1163 image = MIMEImage(data, name='dingusfish.gif')
1164 image.add_header('content-disposition', 'attachment',
1165 filename='dingusfish.gif')
1166 intro = MIMEText('''\
1167Hi there,
1168
1169This is the dingus fish.
1170''')
1171 container.attach(intro)
1172 container.attach(image)
1173 container['From'] = 'Barry <barry@digicool.com>'
1174 container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
1175 container['Subject'] = 'Here is your dingus fish'
1176
1177 now = 987809702.54848599
1178 timetuple = time.localtime(now)
1179 if timetuple[-1] == 0:
1180 tzsecs = time.timezone
1181 else:
1182 tzsecs = time.altzone
1183 if tzsecs > 0:
1184 sign = '-'
1185 else:
1186 sign = '+'
1187 tzoffset = ' %s%04d' % (sign, tzsecs / 36)
1188 container['Date'] = time.strftime(
1189 '%a, %d %b %Y %H:%M:%S',
1190 time.localtime(now)) + tzoffset
1191 self._msg = container
1192 self._im = image
1193 self._txt = intro
1194
1195 def test_hierarchy(self):
1196 # convenience
1197 eq = self.assertEqual
Georg Brandlab91fde2009-08-13 08:51:18 +00001198 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001199 raises = self.assertRaises
1200 # tests
1201 m = self._msg
1202 unless(m.is_multipart())
1203 eq(m.get_content_type(), 'multipart/mixed')
1204 eq(len(m.get_payload()), 2)
1205 raises(IndexError, m.get_payload, 2)
1206 m0 = m.get_payload(0)
1207 m1 = m.get_payload(1)
1208 unless(m0 is self._txt)
1209 unless(m1 is self._im)
1210 eq(m.get_payload(), [m0, m1])
1211 unless(not m0.is_multipart())
1212 unless(not m1.is_multipart())
1213
1214 def test_empty_multipart_idempotent(self):
1215 text = """\
1216Content-Type: multipart/mixed; boundary="BOUNDARY"
1217MIME-Version: 1.0
1218Subject: A subject
1219To: aperson@dom.ain
1220From: bperson@dom.ain
1221
1222
1223--BOUNDARY
1224
1225
1226--BOUNDARY--
1227"""
1228 msg = Parser().parsestr(text)
1229 self.ndiffAssertEqual(text, msg.as_string())
1230
1231 def test_no_parts_in_a_multipart_with_none_epilogue(self):
1232 outer = MIMEBase('multipart', 'mixed')
1233 outer['Subject'] = 'A subject'
1234 outer['To'] = 'aperson@dom.ain'
1235 outer['From'] = 'bperson@dom.ain'
1236 outer.set_boundary('BOUNDARY')
1237 self.ndiffAssertEqual(outer.as_string(), '''\
1238Content-Type: multipart/mixed; boundary="BOUNDARY"
1239MIME-Version: 1.0
1240Subject: A subject
1241To: aperson@dom.ain
1242From: bperson@dom.ain
1243
1244--BOUNDARY
1245
1246--BOUNDARY--''')
1247
1248 def test_no_parts_in_a_multipart_with_empty_epilogue(self):
1249 outer = MIMEBase('multipart', 'mixed')
1250 outer['Subject'] = 'A subject'
1251 outer['To'] = 'aperson@dom.ain'
1252 outer['From'] = 'bperson@dom.ain'
1253 outer.preamble = ''
1254 outer.epilogue = ''
1255 outer.set_boundary('BOUNDARY')
1256 self.ndiffAssertEqual(outer.as_string(), '''\
1257Content-Type: multipart/mixed; boundary="BOUNDARY"
1258MIME-Version: 1.0
1259Subject: A subject
1260To: aperson@dom.ain
1261From: bperson@dom.ain
1262
1263
1264--BOUNDARY
1265
1266--BOUNDARY--
1267''')
1268
1269 def test_one_part_in_a_multipart(self):
1270 eq = self.ndiffAssertEqual
1271 outer = MIMEBase('multipart', 'mixed')
1272 outer['Subject'] = 'A subject'
1273 outer['To'] = 'aperson@dom.ain'
1274 outer['From'] = 'bperson@dom.ain'
1275 outer.set_boundary('BOUNDARY')
1276 msg = MIMEText('hello world')
1277 outer.attach(msg)
1278 eq(outer.as_string(), '''\
1279Content-Type: multipart/mixed; boundary="BOUNDARY"
1280MIME-Version: 1.0
1281Subject: A subject
1282To: aperson@dom.ain
1283From: bperson@dom.ain
1284
1285--BOUNDARY
1286Content-Type: text/plain; charset="us-ascii"
1287MIME-Version: 1.0
1288Content-Transfer-Encoding: 7bit
1289
1290hello world
1291--BOUNDARY--''')
1292
1293 def test_seq_parts_in_a_multipart_with_empty_preamble(self):
1294 eq = self.ndiffAssertEqual
1295 outer = MIMEBase('multipart', 'mixed')
1296 outer['Subject'] = 'A subject'
1297 outer['To'] = 'aperson@dom.ain'
1298 outer['From'] = 'bperson@dom.ain'
1299 outer.preamble = ''
1300 msg = MIMEText('hello world')
1301 outer.attach(msg)
1302 outer.set_boundary('BOUNDARY')
1303 eq(outer.as_string(), '''\
1304Content-Type: multipart/mixed; boundary="BOUNDARY"
1305MIME-Version: 1.0
1306Subject: A subject
1307To: aperson@dom.ain
1308From: bperson@dom.ain
1309
1310
1311--BOUNDARY
1312Content-Type: text/plain; charset="us-ascii"
1313MIME-Version: 1.0
1314Content-Transfer-Encoding: 7bit
1315
1316hello world
1317--BOUNDARY--''')
1318
1319
1320 def test_seq_parts_in_a_multipart_with_none_preamble(self):
1321 eq = self.ndiffAssertEqual
1322 outer = MIMEBase('multipart', 'mixed')
1323 outer['Subject'] = 'A subject'
1324 outer['To'] = 'aperson@dom.ain'
1325 outer['From'] = 'bperson@dom.ain'
1326 outer.preamble = None
1327 msg = MIMEText('hello world')
1328 outer.attach(msg)
1329 outer.set_boundary('BOUNDARY')
1330 eq(outer.as_string(), '''\
1331Content-Type: multipart/mixed; boundary="BOUNDARY"
1332MIME-Version: 1.0
1333Subject: A subject
1334To: aperson@dom.ain
1335From: bperson@dom.ain
1336
1337--BOUNDARY
1338Content-Type: text/plain; charset="us-ascii"
1339MIME-Version: 1.0
1340Content-Transfer-Encoding: 7bit
1341
1342hello world
1343--BOUNDARY--''')
1344
1345
1346 def test_seq_parts_in_a_multipart_with_none_epilogue(self):
1347 eq = self.ndiffAssertEqual
1348 outer = MIMEBase('multipart', 'mixed')
1349 outer['Subject'] = 'A subject'
1350 outer['To'] = 'aperson@dom.ain'
1351 outer['From'] = 'bperson@dom.ain'
1352 outer.epilogue = None
1353 msg = MIMEText('hello world')
1354 outer.attach(msg)
1355 outer.set_boundary('BOUNDARY')
1356 eq(outer.as_string(), '''\
1357Content-Type: multipart/mixed; boundary="BOUNDARY"
1358MIME-Version: 1.0
1359Subject: A subject
1360To: aperson@dom.ain
1361From: bperson@dom.ain
1362
1363--BOUNDARY
1364Content-Type: text/plain; charset="us-ascii"
1365MIME-Version: 1.0
1366Content-Transfer-Encoding: 7bit
1367
1368hello world
1369--BOUNDARY--''')
1370
1371
1372 def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
1373 eq = self.ndiffAssertEqual
1374 outer = MIMEBase('multipart', 'mixed')
1375 outer['Subject'] = 'A subject'
1376 outer['To'] = 'aperson@dom.ain'
1377 outer['From'] = 'bperson@dom.ain'
1378 outer.epilogue = ''
1379 msg = MIMEText('hello world')
1380 outer.attach(msg)
1381 outer.set_boundary('BOUNDARY')
1382 eq(outer.as_string(), '''\
1383Content-Type: multipart/mixed; boundary="BOUNDARY"
1384MIME-Version: 1.0
1385Subject: A subject
1386To: aperson@dom.ain
1387From: bperson@dom.ain
1388
1389--BOUNDARY
1390Content-Type: text/plain; charset="us-ascii"
1391MIME-Version: 1.0
1392Content-Transfer-Encoding: 7bit
1393
1394hello world
1395--BOUNDARY--
1396''')
1397
1398
1399 def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
1400 eq = self.ndiffAssertEqual
1401 outer = MIMEBase('multipart', 'mixed')
1402 outer['Subject'] = 'A subject'
1403 outer['To'] = 'aperson@dom.ain'
1404 outer['From'] = 'bperson@dom.ain'
1405 outer.epilogue = '\n'
1406 msg = MIMEText('hello world')
1407 outer.attach(msg)
1408 outer.set_boundary('BOUNDARY')
1409 eq(outer.as_string(), '''\
1410Content-Type: multipart/mixed; boundary="BOUNDARY"
1411MIME-Version: 1.0
1412Subject: A subject
1413To: aperson@dom.ain
1414From: bperson@dom.ain
1415
1416--BOUNDARY
1417Content-Type: text/plain; charset="us-ascii"
1418MIME-Version: 1.0
1419Content-Transfer-Encoding: 7bit
1420
1421hello world
1422--BOUNDARY--
1423
1424''')
1425
1426 def test_message_external_body(self):
1427 eq = self.assertEqual
1428 msg = self._msgobj('msg_36.txt')
1429 eq(len(msg.get_payload()), 2)
1430 msg1 = msg.get_payload(1)
1431 eq(msg1.get_content_type(), 'multipart/alternative')
1432 eq(len(msg1.get_payload()), 2)
1433 for subpart in msg1.get_payload():
1434 eq(subpart.get_content_type(), 'message/external-body')
1435 eq(len(subpart.get_payload()), 1)
1436 subsubpart = subpart.get_payload(0)
1437 eq(subsubpart.get_content_type(), 'text/plain')
1438
1439 def test_double_boundary(self):
1440 # msg_37.txt is a multipart that contains two dash-boundary's in a
1441 # row. Our interpretation of RFC 2046 calls for ignoring the second
1442 # and subsequent boundaries.
1443 msg = self._msgobj('msg_37.txt')
1444 self.assertEqual(len(msg.get_payload()), 3)
1445
1446 def test_nested_inner_contains_outer_boundary(self):
1447 eq = self.ndiffAssertEqual
1448 # msg_38.txt has an inner part that contains outer boundaries. My
1449 # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
1450 # these are illegal and should be interpreted as unterminated inner
1451 # parts.
1452 msg = self._msgobj('msg_38.txt')
1453 sfp = StringIO()
1454 iterators._structure(msg, sfp)
1455 eq(sfp.getvalue(), """\
1456multipart/mixed
1457 multipart/mixed
1458 multipart/alternative
1459 text/plain
1460 text/plain
1461 text/plain
1462 text/plain
1463""")
1464
1465 def test_nested_with_same_boundary(self):
1466 eq = self.ndiffAssertEqual
1467 # msg 39.txt is similarly evil in that it's got inner parts that use
1468 # the same boundary as outer parts. Again, I believe the way this is
1469 # parsed is closest to the spirit of RFC 2046
1470 msg = self._msgobj('msg_39.txt')
1471 sfp = StringIO()
1472 iterators._structure(msg, sfp)
1473 eq(sfp.getvalue(), """\
1474multipart/mixed
1475 multipart/mixed
1476 multipart/alternative
1477 application/octet-stream
1478 application/octet-stream
1479 text/plain
1480""")
1481
1482 def test_boundary_in_non_multipart(self):
1483 msg = self._msgobj('msg_40.txt')
1484 self.assertEqual(msg.as_string(), '''\
1485MIME-Version: 1.0
1486Content-Type: text/html; boundary="--961284236552522269"
1487
1488----961284236552522269
1489Content-Type: text/html;
1490Content-Transfer-Encoding: 7Bit
1491
1492<html></html>
1493
1494----961284236552522269--
1495''')
1496
1497 def test_boundary_with_leading_space(self):
1498 eq = self.assertEqual
1499 msg = email.message_from_string('''\
1500MIME-Version: 1.0
1501Content-Type: multipart/mixed; boundary=" XXXX"
1502
1503-- XXXX
1504Content-Type: text/plain
1505
1506
1507-- XXXX
1508Content-Type: text/plain
1509
1510-- XXXX--
1511''')
Georg Brandlab91fde2009-08-13 08:51:18 +00001512 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001513 eq(msg.get_boundary(), ' XXXX')
1514 eq(len(msg.get_payload()), 2)
1515
1516 def test_boundary_without_trailing_newline(self):
1517 m = Parser().parsestr("""\
1518Content-Type: multipart/mixed; boundary="===============0012394164=="
1519MIME-Version: 1.0
1520
1521--===============0012394164==
1522Content-Type: image/file1.jpg
1523MIME-Version: 1.0
1524Content-Transfer-Encoding: base64
1525
1526YXNkZg==
1527--===============0012394164==--""")
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001528 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001529
1530
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001531
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001532# Test some badly formatted messages
1533class TestNonConformant(TestEmailBase):
1534 def test_parse_missing_minor_type(self):
1535 eq = self.assertEqual
1536 msg = self._msgobj('msg_14.txt')
1537 eq(msg.get_content_type(), 'text/plain')
1538 eq(msg.get_content_maintype(), 'text')
1539 eq(msg.get_content_subtype(), 'plain')
1540
1541 def test_same_boundary_inner_outer(self):
Georg Brandlab91fde2009-08-13 08:51:18 +00001542 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001543 msg = self._msgobj('msg_15.txt')
1544 # XXX We can probably eventually do better
1545 inner = msg.get_payload(0)
1546 unless(hasattr(inner, 'defects'))
1547 self.assertEqual(len(inner.defects), 1)
1548 unless(isinstance(inner.defects[0],
1549 errors.StartBoundaryNotFoundDefect))
1550
1551 def test_multipart_no_boundary(self):
Georg Brandlab91fde2009-08-13 08:51:18 +00001552 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001553 msg = self._msgobj('msg_25.txt')
1554 unless(isinstance(msg.get_payload(), str))
1555 self.assertEqual(len(msg.defects), 2)
1556 unless(isinstance(msg.defects[0], errors.NoBoundaryInMultipartDefect))
1557 unless(isinstance(msg.defects[1],
1558 errors.MultipartInvariantViolationDefect))
1559
1560 def test_invalid_content_type(self):
1561 eq = self.assertEqual
1562 neq = self.ndiffAssertEqual
1563 msg = Message()
1564 # RFC 2045, $5.2 says invalid yields text/plain
1565 msg['Content-Type'] = 'text'
1566 eq(msg.get_content_maintype(), 'text')
1567 eq(msg.get_content_subtype(), 'plain')
1568 eq(msg.get_content_type(), 'text/plain')
1569 # Clear the old value and try something /really/ invalid
1570 del msg['content-type']
1571 msg['Content-Type'] = 'foo'
1572 eq(msg.get_content_maintype(), 'text')
1573 eq(msg.get_content_subtype(), 'plain')
1574 eq(msg.get_content_type(), 'text/plain')
1575 # Still, make sure that the message is idempotently generated
1576 s = StringIO()
1577 g = Generator(s)
1578 g.flatten(msg)
1579 neq(s.getvalue(), 'Content-Type: foo\n\n')
1580
1581 def test_no_start_boundary(self):
1582 eq = self.ndiffAssertEqual
1583 msg = self._msgobj('msg_31.txt')
1584 eq(msg.get_payload(), """\
1585--BOUNDARY
1586Content-Type: text/plain
1587
1588message 1
1589
1590--BOUNDARY
1591Content-Type: text/plain
1592
1593message 2
1594
1595--BOUNDARY--
1596""")
1597
1598 def test_no_separating_blank_line(self):
1599 eq = self.ndiffAssertEqual
1600 msg = self._msgobj('msg_35.txt')
1601 eq(msg.as_string(), """\
1602From: aperson@dom.ain
1603To: bperson@dom.ain
1604Subject: here's something interesting
1605
1606counter to RFC 2822, there's no separating newline here
1607""")
1608
1609 def test_lying_multipart(self):
Georg Brandlab91fde2009-08-13 08:51:18 +00001610 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001611 msg = self._msgobj('msg_41.txt')
1612 unless(hasattr(msg, 'defects'))
1613 self.assertEqual(len(msg.defects), 2)
1614 unless(isinstance(msg.defects[0], errors.NoBoundaryInMultipartDefect))
1615 unless(isinstance(msg.defects[1],
1616 errors.MultipartInvariantViolationDefect))
1617
1618 def test_missing_start_boundary(self):
1619 outer = self._msgobj('msg_42.txt')
1620 # The message structure is:
1621 #
1622 # multipart/mixed
1623 # text/plain
1624 # message/rfc822
1625 # multipart/mixed [*]
1626 #
1627 # [*] This message is missing its start boundary
1628 bad = outer.get_payload(1).get_payload(0)
1629 self.assertEqual(len(bad.defects), 1)
Georg Brandlab91fde2009-08-13 08:51:18 +00001630 self.assertTrue(isinstance(bad.defects[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001631 errors.StartBoundaryNotFoundDefect))
1632
1633 def test_first_line_is_continuation_header(self):
1634 eq = self.assertEqual
1635 m = ' Line 1\nLine 2\nLine 3'
1636 msg = email.message_from_string(m)
1637 eq(msg.keys(), [])
1638 eq(msg.get_payload(), 'Line 2\nLine 3')
1639 eq(len(msg.defects), 1)
Georg Brandlab91fde2009-08-13 08:51:18 +00001640 self.assertTrue(isinstance(msg.defects[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001641 errors.FirstHeaderLineIsContinuationDefect))
1642 eq(msg.defects[0].line, ' Line 1\n')
1643
1644
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001645
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001646# Test RFC 2047 header encoding and decoding
Guido van Rossum9604e662007-08-30 03:46:43 +00001647class TestRFC2047(TestEmailBase):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001648 def test_rfc2047_multiline(self):
1649 eq = self.assertEqual
1650 s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
1651 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
1652 dh = decode_header(s)
1653 eq(dh, [
1654 (b'Re:', None),
1655 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
1656 (b'baz foo bar', None),
1657 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
1658 header = make_header(dh)
1659 eq(str(header),
1660 'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
Barry Warsaw00b34222007-08-31 02:35:00 +00001661 self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001662Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
1663 =?mac-iceland?q?=9Arg=8Cs?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001664
1665 def test_whitespace_eater_unicode(self):
1666 eq = self.assertEqual
1667 s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
1668 dh = decode_header(s)
1669 eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
1670 (b'Pirard <pirard@dom.ain>', None)])
1671 header = str(make_header(dh))
1672 eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')
1673
1674 def test_whitespace_eater_unicode_2(self):
1675 eq = self.assertEqual
1676 s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
1677 dh = decode_header(s)
1678 eq(dh, [(b'The', None), (b'quick brown fox', 'iso-8859-1'),
1679 (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
1680 hu = str(make_header(dh))
1681 eq(hu, 'The quick brown fox jumped over the lazy dog')
1682
1683 def test_rfc2047_missing_whitespace(self):
1684 s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
1685 dh = decode_header(s)
1686 self.assertEqual(dh, [(s, None)])
1687
1688 def test_rfc2047_with_whitespace(self):
1689 s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
1690 dh = decode_header(s)
1691 self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
1692 (b'rg', None), (b'\xe5', 'iso-8859-1'),
1693 (b'sbord', None)])
1694
R. David Murraye06528c2010-08-03 23:35:44 +00001695 def test_rfc2047_B_bad_padding(self):
1696 s = '=?iso-8859-1?B?%s?='
1697 data = [ # only test complete bytes
1698 ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
1699 ('dmk=', b'vi'), ('dmk', b'vi')
1700 ]
1701 for q, a in data:
1702 dh = decode_header(s % q)
1703 self.assertEqual(dh, [(a, 'iso-8859-1')])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001704
R. David Murrayf9c957f2010-10-01 15:45:48 +00001705 def test_rfc2047_Q_invalid_digits(self):
1706 # issue 10004.
1707 s = '=?iso-8659-1?Q?andr=e9=zz?='
1708 self.assertEqual(decode_header(s),
1709 [(b'andr\xe9=zz', 'iso-8659-1')])
1710
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001711
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001712# Test the MIMEMessage class
1713class TestMIMEMessage(TestEmailBase):
1714 def setUp(self):
1715 with openfile('msg_11.txt') as fp:
1716 self._text = fp.read()
1717
1718 def test_type_error(self):
1719 self.assertRaises(TypeError, MIMEMessage, 'a plain string')
1720
1721 def test_valid_argument(self):
1722 eq = self.assertEqual
Georg Brandlab91fde2009-08-13 08:51:18 +00001723 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001724 subject = 'A sub-message'
1725 m = Message()
1726 m['Subject'] = subject
1727 r = MIMEMessage(m)
1728 eq(r.get_content_type(), 'message/rfc822')
1729 payload = r.get_payload()
1730 unless(isinstance(payload, list))
1731 eq(len(payload), 1)
1732 subpart = payload[0]
1733 unless(subpart is m)
1734 eq(subpart['subject'], subject)
1735
1736 def test_bad_multipart(self):
1737 eq = self.assertEqual
1738 msg1 = Message()
1739 msg1['Subject'] = 'subpart 1'
1740 msg2 = Message()
1741 msg2['Subject'] = 'subpart 2'
1742 r = MIMEMessage(msg1)
1743 self.assertRaises(errors.MultipartConversionError, r.attach, msg2)
1744
1745 def test_generate(self):
1746 # First craft the message to be encapsulated
1747 m = Message()
1748 m['Subject'] = 'An enclosed message'
1749 m.set_payload('Here is the body of the message.\n')
1750 r = MIMEMessage(m)
1751 r['Subject'] = 'The enclosing message'
1752 s = StringIO()
1753 g = Generator(s)
1754 g.flatten(r)
1755 self.assertEqual(s.getvalue(), """\
1756Content-Type: message/rfc822
1757MIME-Version: 1.0
1758Subject: The enclosing message
1759
1760Subject: An enclosed message
1761
1762Here is the body of the message.
1763""")
1764
1765 def test_parse_message_rfc822(self):
1766 eq = self.assertEqual
Georg Brandlab91fde2009-08-13 08:51:18 +00001767 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001768 msg = self._msgobj('msg_11.txt')
1769 eq(msg.get_content_type(), 'message/rfc822')
1770 payload = msg.get_payload()
1771 unless(isinstance(payload, list))
1772 eq(len(payload), 1)
1773 submsg = payload[0]
Georg Brandlab91fde2009-08-13 08:51:18 +00001774 self.assertTrue(isinstance(submsg, Message))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001775 eq(submsg['subject'], 'An enclosed message')
1776 eq(submsg.get_payload(), 'Here is the body of the message.\n')
1777
1778 def test_dsn(self):
1779 eq = self.assertEqual
Georg Brandlab91fde2009-08-13 08:51:18 +00001780 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001781 # msg 16 is a Delivery Status Notification, see RFC 1894
1782 msg = self._msgobj('msg_16.txt')
1783 eq(msg.get_content_type(), 'multipart/report')
1784 unless(msg.is_multipart())
1785 eq(len(msg.get_payload()), 3)
1786 # Subpart 1 is a text/plain, human readable section
1787 subpart = msg.get_payload(0)
1788 eq(subpart.get_content_type(), 'text/plain')
1789 eq(subpart.get_payload(), """\
1790This report relates to a message you sent with the following header fields:
1791
1792 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
1793 Date: Sun, 23 Sep 2001 20:10:55 -0700
1794 From: "Ian T. Henry" <henryi@oxy.edu>
1795 To: SoCal Raves <scr@socal-raves.org>
1796 Subject: [scr] yeah for Ians!!
1797
1798Your message cannot be delivered to the following recipients:
1799
1800 Recipient address: jangel1@cougar.noc.ucla.edu
1801 Reason: recipient reached disk quota
1802
1803""")
1804 # Subpart 2 contains the machine parsable DSN information. It
1805 # consists of two blocks of headers, represented by two nested Message
1806 # objects.
1807 subpart = msg.get_payload(1)
1808 eq(subpart.get_content_type(), 'message/delivery-status')
1809 eq(len(subpart.get_payload()), 2)
1810 # message/delivery-status should treat each block as a bunch of
1811 # headers, i.e. a bunch of Message objects.
1812 dsn1 = subpart.get_payload(0)
1813 unless(isinstance(dsn1, Message))
1814 eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
1815 eq(dsn1.get_param('dns', header='reporting-mta'), '')
1816 # Try a missing one <wink>
1817 eq(dsn1.get_param('nsd', header='reporting-mta'), None)
1818 dsn2 = subpart.get_payload(1)
1819 unless(isinstance(dsn2, Message))
1820 eq(dsn2['action'], 'failed')
1821 eq(dsn2.get_params(header='original-recipient'),
1822 [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
1823 eq(dsn2.get_param('rfc822', header='final-recipient'), '')
1824 # Subpart 3 is the original message
1825 subpart = msg.get_payload(2)
1826 eq(subpart.get_content_type(), 'message/rfc822')
1827 payload = subpart.get_payload()
1828 unless(isinstance(payload, list))
1829 eq(len(payload), 1)
1830 subsubpart = payload[0]
1831 unless(isinstance(subsubpart, Message))
1832 eq(subsubpart.get_content_type(), 'text/plain')
1833 eq(subsubpart['message-id'],
1834 '<002001c144a6$8752e060$56104586@oxy.edu>')
1835
1836 def test_epilogue(self):
1837 eq = self.ndiffAssertEqual
1838 with openfile('msg_21.txt') as fp:
1839 text = fp.read()
1840 msg = Message()
1841 msg['From'] = 'aperson@dom.ain'
1842 msg['To'] = 'bperson@dom.ain'
1843 msg['Subject'] = 'Test'
1844 msg.preamble = 'MIME message'
1845 msg.epilogue = 'End of MIME message\n'
1846 msg1 = MIMEText('One')
1847 msg2 = MIMEText('Two')
1848 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
1849 msg.attach(msg1)
1850 msg.attach(msg2)
1851 sfp = StringIO()
1852 g = Generator(sfp)
1853 g.flatten(msg)
1854 eq(sfp.getvalue(), text)
1855
1856 def test_no_nl_preamble(self):
1857 eq = self.ndiffAssertEqual
1858 msg = Message()
1859 msg['From'] = 'aperson@dom.ain'
1860 msg['To'] = 'bperson@dom.ain'
1861 msg['Subject'] = 'Test'
1862 msg.preamble = 'MIME message'
1863 msg.epilogue = ''
1864 msg1 = MIMEText('One')
1865 msg2 = MIMEText('Two')
1866 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
1867 msg.attach(msg1)
1868 msg.attach(msg2)
1869 eq(msg.as_string(), """\
1870From: aperson@dom.ain
1871To: bperson@dom.ain
1872Subject: Test
1873Content-Type: multipart/mixed; boundary="BOUNDARY"
1874
1875MIME message
1876--BOUNDARY
1877Content-Type: text/plain; charset="us-ascii"
1878MIME-Version: 1.0
1879Content-Transfer-Encoding: 7bit
1880
1881One
1882--BOUNDARY
1883Content-Type: text/plain; charset="us-ascii"
1884MIME-Version: 1.0
1885Content-Transfer-Encoding: 7bit
1886
1887Two
1888--BOUNDARY--
1889""")
1890
1891 def test_default_type(self):
1892 eq = self.assertEqual
1893 with openfile('msg_30.txt') as fp:
1894 msg = email.message_from_file(fp)
1895 container1 = msg.get_payload(0)
1896 eq(container1.get_default_type(), 'message/rfc822')
1897 eq(container1.get_content_type(), 'message/rfc822')
1898 container2 = msg.get_payload(1)
1899 eq(container2.get_default_type(), 'message/rfc822')
1900 eq(container2.get_content_type(), 'message/rfc822')
1901 container1a = container1.get_payload(0)
1902 eq(container1a.get_default_type(), 'text/plain')
1903 eq(container1a.get_content_type(), 'text/plain')
1904 container2a = container2.get_payload(0)
1905 eq(container2a.get_default_type(), 'text/plain')
1906 eq(container2a.get_content_type(), 'text/plain')
1907
1908 def test_default_type_with_explicit_container_type(self):
1909 eq = self.assertEqual
1910 with openfile('msg_28.txt') as fp:
1911 msg = email.message_from_file(fp)
1912 container1 = msg.get_payload(0)
1913 eq(container1.get_default_type(), 'message/rfc822')
1914 eq(container1.get_content_type(), 'message/rfc822')
1915 container2 = msg.get_payload(1)
1916 eq(container2.get_default_type(), 'message/rfc822')
1917 eq(container2.get_content_type(), 'message/rfc822')
1918 container1a = container1.get_payload(0)
1919 eq(container1a.get_default_type(), 'text/plain')
1920 eq(container1a.get_content_type(), 'text/plain')
1921 container2a = container2.get_payload(0)
1922 eq(container2a.get_default_type(), 'text/plain')
1923 eq(container2a.get_content_type(), 'text/plain')
1924
1925 def test_default_type_non_parsed(self):
1926 eq = self.assertEqual
1927 neq = self.ndiffAssertEqual
1928 # Set up container
1929 container = MIMEMultipart('digest', 'BOUNDARY')
1930 container.epilogue = ''
1931 # Set up subparts
1932 subpart1a = MIMEText('message 1\n')
1933 subpart2a = MIMEText('message 2\n')
1934 subpart1 = MIMEMessage(subpart1a)
1935 subpart2 = MIMEMessage(subpart2a)
1936 container.attach(subpart1)
1937 container.attach(subpart2)
1938 eq(subpart1.get_content_type(), 'message/rfc822')
1939 eq(subpart1.get_default_type(), 'message/rfc822')
1940 eq(subpart2.get_content_type(), 'message/rfc822')
1941 eq(subpart2.get_default_type(), 'message/rfc822')
1942 neq(container.as_string(0), '''\
1943Content-Type: multipart/digest; boundary="BOUNDARY"
1944MIME-Version: 1.0
1945
1946--BOUNDARY
1947Content-Type: message/rfc822
1948MIME-Version: 1.0
1949
1950Content-Type: text/plain; charset="us-ascii"
1951MIME-Version: 1.0
1952Content-Transfer-Encoding: 7bit
1953
1954message 1
1955
1956--BOUNDARY
1957Content-Type: message/rfc822
1958MIME-Version: 1.0
1959
1960Content-Type: text/plain; charset="us-ascii"
1961MIME-Version: 1.0
1962Content-Transfer-Encoding: 7bit
1963
1964message 2
1965
1966--BOUNDARY--
1967''')
1968 del subpart1['content-type']
1969 del subpart1['mime-version']
1970 del subpart2['content-type']
1971 del subpart2['mime-version']
1972 eq(subpart1.get_content_type(), 'message/rfc822')
1973 eq(subpart1.get_default_type(), 'message/rfc822')
1974 eq(subpart2.get_content_type(), 'message/rfc822')
1975 eq(subpart2.get_default_type(), 'message/rfc822')
1976 neq(container.as_string(0), '''\
1977Content-Type: multipart/digest; boundary="BOUNDARY"
1978MIME-Version: 1.0
1979
1980--BOUNDARY
1981
1982Content-Type: text/plain; charset="us-ascii"
1983MIME-Version: 1.0
1984Content-Transfer-Encoding: 7bit
1985
1986message 1
1987
1988--BOUNDARY
1989
1990Content-Type: text/plain; charset="us-ascii"
1991MIME-Version: 1.0
1992Content-Transfer-Encoding: 7bit
1993
1994message 2
1995
1996--BOUNDARY--
1997''')
1998
1999 def test_mime_attachments_in_constructor(self):
2000 eq = self.assertEqual
2001 text1 = MIMEText('')
2002 text2 = MIMEText('')
2003 msg = MIMEMultipart(_subparts=(text1, text2))
2004 eq(len(msg.get_payload()), 2)
2005 eq(msg.get_payload(0), text1)
2006 eq(msg.get_payload(1), text2)
2007
Christian Heimes587c2bf2008-01-19 16:21:02 +00002008 def test_default_multipart_constructor(self):
2009 msg = MIMEMultipart()
2010 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002011
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002012
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002013# A general test of parser->model->generator idempotency. IOW, read a message
2014# in, parse it into a message object tree, then without touching the tree,
2015# regenerate the plain text. The original text and the transformed text
2016# should be identical. Note: that we ignore the Unix-From since that may
2017# contain a changed date.
2018class TestIdempotent(TestEmailBase):
2019 def _msgobj(self, filename):
2020 with openfile(filename) as fp:
2021 data = fp.read()
2022 msg = email.message_from_string(data)
2023 return msg, data
2024
2025 def _idempotent(self, msg, text):
2026 eq = self.ndiffAssertEqual
2027 s = StringIO()
2028 g = Generator(s, maxheaderlen=0)
2029 g.flatten(msg)
2030 eq(text, s.getvalue())
2031
2032 def test_parse_text_message(self):
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002033 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002034 msg, text = self._msgobj('msg_01.txt')
2035 eq(msg.get_content_type(), 'text/plain')
2036 eq(msg.get_content_maintype(), 'text')
2037 eq(msg.get_content_subtype(), 'plain')
2038 eq(msg.get_params()[1], ('charset', 'us-ascii'))
2039 eq(msg.get_param('charset'), 'us-ascii')
2040 eq(msg.preamble, None)
2041 eq(msg.epilogue, None)
2042 self._idempotent(msg, text)
2043
2044 def test_parse_untyped_message(self):
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002045 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002046 msg, text = self._msgobj('msg_03.txt')
2047 eq(msg.get_content_type(), 'text/plain')
2048 eq(msg.get_params(), None)
2049 eq(msg.get_param('charset'), None)
2050 self._idempotent(msg, text)
2051
2052 def test_simple_multipart(self):
2053 msg, text = self._msgobj('msg_04.txt')
2054 self._idempotent(msg, text)
2055
2056 def test_MIME_digest(self):
2057 msg, text = self._msgobj('msg_02.txt')
2058 self._idempotent(msg, text)
2059
2060 def test_long_header(self):
2061 msg, text = self._msgobj('msg_27.txt')
2062 self._idempotent(msg, text)
2063
2064 def test_MIME_digest_with_part_headers(self):
2065 msg, text = self._msgobj('msg_28.txt')
2066 self._idempotent(msg, text)
2067
2068 def test_mixed_with_image(self):
2069 msg, text = self._msgobj('msg_06.txt')
2070 self._idempotent(msg, text)
2071
2072 def test_multipart_report(self):
2073 msg, text = self._msgobj('msg_05.txt')
2074 self._idempotent(msg, text)
2075
2076 def test_dsn(self):
2077 msg, text = self._msgobj('msg_16.txt')
2078 self._idempotent(msg, text)
2079
2080 def test_preamble_epilogue(self):
2081 msg, text = self._msgobj('msg_21.txt')
2082 self._idempotent(msg, text)
2083
2084 def test_multipart_one_part(self):
2085 msg, text = self._msgobj('msg_23.txt')
2086 self._idempotent(msg, text)
2087
2088 def test_multipart_no_parts(self):
2089 msg, text = self._msgobj('msg_24.txt')
2090 self._idempotent(msg, text)
2091
2092 def test_no_start_boundary(self):
2093 msg, text = self._msgobj('msg_31.txt')
2094 self._idempotent(msg, text)
2095
2096 def test_rfc2231_charset(self):
2097 msg, text = self._msgobj('msg_32.txt')
2098 self._idempotent(msg, text)
2099
2100 def test_more_rfc2231_parameters(self):
2101 msg, text = self._msgobj('msg_33.txt')
2102 self._idempotent(msg, text)
2103
2104 def test_text_plain_in_a_multipart_digest(self):
2105 msg, text = self._msgobj('msg_34.txt')
2106 self._idempotent(msg, text)
2107
2108 def test_nested_multipart_mixeds(self):
2109 msg, text = self._msgobj('msg_12a.txt')
2110 self._idempotent(msg, text)
2111
2112 def test_message_external_body_idempotent(self):
2113 msg, text = self._msgobj('msg_36.txt')
2114 self._idempotent(msg, text)
2115
2116 def test_content_type(self):
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002117 eq = self.assertEqual
Georg Brandlab91fde2009-08-13 08:51:18 +00002118 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002119 # Get a message object and reset the seek pointer for other tests
2120 msg, text = self._msgobj('msg_05.txt')
2121 eq(msg.get_content_type(), 'multipart/report')
2122 # Test the Content-Type: parameters
2123 params = {}
2124 for pk, pv in msg.get_params():
2125 params[pk] = pv
2126 eq(params['report-type'], 'delivery-status')
2127 eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
2128 eq(msg.preamble, 'This is a MIME-encapsulated message.\n')
2129 eq(msg.epilogue, '\n')
2130 eq(len(msg.get_payload()), 3)
2131 # Make sure the subparts are what we expect
2132 msg1 = msg.get_payload(0)
2133 eq(msg1.get_content_type(), 'text/plain')
2134 eq(msg1.get_payload(), 'Yadda yadda yadda\n')
2135 msg2 = msg.get_payload(1)
2136 eq(msg2.get_content_type(), 'text/plain')
2137 eq(msg2.get_payload(), 'Yadda yadda yadda\n')
2138 msg3 = msg.get_payload(2)
2139 eq(msg3.get_content_type(), 'message/rfc822')
Georg Brandlab91fde2009-08-13 08:51:18 +00002140 self.assertTrue(isinstance(msg3, Message))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002141 payload = msg3.get_payload()
2142 unless(isinstance(payload, list))
2143 eq(len(payload), 1)
2144 msg4 = payload[0]
2145 unless(isinstance(msg4, Message))
2146 eq(msg4.get_payload(), 'Yadda yadda yadda\n')
2147
2148 def test_parser(self):
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002149 eq = self.assertEqual
Georg Brandlab91fde2009-08-13 08:51:18 +00002150 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002151 msg, text = self._msgobj('msg_06.txt')
2152 # Check some of the outer headers
2153 eq(msg.get_content_type(), 'message/rfc822')
2154 # Make sure the payload is a list of exactly one sub-Message, and that
2155 # that submessage has a type of text/plain
2156 payload = msg.get_payload()
2157 unless(isinstance(payload, list))
2158 eq(len(payload), 1)
2159 msg1 = payload[0]
Georg Brandlab91fde2009-08-13 08:51:18 +00002160 self.assertTrue(isinstance(msg1, Message))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002161 eq(msg1.get_content_type(), 'text/plain')
Georg Brandlab91fde2009-08-13 08:51:18 +00002162 self.assertTrue(isinstance(msg1.get_payload(), str))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002163 eq(msg1.get_payload(), '\n')
2164
2165
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002166
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002167# Test various other bits of the package's functionality
2168class TestMiscellaneous(TestEmailBase):
2169 def test_message_from_string(self):
2170 with openfile('msg_01.txt') as fp:
2171 text = fp.read()
2172 msg = email.message_from_string(text)
2173 s = StringIO()
2174 # Don't wrap/continue long headers since we're trying to test
2175 # idempotency.
2176 g = Generator(s, maxheaderlen=0)
2177 g.flatten(msg)
2178 self.assertEqual(text, s.getvalue())
2179
2180 def test_message_from_file(self):
2181 with openfile('msg_01.txt') as fp:
2182 text = fp.read()
2183 fp.seek(0)
2184 msg = email.message_from_file(fp)
2185 s = StringIO()
2186 # Don't wrap/continue long headers since we're trying to test
2187 # idempotency.
2188 g = Generator(s, maxheaderlen=0)
2189 g.flatten(msg)
2190 self.assertEqual(text, s.getvalue())
2191
2192 def test_message_from_string_with_class(self):
Georg Brandlab91fde2009-08-13 08:51:18 +00002193 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002194 with openfile('msg_01.txt') as fp:
2195 text = fp.read()
2196
2197 # Create a subclass
2198 class MyMessage(Message):
2199 pass
2200
2201 msg = email.message_from_string(text, MyMessage)
2202 unless(isinstance(msg, MyMessage))
2203 # Try something more complicated
2204 with openfile('msg_02.txt') as fp:
2205 text = fp.read()
2206 msg = email.message_from_string(text, MyMessage)
2207 for subpart in msg.walk():
2208 unless(isinstance(subpart, MyMessage))
2209
2210 def test_message_from_file_with_class(self):
Georg Brandlab91fde2009-08-13 08:51:18 +00002211 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002212 # Create a subclass
2213 class MyMessage(Message):
2214 pass
2215
2216 with openfile('msg_01.txt') as fp:
2217 msg = email.message_from_file(fp, MyMessage)
2218 unless(isinstance(msg, MyMessage))
2219 # Try something more complicated
2220 with openfile('msg_02.txt') as fp:
2221 msg = email.message_from_file(fp, MyMessage)
2222 for subpart in msg.walk():
2223 unless(isinstance(subpart, MyMessage))
2224
2225 def test__all__(self):
2226 module = __import__('email')
2227 # Can't use sorted() here due to Python 2.3 compatibility
2228 all = module.__all__[:]
2229 all.sort()
2230 self.assertEqual(all, [
2231 'base64mime', 'charset', 'encoders', 'errors', 'generator',
2232 'header', 'iterators', 'message', 'message_from_file',
2233 'message_from_string', 'mime', 'parser',
2234 'quoprimime', 'utils',
2235 ])
2236
2237 def test_formatdate(self):
2238 now = time.time()
2239 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2240 time.gmtime(now)[:6])
2241
2242 def test_formatdate_localtime(self):
2243 now = time.time()
2244 self.assertEqual(
2245 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2246 time.localtime(now)[:6])
2247
2248 def test_formatdate_usegmt(self):
2249 now = time.time()
2250 self.assertEqual(
2251 utils.formatdate(now, localtime=False),
2252 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2253 self.assertEqual(
2254 utils.formatdate(now, localtime=False, usegmt=True),
2255 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2256
2257 def test_parsedate_none(self):
2258 self.assertEqual(utils.parsedate(''), None)
2259
2260 def test_parsedate_compact(self):
2261 # The FWS after the comma is optional
2262 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2263 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2264
2265 def test_parsedate_no_dayofweek(self):
2266 eq = self.assertEqual
2267 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2268 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2269
2270 def test_parsedate_compact_no_dayofweek(self):
2271 eq = self.assertEqual
2272 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2273 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2274
2275 def test_parsedate_acceptable_to_time_functions(self):
2276 eq = self.assertEqual
2277 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2278 t = int(time.mktime(timetup))
2279 eq(time.localtime(t)[:6], timetup[:6])
2280 eq(int(time.strftime('%Y', timetup)), 2003)
2281 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2282 t = int(time.mktime(timetup[:9]))
2283 eq(time.localtime(t)[:6], timetup[:6])
2284 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2285
R. David Murray1061f182010-08-25 01:55:24 +00002286 def test_parsedate_y2k(self):
2287 """Test for parsing a date with a two-digit year.
2288
2289 Parsing a date with a two-digit year should return the correct
2290 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2291 obsoletes RFC822) requires four-digit years.
2292
2293 """
2294 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2295 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2296 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2297 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2298
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002299 def test_parseaddr_empty(self):
2300 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2301 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2302
2303 def test_noquote_dump(self):
2304 self.assertEqual(
2305 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2306 'A Silly Person <person@dom.ain>')
2307
2308 def test_escape_dump(self):
2309 self.assertEqual(
2310 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2311 r'"A \(Very\) Silly Person" <person@dom.ain>')
2312 a = r'A \(Special\) Person'
2313 b = 'person@dom.ain'
2314 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2315
2316 def test_escape_backslashes(self):
2317 self.assertEqual(
2318 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2319 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2320 a = r'Arthur \Backslash\ Foobar'
2321 b = 'person@dom.ain'
2322 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2323
2324 def test_name_with_dot(self):
2325 x = 'John X. Doe <jxd@example.com>'
2326 y = '"John X. Doe" <jxd@example.com>'
2327 a, b = ('John X. Doe', 'jxd@example.com')
2328 self.assertEqual(utils.parseaddr(x), (a, b))
2329 self.assertEqual(utils.parseaddr(y), (a, b))
2330 # formataddr() quotes the name if there's a dot in it
2331 self.assertEqual(utils.formataddr((a, b)), y)
2332
R. David Murray7f8199a2010-10-02 16:04:44 +00002333 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2334 # issue 10005. Note that in the third test the second pair of
2335 # backslashes is not actually a quoted pair because it is not inside a
2336 # comment or quoted string: the address being parsed has a quoted
2337 # string containing a quoted backslash, followed by 'example' and two
2338 # backslashes, followed by another quoted string containing a space and
2339 # the word 'example'. parseaddr copies those two backslashes
2340 # literally. Per rfc5322 this is not technically correct since a \ may
2341 # not appear in an address outside of a quoted string. It is probably
2342 # a sensible Postel interpretation, though.
2343 eq = self.assertEqual
2344 eq(utils.parseaddr('""example" example"@example.com'),
2345 ('', '""example" example"@example.com'))
2346 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2347 ('', '"\\"example\\" example"@example.com'))
2348 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2349 ('', '"\\\\"example\\\\" example"@example.com'))
2350
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002351 def test_multiline_from_comment(self):
2352 x = """\
2353Foo
2354\tBar <foo@example.com>"""
2355 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2356
2357 def test_quote_dump(self):
2358 self.assertEqual(
2359 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2360 r'"A Silly; Person" <person@dom.ain>')
2361
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002362 def test_charset_richcomparisons(self):
2363 eq = self.assertEqual
Georg Brandlab91fde2009-08-13 08:51:18 +00002364 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002365 cset1 = Charset()
2366 cset2 = Charset()
2367 eq(cset1, 'us-ascii')
2368 eq(cset1, 'US-ASCII')
2369 eq(cset1, 'Us-AsCiI')
2370 eq('us-ascii', cset1)
2371 eq('US-ASCII', cset1)
2372 eq('Us-AsCiI', cset1)
2373 ne(cset1, 'usascii')
2374 ne(cset1, 'USASCII')
2375 ne(cset1, 'UsAsCiI')
2376 ne('usascii', cset1)
2377 ne('USASCII', cset1)
2378 ne('UsAsCiI', cset1)
2379 eq(cset1, cset2)
2380 eq(cset2, cset1)
2381
2382 def test_getaddresses(self):
2383 eq = self.assertEqual
2384 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2385 'Bud Person <bperson@dom.ain>']),
2386 [('Al Person', 'aperson@dom.ain'),
2387 ('Bud Person', 'bperson@dom.ain')])
2388
2389 def test_getaddresses_nasty(self):
2390 eq = self.assertEqual
2391 eq(utils.getaddresses(['foo: ;']), [('', '')])
2392 eq(utils.getaddresses(
2393 ['[]*-- =~$']),
2394 [('', ''), ('', ''), ('', '*--')])
2395 eq(utils.getaddresses(
2396 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2397 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2398
2399 def test_getaddresses_embedded_comment(self):
2400 """Test proper handling of a nested comment"""
2401 eq = self.assertEqual
2402 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2403 eq(addrs[0][1], 'foo@bar.com')
2404
2405 def test_utils_quote_unquote(self):
2406 eq = self.assertEqual
2407 msg = Message()
2408 msg.add_header('content-disposition', 'attachment',
2409 filename='foo\\wacky"name')
2410 eq(msg.get_filename(), 'foo\\wacky"name')
2411
2412 def test_get_body_encoding_with_bogus_charset(self):
2413 charset = Charset('not a charset')
2414 self.assertEqual(charset.get_body_encoding(), 'base64')
2415
2416 def test_get_body_encoding_with_uppercase_charset(self):
2417 eq = self.assertEqual
2418 msg = Message()
2419 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2420 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2421 charsets = msg.get_charsets()
2422 eq(len(charsets), 1)
2423 eq(charsets[0], 'utf-8')
2424 charset = Charset(charsets[0])
2425 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002426 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002427 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2428 eq(msg.get_payload(decode=True), b'hello world')
2429 eq(msg['content-transfer-encoding'], 'base64')
2430 # Try another one
2431 msg = Message()
2432 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2433 charsets = msg.get_charsets()
2434 eq(len(charsets), 1)
2435 eq(charsets[0], 'us-ascii')
2436 charset = Charset(charsets[0])
2437 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2438 msg.set_payload('hello world', charset=charset)
2439 eq(msg.get_payload(), 'hello world')
2440 eq(msg['content-transfer-encoding'], '7bit')
2441
2442 def test_charsets_case_insensitive(self):
2443 lc = Charset('us-ascii')
2444 uc = Charset('US-ASCII')
2445 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2446
2447 def test_partial_falls_inside_message_delivery_status(self):
2448 eq = self.ndiffAssertEqual
2449 # The Parser interface provides chunks of data to FeedParser in 8192
2450 # byte gulps. SF bug #1076485 found one of those chunks inside
2451 # message/delivery-status header block, which triggered an
2452 # unreadline() of NeedMoreData.
2453 msg = self._msgobj('msg_43.txt')
2454 sfp = StringIO()
2455 iterators._structure(msg, sfp)
2456 eq(sfp.getvalue(), """\
2457multipart/report
2458 text/plain
2459 message/delivery-status
2460 text/plain
2461 text/plain
2462 text/plain
2463 text/plain
2464 text/plain
2465 text/plain
2466 text/plain
2467 text/plain
2468 text/plain
2469 text/plain
2470 text/plain
2471 text/plain
2472 text/plain
2473 text/plain
2474 text/plain
2475 text/plain
2476 text/plain
2477 text/plain
2478 text/plain
2479 text/plain
2480 text/plain
2481 text/plain
2482 text/plain
2483 text/plain
2484 text/plain
2485 text/plain
2486 text/rfc822-headers
2487""")
2488
2489
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002490
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002491# Test the iterator/generators
2492class TestIterators(TestEmailBase):
2493 def test_body_line_iterator(self):
2494 eq = self.assertEqual
2495 neq = self.ndiffAssertEqual
2496 # First a simple non-multipart message
2497 msg = self._msgobj('msg_01.txt')
2498 it = iterators.body_line_iterator(msg)
2499 lines = list(it)
2500 eq(len(lines), 6)
2501 neq(EMPTYSTRING.join(lines), msg.get_payload())
2502 # Now a more complicated multipart
2503 msg = self._msgobj('msg_02.txt')
2504 it = iterators.body_line_iterator(msg)
2505 lines = list(it)
2506 eq(len(lines), 43)
2507 with openfile('msg_19.txt') as fp:
2508 neq(EMPTYSTRING.join(lines), fp.read())
2509
2510 def test_typed_subpart_iterator(self):
2511 eq = self.assertEqual
2512 msg = self._msgobj('msg_04.txt')
2513 it = iterators.typed_subpart_iterator(msg, 'text')
2514 lines = []
2515 subparts = 0
2516 for subpart in it:
2517 subparts += 1
2518 lines.append(subpart.get_payload())
2519 eq(subparts, 2)
2520 eq(EMPTYSTRING.join(lines), """\
2521a simple kind of mirror
2522to reflect upon our own
2523a simple kind of mirror
2524to reflect upon our own
2525""")
2526
2527 def test_typed_subpart_iterator_default_type(self):
2528 eq = self.assertEqual
2529 msg = self._msgobj('msg_03.txt')
2530 it = iterators.typed_subpart_iterator(msg, 'text', 'plain')
2531 lines = []
2532 subparts = 0
2533 for subpart in it:
2534 subparts += 1
2535 lines.append(subpart.get_payload())
2536 eq(subparts, 1)
2537 eq(EMPTYSTRING.join(lines), """\
2538
2539Hi,
2540
2541Do you like this message?
2542
2543-Me
2544""")
2545
R. David Murray6d4a06c2010-07-17 01:28:04 +00002546 def test_pushCR_LF(self):
2547 '''FeedParser BufferedSubFile.push() assumed it received complete
2548 line endings. A CR ending one push() followed by a LF starting
2549 the next push() added an empty line.
2550 '''
2551 imt = [
2552 ("a\r \n", 2),
2553 ("b", 0),
2554 ("c\n", 1),
2555 ("", 0),
2556 ("d\r\n", 1),
2557 ("e\r", 0),
2558 ("\nf", 1),
2559 ("\r\n", 1),
2560 ]
2561 from email.feedparser import BufferedSubFile, NeedMoreData
2562 bsf = BufferedSubFile()
2563 om = []
2564 nt = 0
2565 for il, n in imt:
2566 bsf.push(il)
2567 nt += n
2568 n1 = 0
2569 while True:
2570 ol = bsf.readline()
2571 if ol == NeedMoreData:
2572 break
2573 om.append(ol)
2574 n1 += 1
2575 self.assertTrue(n == n1)
2576 self.assertTrue(len(om) == nt)
2577 self.assertTrue(''.join([il for il, n in imt]) == ''.join(om))
2578
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002579
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002580
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002581class TestParsers(TestEmailBase):
2582 def test_header_parser(self):
2583 eq = self.assertEqual
2584 # Parse only the headers of a complex multipart MIME document
2585 with openfile('msg_02.txt') as fp:
2586 msg = HeaderParser().parse(fp)
2587 eq(msg['from'], 'ppp-request@zzz.org')
2588 eq(msg['to'], 'ppp@zzz.org')
2589 eq(msg.get_content_type(), 'multipart/mixed')
Georg Brandlab91fde2009-08-13 08:51:18 +00002590 self.assertFalse(msg.is_multipart())
2591 self.assertTrue(isinstance(msg.get_payload(), str))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002592
2593 def test_whitespace_continuation(self):
2594 eq = self.assertEqual
2595 # This message contains a line after the Subject: header that has only
2596 # whitespace, but it is not empty!
2597 msg = email.message_from_string("""\
2598From: aperson@dom.ain
2599To: bperson@dom.ain
2600Subject: the next line has a space on it
2601\x20
2602Date: Mon, 8 Apr 2002 15:09:19 -0400
2603Message-ID: spam
2604
2605Here's the message body
2606""")
2607 eq(msg['subject'], 'the next line has a space on it\n ')
2608 eq(msg['message-id'], 'spam')
2609 eq(msg.get_payload(), "Here's the message body\n")
2610
2611 def test_whitespace_continuation_last_header(self):
2612 eq = self.assertEqual
2613 # Like the previous test, but the subject line is the last
2614 # header.
2615 msg = email.message_from_string("""\
2616From: aperson@dom.ain
2617To: bperson@dom.ain
2618Date: Mon, 8 Apr 2002 15:09:19 -0400
2619Message-ID: spam
2620Subject: the next line has a space on it
2621\x20
2622
2623Here's the message body
2624""")
2625 eq(msg['subject'], 'the next line has a space on it\n ')
2626 eq(msg['message-id'], 'spam')
2627 eq(msg.get_payload(), "Here's the message body\n")
2628
2629 def test_crlf_separation(self):
2630 eq = self.assertEqual
Guido van Rossum98297ee2007-11-06 21:34:58 +00002631 with openfile('msg_26.txt', newline='\n') as fp:
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002632 msg = Parser().parse(fp)
2633 eq(len(msg.get_payload()), 2)
2634 part1 = msg.get_payload(0)
2635 eq(part1.get_content_type(), 'text/plain')
2636 eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
2637 part2 = msg.get_payload(1)
2638 eq(part2.get_content_type(), 'application/riscos')
2639
2640 def test_multipart_digest_with_extra_mime_headers(self):
2641 eq = self.assertEqual
2642 neq = self.ndiffAssertEqual
2643 with openfile('msg_28.txt') as fp:
2644 msg = email.message_from_file(fp)
2645 # Structure is:
2646 # multipart/digest
2647 # message/rfc822
2648 # text/plain
2649 # message/rfc822
2650 # text/plain
2651 eq(msg.is_multipart(), 1)
2652 eq(len(msg.get_payload()), 2)
2653 part1 = msg.get_payload(0)
2654 eq(part1.get_content_type(), 'message/rfc822')
2655 eq(part1.is_multipart(), 1)
2656 eq(len(part1.get_payload()), 1)
2657 part1a = part1.get_payload(0)
2658 eq(part1a.is_multipart(), 0)
2659 eq(part1a.get_content_type(), 'text/plain')
2660 neq(part1a.get_payload(), 'message 1\n')
2661 # next message/rfc822
2662 part2 = msg.get_payload(1)
2663 eq(part2.get_content_type(), 'message/rfc822')
2664 eq(part2.is_multipart(), 1)
2665 eq(len(part2.get_payload()), 1)
2666 part2a = part2.get_payload(0)
2667 eq(part2a.is_multipart(), 0)
2668 eq(part2a.get_content_type(), 'text/plain')
2669 neq(part2a.get_payload(), 'message 2\n')
2670
2671 def test_three_lines(self):
2672 # A bug report by Andrew McNamara
2673 lines = ['From: Andrew Person <aperson@dom.ain',
2674 'Subject: Test',
2675 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
2676 msg = email.message_from_string(NL.join(lines))
2677 self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')
2678
2679 def test_strip_line_feed_and_carriage_return_in_headers(self):
2680 eq = self.assertEqual
2681 # For [ 1002475 ] email message parser doesn't handle \r\n correctly
2682 value1 = 'text'
2683 value2 = 'more text'
2684 m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
2685 value1, value2)
2686 msg = email.message_from_string(m)
2687 eq(msg.get('Header'), value1)
2688 eq(msg.get('Next-Header'), value2)
2689
2690 def test_rfc2822_header_syntax(self):
2691 eq = self.assertEqual
2692 m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
2693 msg = email.message_from_string(m)
2694 eq(len(msg), 3)
2695 eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
2696 eq(msg.get_payload(), 'body')
2697
2698 def test_rfc2822_space_not_allowed_in_header(self):
2699 eq = self.assertEqual
2700 m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
2701 msg = email.message_from_string(m)
2702 eq(len(msg.keys()), 0)
2703
2704 def test_rfc2822_one_character_header(self):
2705 eq = self.assertEqual
2706 m = 'A: first header\nB: second header\nCC: third header\n\nbody'
2707 msg = email.message_from_string(m)
2708 headers = msg.keys()
2709 headers.sort()
2710 eq(headers, ['A', 'B', 'CC'])
2711 eq(msg.get_payload(), 'body')
2712
R. David Murray71df9d92010-06-16 02:22:56 +00002713 def test_CRLFLF_at_end_of_part(self):
2714 # issue 5610: feedparser should not eat two chars from body part ending
2715 # with "\r\n\n".
2716 m = (
2717 "From: foo@bar.com\n"
2718 "To: baz\n"
2719 "Mime-Version: 1.0\n"
2720 "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
2721 "\n"
2722 "--BOUNDARY\n"
2723 "Content-Type: text/plain\n"
2724 "\n"
2725 "body ending with CRLF newline\r\n"
2726 "\n"
2727 "--BOUNDARY--\n"
2728 )
2729 msg = email.message_from_string(m)
2730 self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002731
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002732
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002733class TestBase64(unittest.TestCase):
2734 def test_len(self):
2735 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00002736 eq(base64mime.header_length('hello'),
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002737 len(base64mime.body_encode(b'hello', eol='')))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002738 for size in range(15):
2739 if size == 0 : bsize = 0
2740 elif size <= 3 : bsize = 4
2741 elif size <= 6 : bsize = 8
2742 elif size <= 9 : bsize = 12
2743 elif size <= 12: bsize = 16
2744 else : bsize = 20
Guido van Rossum9604e662007-08-30 03:46:43 +00002745 eq(base64mime.header_length('x' * size), bsize)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002746
2747 def test_decode(self):
2748 eq = self.assertEqual
Barry Warsaw2cc1f6d2007-08-30 14:28:55 +00002749 eq(base64mime.decode(''), b'')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002750 eq(base64mime.decode('aGVsbG8='), b'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002751
2752 def test_encode(self):
2753 eq = self.assertEqual
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002754 eq(base64mime.body_encode(b''), b'')
2755 eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002756 # Test the binary flag
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002757 eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002758 # Test the maxlinelen arg
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002759 eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002760eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
2761eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
2762eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
2763eHh4eCB4eHh4IA==
2764""")
2765 # Test the eol argument
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002766 eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
Barry Warsaw7aa02e62007-08-31 03:26:19 +00002767 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002768eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
2769eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
2770eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
2771eHh4eCB4eHh4IA==\r
2772""")
2773
2774 def test_header_encode(self):
2775 eq = self.assertEqual
2776 he = base64mime.header_encode
2777 eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
Guido van Rossum9604e662007-08-30 03:46:43 +00002778 eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
2779 eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002780 # Test the charset option
2781 eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
2782 eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002783
2784
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002785
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002786class TestQuopri(unittest.TestCase):
2787 def setUp(self):
2788 # Set of characters (as byte integers) that don't need to be encoded
2789 # in headers.
2790 self.hlit = list(chain(
2791 range(ord('a'), ord('z') + 1),
2792 range(ord('A'), ord('Z') + 1),
2793 range(ord('0'), ord('9') + 1),
Guido van Rossum9604e662007-08-30 03:46:43 +00002794 (c for c in b'!*+-/')))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002795 # Set of characters (as byte integers) that do need to be encoded in
2796 # headers.
2797 self.hnon = [c for c in range(256) if c not in self.hlit]
2798 assert len(self.hlit) + len(self.hnon) == 256
2799 # Set of characters (as byte integers) that don't need to be encoded
2800 # in bodies.
2801 self.blit = list(range(ord(' '), ord('~') + 1))
2802 self.blit.append(ord('\t'))
2803 self.blit.remove(ord('='))
2804 # Set of characters (as byte integers) that do need to be encoded in
2805 # bodies.
2806 self.bnon = [c for c in range(256) if c not in self.blit]
2807 assert len(self.blit) + len(self.bnon) == 256
2808
Guido van Rossum9604e662007-08-30 03:46:43 +00002809 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002810 for c in self.hlit:
Georg Brandlab91fde2009-08-13 08:51:18 +00002811 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00002812 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002813 for c in self.hnon:
Georg Brandlab91fde2009-08-13 08:51:18 +00002814 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00002815 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002816
Guido van Rossum9604e662007-08-30 03:46:43 +00002817 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002818 for c in self.blit:
Georg Brandlab91fde2009-08-13 08:51:18 +00002819 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00002820 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002821 for c in self.bnon:
Georg Brandlab91fde2009-08-13 08:51:18 +00002822 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00002823 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002824
2825 def test_header_quopri_len(self):
2826 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00002827 eq(quoprimime.header_length(b'hello'), 5)
2828 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002829 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00002830 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002831 # =?xxx?q?...?= means 10 extra characters
2832 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00002833 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
2834 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002835 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00002836 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002837 # =?xxx?q?...?= means 10 extra characters
2838 10)
2839 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00002840 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002841 'expected length 1 for %r' % chr(c))
2842 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00002843 # Space is special; it's encoded to _
2844 if c == ord(' '):
2845 continue
2846 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002847 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00002848 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002849
2850 def test_body_quopri_len(self):
2851 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002852 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00002853 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002854 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00002855 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002856
2857 def test_quote_unquote_idempotent(self):
2858 for x in range(256):
2859 c = chr(x)
2860 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
2861
2862 def test_header_encode(self):
2863 eq = self.assertEqual
2864 he = quoprimime.header_encode
2865 eq(he(b'hello'), '=?iso-8859-1?q?hello?=')
2866 eq(he(b'hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
2867 eq(he(b'hello\nworld'), '=?iso-8859-1?q?hello=0Aworld?=')
2868 # Test a non-ASCII character
2869 eq(he(b'hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')
2870
2871 def test_decode(self):
2872 eq = self.assertEqual
2873 eq(quoprimime.decode(''), '')
2874 eq(quoprimime.decode('hello'), 'hello')
2875 eq(quoprimime.decode('hello', 'X'), 'hello')
2876 eq(quoprimime.decode('hello\nworld', 'X'), 'helloXworld')
2877
2878 def test_encode(self):
2879 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00002880 eq(quoprimime.body_encode(''), '')
2881 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002882 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00002883 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002884 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00002885 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002886xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
2887 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
2888x xxxx xxxx xxxx xxxx=20""")
2889 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00002890 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
2891 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002892xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
2893 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
2894x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00002895 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002896one line
2897
2898two line"""), """\
2899one line
2900
2901two line""")
2902
2903
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002904
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002905# Test the Charset class
2906class TestCharset(unittest.TestCase):
2907 def tearDown(self):
2908 from email import charset as CharsetModule
2909 try:
2910 del CharsetModule.CHARSETS['fake']
2911 except KeyError:
2912 pass
2913
Guido van Rossum9604e662007-08-30 03:46:43 +00002914 def test_codec_encodeable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002915 eq = self.assertEqual
2916 # Make sure us-ascii = no Unicode conversion
2917 c = Charset('us-ascii')
Guido van Rossum9604e662007-08-30 03:46:43 +00002918 eq(c.header_encode('Hello World!'), 'Hello World!')
2919 # Test 8-bit idempotency with us-ascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002920 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
Guido van Rossum9604e662007-08-30 03:46:43 +00002921 self.assertRaises(UnicodeError, c.header_encode, s)
2922 c = Charset('utf-8')
2923 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002924
2925 def test_body_encode(self):
2926 eq = self.assertEqual
2927 # Try a charset with QP body encoding
2928 c = Charset('iso-8859-1')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00002929 eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002930 # Try a charset with Base64 body encoding
2931 c = Charset('utf-8')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002932 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002933 # Try a charset with None body encoding
2934 c = Charset('us-ascii')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00002935 eq('hello world', c.body_encode('hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002936 # Try the convert argument, where input codec != output codec
2937 c = Charset('euc-jp')
2938 # With apologies to Tokio Kikuchi ;)
Barry Warsawbef9d212007-08-31 10:55:37 +00002939 # XXX FIXME
2940## try:
2941## eq('\x1b$B5FCO;~IW\x1b(B',
2942## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
2943## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
2944## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
2945## except LookupError:
2946## # We probably don't have the Japanese codecs installed
2947## pass
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002948 # Testing SF bug #625509, which we have to fake, since there are no
2949 # built-in encodings where the header encoding is QP but the body
2950 # encoding is not.
2951 from email import charset as CharsetModule
2952 CharsetModule.add_charset('fake', CharsetModule.QP, None)
2953 c = Charset('fake')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00002954 eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002955
2956 def test_unicode_charset_name(self):
2957 charset = Charset('us-ascii')
2958 self.assertEqual(str(charset), 'us-ascii')
2959 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
2960
2961
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002962
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002963# Test multilingual MIME headers.
2964class TestHeader(TestEmailBase):
2965 def test_simple(self):
2966 eq = self.ndiffAssertEqual
2967 h = Header('Hello World!')
2968 eq(h.encode(), 'Hello World!')
2969 h.append(' Goodbye World!')
2970 eq(h.encode(), 'Hello World! Goodbye World!')
2971
2972 def test_simple_surprise(self):
2973 eq = self.ndiffAssertEqual
2974 h = Header('Hello World!')
2975 eq(h.encode(), 'Hello World!')
2976 h.append('Goodbye World!')
2977 eq(h.encode(), 'Hello World! Goodbye World!')
2978
2979 def test_header_needs_no_decoding(self):
2980 h = 'no decoding needed'
2981 self.assertEqual(decode_header(h), [(h, None)])
2982
2983 def test_long(self):
2984 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
2985 maxlinelen=76)
2986 for l in h.encode(splitchars=' ').split('\n '):
Georg Brandlab91fde2009-08-13 08:51:18 +00002987 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002988
2989 def test_multilingual(self):
2990 eq = self.ndiffAssertEqual
2991 g = Charset("iso-8859-1")
2992 cz = Charset("iso-8859-2")
2993 utf8 = Charset("utf-8")
2994 g_head = (b'Die Mieter treten hier ein werden mit einem '
2995 b'Foerderband komfortabel den Korridor entlang, '
2996 b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
2997 b'gegen die rotierenden Klingen bef\xf6rdert. ')
2998 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
2999 b'd\xf9vtipu.. ')
3000 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
3001 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
3002 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
3003 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
3004 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
3005 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
3006 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
3007 '\u3044\u307e\u3059\u3002')
3008 h = Header(g_head, g)
3009 h.append(cz_head, cz)
3010 h.append(utf8_head, utf8)
Guido van Rossum9604e662007-08-30 03:46:43 +00003011 enc = h.encode(maxlinelen=76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003012 eq(enc, """\
Guido van Rossum9604e662007-08-30 03:46:43 +00003013=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
3014 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
3015 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
3016 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003017 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
3018 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
3019 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
3020 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
Guido van Rossum9604e662007-08-30 03:46:43 +00003021 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
3022 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
3023 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
3024 decoded = decode_header(enc)
3025 eq(len(decoded), 3)
3026 eq(decoded[0], (g_head, 'iso-8859-1'))
3027 eq(decoded[1], (cz_head, 'iso-8859-2'))
3028 eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003029 ustr = str(h)
Guido van Rossum9604e662007-08-30 03:46:43 +00003030 eq(ustr,
3031 (b'Die Mieter treten hier ein werden mit einem Foerderband '
3032 b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
3033 b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
3034 b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
3035 b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
3036 b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
3037 b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
3038 b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
3039 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
3040 b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
3041 b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
3042 b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
3043 b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
3044 b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
3045 b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
3046 b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
3047 ).decode('utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003048 # Test make_header()
3049 newh = make_header(decode_header(enc))
Guido van Rossum9604e662007-08-30 03:46:43 +00003050 eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003051
3052 def test_empty_header_encode(self):
3053 h = Header()
3054 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00003055
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003056 def test_header_ctor_default_args(self):
3057 eq = self.ndiffAssertEqual
3058 h = Header()
3059 eq(h, '')
3060 h.append('foo', Charset('iso-8859-1'))
Guido van Rossum9604e662007-08-30 03:46:43 +00003061 eq(h, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003062
3063 def test_explicit_maxlinelen(self):
3064 eq = self.ndiffAssertEqual
3065 hstr = ('A very long line that must get split to something other '
3066 'than at the 76th character boundary to test the non-default '
3067 'behavior')
3068 h = Header(hstr)
3069 eq(h.encode(), '''\
3070A very long line that must get split to something other than at the 76th
3071 character boundary to test the non-default behavior''')
3072 eq(str(h), hstr)
3073 h = Header(hstr, header_name='Subject')
3074 eq(h.encode(), '''\
3075A very long line that must get split to something other than at the
3076 76th character boundary to test the non-default behavior''')
3077 eq(str(h), hstr)
3078 h = Header(hstr, maxlinelen=1024, header_name='Subject')
3079 eq(h.encode(), hstr)
3080 eq(str(h), hstr)
3081
Guido van Rossum9604e662007-08-30 03:46:43 +00003082 def test_quopri_splittable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003083 eq = self.ndiffAssertEqual
3084 h = Header(charset='iso-8859-1', maxlinelen=20)
Guido van Rossum9604e662007-08-30 03:46:43 +00003085 x = 'xxxx ' * 20
3086 h.append(x)
3087 s = h.encode()
3088 eq(s, """\
3089=?iso-8859-1?q?xxx?=
3090 =?iso-8859-1?q?x_?=
3091 =?iso-8859-1?q?xx?=
3092 =?iso-8859-1?q?xx?=
3093 =?iso-8859-1?q?_x?=
3094 =?iso-8859-1?q?xx?=
3095 =?iso-8859-1?q?x_?=
3096 =?iso-8859-1?q?xx?=
3097 =?iso-8859-1?q?xx?=
3098 =?iso-8859-1?q?_x?=
3099 =?iso-8859-1?q?xx?=
3100 =?iso-8859-1?q?x_?=
3101 =?iso-8859-1?q?xx?=
3102 =?iso-8859-1?q?xx?=
3103 =?iso-8859-1?q?_x?=
3104 =?iso-8859-1?q?xx?=
3105 =?iso-8859-1?q?x_?=
3106 =?iso-8859-1?q?xx?=
3107 =?iso-8859-1?q?xx?=
3108 =?iso-8859-1?q?_x?=
3109 =?iso-8859-1?q?xx?=
3110 =?iso-8859-1?q?x_?=
3111 =?iso-8859-1?q?xx?=
3112 =?iso-8859-1?q?xx?=
3113 =?iso-8859-1?q?_x?=
3114 =?iso-8859-1?q?xx?=
3115 =?iso-8859-1?q?x_?=
3116 =?iso-8859-1?q?xx?=
3117 =?iso-8859-1?q?xx?=
3118 =?iso-8859-1?q?_x?=
3119 =?iso-8859-1?q?xx?=
3120 =?iso-8859-1?q?x_?=
3121 =?iso-8859-1?q?xx?=
3122 =?iso-8859-1?q?xx?=
3123 =?iso-8859-1?q?_x?=
3124 =?iso-8859-1?q?xx?=
3125 =?iso-8859-1?q?x_?=
3126 =?iso-8859-1?q?xx?=
3127 =?iso-8859-1?q?xx?=
3128 =?iso-8859-1?q?_x?=
3129 =?iso-8859-1?q?xx?=
3130 =?iso-8859-1?q?x_?=
3131 =?iso-8859-1?q?xx?=
3132 =?iso-8859-1?q?xx?=
3133 =?iso-8859-1?q?_x?=
3134 =?iso-8859-1?q?xx?=
3135 =?iso-8859-1?q?x_?=
3136 =?iso-8859-1?q?xx?=
3137 =?iso-8859-1?q?xx?=
3138 =?iso-8859-1?q?_?=""")
3139 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003140 h = Header(charset='iso-8859-1', maxlinelen=40)
3141 h.append('xxxx ' * 20)
Guido van Rossum9604e662007-08-30 03:46:43 +00003142 s = h.encode()
3143 eq(s, """\
3144=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
3145 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
3146 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
3147 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
3148 =?iso-8859-1?q?_xxxx_xxxx_?=""")
3149 eq(x, str(make_header(decode_header(s))))
3150
3151 def test_base64_splittable(self):
3152 eq = self.ndiffAssertEqual
3153 h = Header(charset='koi8-r', maxlinelen=20)
3154 x = 'xxxx ' * 20
3155 h.append(x)
3156 s = h.encode()
3157 eq(s, """\
3158=?koi8-r?b?eHh4?=
3159 =?koi8-r?b?eCB4?=
3160 =?koi8-r?b?eHh4?=
3161 =?koi8-r?b?IHh4?=
3162 =?koi8-r?b?eHgg?=
3163 =?koi8-r?b?eHh4?=
3164 =?koi8-r?b?eCB4?=
3165 =?koi8-r?b?eHh4?=
3166 =?koi8-r?b?IHh4?=
3167 =?koi8-r?b?eHgg?=
3168 =?koi8-r?b?eHh4?=
3169 =?koi8-r?b?eCB4?=
3170 =?koi8-r?b?eHh4?=
3171 =?koi8-r?b?IHh4?=
3172 =?koi8-r?b?eHgg?=
3173 =?koi8-r?b?eHh4?=
3174 =?koi8-r?b?eCB4?=
3175 =?koi8-r?b?eHh4?=
3176 =?koi8-r?b?IHh4?=
3177 =?koi8-r?b?eHgg?=
3178 =?koi8-r?b?eHh4?=
3179 =?koi8-r?b?eCB4?=
3180 =?koi8-r?b?eHh4?=
3181 =?koi8-r?b?IHh4?=
3182 =?koi8-r?b?eHgg?=
3183 =?koi8-r?b?eHh4?=
3184 =?koi8-r?b?eCB4?=
3185 =?koi8-r?b?eHh4?=
3186 =?koi8-r?b?IHh4?=
3187 =?koi8-r?b?eHgg?=
3188 =?koi8-r?b?eHh4?=
3189 =?koi8-r?b?eCB4?=
3190 =?koi8-r?b?eHh4?=
3191 =?koi8-r?b?IA==?=""")
3192 eq(x, str(make_header(decode_header(s))))
3193 h = Header(charset='koi8-r', maxlinelen=40)
3194 h.append(x)
3195 s = h.encode()
3196 eq(s, """\
3197=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
3198 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
3199 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
3200 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
3201 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
3202 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
3203 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003204
3205 def test_us_ascii_header(self):
3206 eq = self.assertEqual
3207 s = 'hello'
3208 x = decode_header(s)
3209 eq(x, [('hello', None)])
3210 h = make_header(x)
3211 eq(s, h.encode())
3212
3213 def test_string_charset(self):
3214 eq = self.assertEqual
3215 h = Header()
3216 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00003217 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003218
3219## def test_unicode_error(self):
3220## raises = self.assertRaises
3221## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
3222## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
3223## h = Header()
3224## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
3225## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
3226## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
3227
3228 def test_utf8_shortest(self):
3229 eq = self.assertEqual
3230 h = Header('p\xf6stal', 'utf-8')
3231 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
3232 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
3233 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
3234
3235 def test_bad_8bit_header(self):
3236 raises = self.assertRaises
3237 eq = self.assertEqual
3238 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
3239 raises(UnicodeError, Header, x)
3240 h = Header()
3241 raises(UnicodeError, h.append, x)
3242 e = x.decode('utf-8', 'replace')
3243 eq(str(Header(x, errors='replace')), e)
3244 h.append(x, errors='replace')
3245 eq(str(h), e)
3246
3247 def test_encoded_adjacent_nonencoded(self):
3248 eq = self.assertEqual
3249 h = Header()
3250 h.append('hello', 'iso-8859-1')
3251 h.append('world')
3252 s = h.encode()
3253 eq(s, '=?iso-8859-1?q?hello?= world')
3254 h = make_header(decode_header(s))
3255 eq(h.encode(), s)
3256
3257 def test_whitespace_eater(self):
3258 eq = self.assertEqual
3259 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
3260 parts = decode_header(s)
3261 eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
3262 hdr = make_header(parts)
3263 eq(hdr.encode(),
3264 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
3265
3266 def test_broken_base64_header(self):
3267 raises = self.assertRaises
R. David Murraye06528c2010-08-03 23:35:44 +00003268 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003269 raises(errors.HeaderParseError, decode_header, s)
3270
R. David Murrayf9844c82011-01-05 01:47:38 +00003271 def test_shift_jis_charset(self):
3272 h = Header('æ–‡', charset='shift_jis')
3273 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
3274
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003275
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00003276
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003277# Test RFC 2231 header parameters (en/de)coding
3278class TestRFC2231(TestEmailBase):
3279 def test_get_param(self):
3280 eq = self.assertEqual
3281 msg = self._msgobj('msg_29.txt')
3282 eq(msg.get_param('title'),
3283 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
3284 eq(msg.get_param('title', unquote=False),
3285 ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
3286
3287 def test_set_param(self):
3288 eq = self.ndiffAssertEqual
3289 msg = Message()
3290 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
3291 charset='us-ascii')
3292 eq(msg.get_param('title'),
3293 ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
3294 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
3295 charset='us-ascii', language='en')
3296 eq(msg.get_param('title'),
3297 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
3298 msg = self._msgobj('msg_01.txt')
3299 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
3300 charset='us-ascii', language='en')
3301 eq(msg.as_string(maxheaderlen=78), """\
3302Return-Path: <bbb@zzz.org>
3303Delivered-To: bbb@zzz.org
3304Received: by mail.zzz.org (Postfix, from userid 889)
3305\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
3306MIME-Version: 1.0
3307Content-Transfer-Encoding: 7bit
3308Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
3309From: bbb@ddd.com (John X. Doe)
3310To: bbb@zzz.org
3311Subject: This is a test message
3312Date: Fri, 4 May 2001 14:05:44 -0400
3313Content-Type: text/plain; charset=us-ascii;
3314 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"
3315
3316
3317Hi,
3318
3319Do you like this message?
3320
3321-Me
3322""")
3323
3324 def test_del_param(self):
3325 eq = self.ndiffAssertEqual
3326 msg = self._msgobj('msg_01.txt')
3327 msg.set_param('foo', 'bar', charset='us-ascii', language='en')
3328 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
3329 charset='us-ascii', language='en')
3330 msg.del_param('foo', header='Content-Type')
3331 eq(msg.as_string(maxheaderlen=78), """\
3332Return-Path: <bbb@zzz.org>
3333Delivered-To: bbb@zzz.org
3334Received: by mail.zzz.org (Postfix, from userid 889)
3335\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
3336MIME-Version: 1.0
3337Content-Transfer-Encoding: 7bit
3338Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
3339From: bbb@ddd.com (John X. Doe)
3340To: bbb@zzz.org
3341Subject: This is a test message
3342Date: Fri, 4 May 2001 14:05:44 -0400
3343Content-Type: text/plain; charset="us-ascii";
3344 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"
3345
3346
3347Hi,
3348
3349Do you like this message?
3350
3351-Me
3352""")
3353
3354 def test_rfc2231_get_content_charset(self):
3355 eq = self.assertEqual
3356 msg = self._msgobj('msg_32.txt')
3357 eq(msg.get_content_charset(), 'us-ascii')
3358
3359 def test_rfc2231_no_language_or_charset(self):
3360 m = '''\
3361Content-Transfer-Encoding: 8bit
3362Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
3363Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
3364
3365'''
3366 msg = email.message_from_string(m)
3367 param = msg.get_param('NAME')
Georg Brandlab91fde2009-08-13 08:51:18 +00003368 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003369 self.assertEqual(
3370 param,
3371 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
3372
3373 def test_rfc2231_no_language_or_charset_in_filename(self):
3374 m = '''\
3375Content-Disposition: inline;
3376\tfilename*0*="''This%20is%20even%20more%20";
3377\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3378\tfilename*2="is it not.pdf"
3379
3380'''
3381 msg = email.message_from_string(m)
3382 self.assertEqual(msg.get_filename(),
3383 'This is even more ***fun*** is it not.pdf')
3384
3385 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
3386 m = '''\
3387Content-Disposition: inline;
3388\tfilename*0*="''This%20is%20even%20more%20";
3389\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3390\tfilename*2="is it not.pdf"
3391
3392'''
3393 msg = email.message_from_string(m)
3394 self.assertEqual(msg.get_filename(),
3395 'This is even more ***fun*** is it not.pdf')
3396
3397 def test_rfc2231_partly_encoded(self):
3398 m = '''\
3399Content-Disposition: inline;
3400\tfilename*0="''This%20is%20even%20more%20";
3401\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3402\tfilename*2="is it not.pdf"
3403
3404'''
3405 msg = email.message_from_string(m)
3406 self.assertEqual(
3407 msg.get_filename(),
3408 'This%20is%20even%20more%20***fun*** is it not.pdf')
3409
3410 def test_rfc2231_partly_nonencoded(self):
3411 m = '''\
3412Content-Disposition: inline;
3413\tfilename*0="This%20is%20even%20more%20";
3414\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
3415\tfilename*2="is it not.pdf"
3416
3417'''
3418 msg = email.message_from_string(m)
3419 self.assertEqual(
3420 msg.get_filename(),
3421 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
3422
3423 def test_rfc2231_no_language_or_charset_in_boundary(self):
3424 m = '''\
3425Content-Type: multipart/alternative;
3426\tboundary*0*="''This%20is%20even%20more%20";
3427\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
3428\tboundary*2="is it not.pdf"
3429
3430'''
3431 msg = email.message_from_string(m)
3432 self.assertEqual(msg.get_boundary(),
3433 'This is even more ***fun*** is it not.pdf')
3434
3435 def test_rfc2231_no_language_or_charset_in_charset(self):
3436 # This is a nonsensical charset value, but tests the code anyway
3437 m = '''\
3438Content-Type: text/plain;
3439\tcharset*0*="This%20is%20even%20more%20";
3440\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
3441\tcharset*2="is it not.pdf"
3442
3443'''
3444 msg = email.message_from_string(m)
3445 self.assertEqual(msg.get_content_charset(),
3446 'this is even more ***fun*** is it not.pdf')
3447
3448 def test_rfc2231_bad_encoding_in_filename(self):
3449 m = '''\
3450Content-Disposition: inline;
3451\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
3452\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3453\tfilename*2="is it not.pdf"
3454
3455'''
3456 msg = email.message_from_string(m)
3457 self.assertEqual(msg.get_filename(),
3458 'This is even more ***fun*** is it not.pdf')
3459
3460 def test_rfc2231_bad_encoding_in_charset(self):
3461 m = """\
3462Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
3463
3464"""
3465 msg = email.message_from_string(m)
3466 # This should return None because non-ascii characters in the charset
3467 # are not allowed.
3468 self.assertEqual(msg.get_content_charset(), None)
3469
3470 def test_rfc2231_bad_character_in_charset(self):
3471 m = """\
3472Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
3473
3474"""
3475 msg = email.message_from_string(m)
3476 # This should return None because non-ascii characters in the charset
3477 # are not allowed.
3478 self.assertEqual(msg.get_content_charset(), None)
3479
3480 def test_rfc2231_bad_character_in_filename(self):
3481 m = '''\
3482Content-Disposition: inline;
3483\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
3484\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3485\tfilename*2*="is it not.pdf%E2"
3486
3487'''
3488 msg = email.message_from_string(m)
3489 self.assertEqual(msg.get_filename(),
3490 'This is even more ***fun*** is it not.pdf\ufffd')
3491
3492 def test_rfc2231_unknown_encoding(self):
3493 m = """\
3494Content-Transfer-Encoding: 8bit
3495Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
3496
3497"""
3498 msg = email.message_from_string(m)
3499 self.assertEqual(msg.get_filename(), 'myfile.txt')
3500
3501 def test_rfc2231_single_tick_in_filename_extended(self):
3502 eq = self.assertEqual
3503 m = """\
3504Content-Type: application/x-foo;
3505\tname*0*=\"Frank's\"; name*1*=\" Document\"
3506
3507"""
3508 msg = email.message_from_string(m)
3509 charset, language, s = msg.get_param('name')
3510 eq(charset, None)
3511 eq(language, None)
3512 eq(s, "Frank's Document")
3513
3514 def test_rfc2231_single_tick_in_filename(self):
3515 m = """\
3516Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
3517
3518"""
3519 msg = email.message_from_string(m)
3520 param = msg.get_param('name')
Georg Brandlab91fde2009-08-13 08:51:18 +00003521 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003522 self.assertEqual(param, "Frank's Document")
3523
3524 def test_rfc2231_tick_attack_extended(self):
3525 eq = self.assertEqual
3526 m = """\
3527Content-Type: application/x-foo;
3528\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
3529
3530"""
3531 msg = email.message_from_string(m)
3532 charset, language, s = msg.get_param('name')
3533 eq(charset, 'us-ascii')
3534 eq(language, 'en-us')
3535 eq(s, "Frank's Document")
3536
3537 def test_rfc2231_tick_attack(self):
3538 m = """\
3539Content-Type: application/x-foo;
3540\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
3541
3542"""
3543 msg = email.message_from_string(m)
3544 param = msg.get_param('name')
Georg Brandlab91fde2009-08-13 08:51:18 +00003545 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003546 self.assertEqual(param, "us-ascii'en-us'Frank's Document")
3547
3548 def test_rfc2231_no_extended_values(self):
3549 eq = self.assertEqual
3550 m = """\
3551Content-Type: application/x-foo; name=\"Frank's Document\"
3552
3553"""
3554 msg = email.message_from_string(m)
3555 eq(msg.get_param('name'), "Frank's Document")
3556
3557 def test_rfc2231_encoded_then_unencoded_segments(self):
3558 eq = self.assertEqual
3559 m = """\
3560Content-Type: application/x-foo;
3561\tname*0*=\"us-ascii'en-us'My\";
3562\tname*1=\" Document\";
3563\tname*2*=\" For You\"
3564
3565"""
3566 msg = email.message_from_string(m)
3567 charset, language, s = msg.get_param('name')
3568 eq(charset, 'us-ascii')
3569 eq(language, 'en-us')
3570 eq(s, 'My Document For You')
3571
3572 def test_rfc2231_unencoded_then_encoded_segments(self):
3573 eq = self.assertEqual
3574 m = """\
3575Content-Type: application/x-foo;
3576\tname*0=\"us-ascii'en-us'My\";
3577\tname*1*=\" Document\";
3578\tname*2*=\" For You\"
3579
3580"""
3581 msg = email.message_from_string(m)
3582 charset, language, s = msg.get_param('name')
3583 eq(charset, 'us-ascii')
3584 eq(language, 'en-us')
3585 eq(s, 'My Document For You')
3586
3587
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00003588
R. David Murrayfa606922010-01-16 18:41:00 +00003589# Tests to ensure that signed parts of an email are completely preserved, as
3590# required by RFC1847 section 2.1. Note that these are incomplete, because the
3591# email package does not currently always preserve the body. See issue 1670765.
3592class TestSigned(TestEmailBase):
3593
3594 def _msg_and_obj(self, filename):
3595 with openfile(findfile(filename)) as fp:
3596 original = fp.read()
3597 msg = email.message_from_string(original)
3598 return original, msg
3599
3600 def _signed_parts_eq(self, original, result):
3601 # Extract the first mime part of each message
3602 import re
3603 repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
3604 inpart = repart.search(original).group(2)
3605 outpart = repart.search(result).group(2)
3606 self.assertEqual(outpart, inpart)
3607
3608 def test_long_headers_as_string(self):
3609 original, msg = self._msg_and_obj('msg_45.txt')
3610 result = msg.as_string()
3611 self._signed_parts_eq(original, result)
3612
3613 def test_long_headers_as_string_maxheaderlen(self):
3614 original, msg = self._msg_and_obj('msg_45.txt')
3615 result = msg.as_string(maxheaderlen=60)
3616 self._signed_parts_eq(original, result)
3617
3618 def test_long_headers_flatten(self):
3619 original, msg = self._msg_and_obj('msg_45.txt')
3620 fp = StringIO()
3621 Generator(fp).flatten(msg)
3622 result = fp.getvalue()
3623 self._signed_parts_eq(original, result)
3624
3625
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00003626
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003627def _testclasses():
3628 mod = sys.modules[__name__]
3629 return [getattr(mod, name) for name in dir(mod) if name.startswith('Test')]
3630
3631
3632def suite():
3633 suite = unittest.TestSuite()
3634 for testclass in _testclasses():
3635 suite.addTest(unittest.makeSuite(testclass))
3636 return suite
3637
3638
3639def test_main():
3640 for testclass in _testclasses():
3641 run_unittest(testclass)
3642
3643
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00003644
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003645if __name__ == '__main__':
3646 unittest.main(defaultTest='suite')