blob: c9903ecac845c4d817c1fae08b9f8b09b8a506e5 [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
6import sys
7import time
8import base64
9import difflib
10import unittest
11import warnings
12
13from io import StringIO
14from itertools import chain
15
16import email
17
18from email.charset import Charset
19from email.header import Header, decode_header, make_header
20from email.parser import Parser, HeaderParser
21from email.generator import Generator, DecodedGenerator
22from email.message import Message
23from email.mime.application import MIMEApplication
24from email.mime.audio import MIMEAudio
25from email.mime.text import MIMEText
26from email.mime.image import MIMEImage
27from email.mime.base import MIMEBase
28from email.mime.message import MIMEMessage
29from email.mime.multipart import MIMEMultipart
30from email import utils
31from email import errors
32from email import encoders
33from email import iterators
34from email import base64mime
35from email import quoprimime
36
Benjamin Petersonee8712c2008-05-20 21:35:26 +000037from test.support import findfile, run_unittest
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038from email.test import __file__ as landmark
39
40
41NL = '\n'
42EMPTYSTRING = ''
43SPACE = ' '
44
45
Ezio Melotti19f2aeb2010-11-21 01:30:29 +000046
def openfile(filename, *args, **kws):
    """Open *filename* from the ``data`` directory beside the test package.

    Any extra positional and keyword arguments are forwarded to ``open()``.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
50
51
Ezio Melotti19f2aeb2010-11-21 01:30:29 +000052
# Base test class
class TestEmailBase(unittest.TestCase):
    """Shared helpers for the email package test cases."""

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual, but report differences as an ndiff of lines."""
        if first != second:
            # Compare the repr of each line so that whitespace and escape
            # differences are visible in the failure message.
            left, right = (
                [repr(line) for line in str(value).splitlines()]
                for value in (first, second)
            )
            delta = difflib.ndiff(left, right)
            raise self.failureException(NL + NL.join(delta))

    def _msgobj(self, filename):
        """Parse the named test data file and return the message object."""
        with openfile(findfile(filename)) as fp:
            msg = email.message_from_file(fp)
        return msg
68
69
Ezio Melotti19f2aeb2010-11-21 01:30:29 +000070
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the public API of email.message.Message."""

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the caller-supplied default.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # The argument replaces None for parts without a charset.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        # Read through a context manager so the data file is always closed.
        with openfile(findfile('msg_46.txt')) as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        # With unixfrom=True the first line is an envelope "From " line and
        # the remainder matches the original text.
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter appends it after the remaining ones.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*="utf-8\'\'Fu%C3%9Fballer.ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*="iso-8859-1\'\'Fu%DFballer.ppt"',
            msg['Content-Disposition'])
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000541
Ezio Melotti19f2aeb2010-11-21 01:30:29 +0000542
# Test the email.encoders module
544class TestEncoders(unittest.TestCase):
545 def test_encode_empty_payload(self):
546 eq = self.assertEqual
547 msg = Message()
548 msg.set_charset('us-ascii')
549 eq(msg['content-transfer-encoding'], '7bit')
550
551 def test_default_cte(self):
552 eq = self.assertEqual
Ezio Melottic30bb7d2010-04-22 11:58:06 +0000553 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000554 msg = MIMEText('hello world')
555 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic30bb7d2010-04-22 11:58:06 +0000556 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000557 msg = MIMEText('hello \xf8 world')
558 eq(msg['content-transfer-encoding'], '8bit')
559 # And now with a different charset
560 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
561 eq(msg['content-transfer-encoding'], 'quoted-printable')
562
R. David Murrayf870d872010-05-06 01:53:03 +0000563 def test_encode7or8bit(self):
564 # Make sure a charset whose input character set is 8bit but
565 # whose output character set is 7bit gets a transfer-encoding
566 # of 7bit.
567 eq = self.assertEqual
R. David Murrayd2d08c62010-06-03 02:05:47 +0000568 msg = MIMEText('æ–‡', _charset='euc-jp')
R. David Murrayf870d872010-05-06 01:53:03 +0000569 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000570
Ezio Melotti19f2aeb2010-11-21 01:30:29 +0000571
# Test long header wrapping
573class TestLongHeaders(TestEmailBase):
574 def test_split_long_continuation(self):
575 eq = self.ndiffAssertEqual
576 msg = email.message_from_string("""\
577Subject: bug demonstration
578\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
579\tmore text
580
581test
582""")
583 sfp = StringIO()
584 g = Generator(sfp)
585 g.flatten(msg)
586 eq(sfp.getvalue(), """\
587Subject: bug demonstration
588\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
589\tmore text
590
591test
592""")
593
594 def test_another_long_almost_unsplittable_header(self):
595 eq = self.ndiffAssertEqual
596 hstr = """\
597bug demonstration
598\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
599\tmore text"""
600 h = Header(hstr, continuation_ws='\t')
601 eq(h.encode(), """\
602bug demonstration
603\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
604\tmore text""")
605 h = Header(hstr.replace('\t', ' '))
606 eq(h.encode(), """\
607bug demonstration
608 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
609 more text""")
610
611 def test_long_nonstring(self):
612 eq = self.ndiffAssertEqual
613 g = Charset("iso-8859-1")
614 cz = Charset("iso-8859-2")
615 utf8 = Charset("utf-8")
616 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
617 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
618 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
619 b'bef\xf6rdert. ')
620 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
621 b'd\xf9vtipu.. ')
622 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
623 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
624 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
625 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
626 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
627 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
628 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
629 '\u3044\u307e\u3059\u3002')
630 h = Header(g_head, g, header_name='Subject')
631 h.append(cz_head, cz)
632 h.append(utf8_head, utf8)
633 msg = Message()
634 msg['Subject'] = h
635 sfp = StringIO()
636 g = Generator(sfp)
637 g.flatten(msg)
638 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000639Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
640 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
641 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
642 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
643 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
644 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
645 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
646 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
647 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
648 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
649 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000650
651""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000652 eq(h.encode(maxlinelen=76), """\
653=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
654 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
655 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
656 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
657 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
658 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
659 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
660 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
661 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
662 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
663 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000664
665 def test_long_header_encode(self):
666 eq = self.ndiffAssertEqual
667 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
668 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
669 header_name='X-Foobar-Spoink-Defrobnit')
670 eq(h.encode(), '''\
671wasnipoop; giraffes="very-long-necked-animals";
672 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
673
674 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
675 eq = self.ndiffAssertEqual
676 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
677 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
678 header_name='X-Foobar-Spoink-Defrobnit',
679 continuation_ws='\t')
680 eq(h.encode(), '''\
681wasnipoop; giraffes="very-long-necked-animals";
682 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
683
684 def test_long_header_encode_with_tab_continuation(self):
685 eq = self.ndiffAssertEqual
686 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
687 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
688 header_name='X-Foobar-Spoink-Defrobnit',
689 continuation_ws='\t')
690 eq(h.encode(), '''\
691wasnipoop; giraffes="very-long-necked-animals";
692\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
693
694 def test_header_splitter(self):
695 eq = self.ndiffAssertEqual
696 msg = MIMEText('')
697 # It'd be great if we could use add_header() here, but that doesn't
698 # guarantee an order of the parameters.
699 msg['X-Foobar-Spoink-Defrobnit'] = (
700 'wasnipoop; giraffes="very-long-necked-animals"; '
701 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
702 sfp = StringIO()
703 g = Generator(sfp)
704 g.flatten(msg)
705 eq(sfp.getvalue(), '''\
706Content-Type: text/plain; charset="us-ascii"
707MIME-Version: 1.0
708Content-Transfer-Encoding: 7bit
709X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
710 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
711
712''')
713
714 def test_no_semis_header_splitter(self):
715 eq = self.ndiffAssertEqual
716 msg = Message()
717 msg['From'] = 'test@dom.ain'
718 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
719 msg.set_payload('Test')
720 sfp = StringIO()
721 g = Generator(sfp)
722 g.flatten(msg)
723 eq(sfp.getvalue(), """\
724From: test@dom.ain
725References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
726 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
727
728Test""")
729
730 def test_no_split_long_header(self):
731 eq = self.ndiffAssertEqual
732 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000733 h = Header(hstr)
734 # These come on two lines because Headers are really field value
735 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000736 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000737References:
738 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
739 h = Header('x' * 80)
740 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000741
742 def test_splitting_multiple_long_lines(self):
743 eq = self.ndiffAssertEqual
744 hstr = """\
745from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
746\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
747\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
748"""
749 h = Header(hstr, continuation_ws='\t')
750 eq(h.encode(), """\
751from babylon.socal-raves.org (localhost [127.0.0.1]);
752 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
753 for <mailman-admin@babylon.socal-raves.org>;
754 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
755\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
756 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
757 for <mailman-admin@babylon.socal-raves.org>;
758 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
759\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
760 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
761 for <mailman-admin@babylon.socal-raves.org>;
762 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
763
764 def test_splitting_first_line_only_is_long(self):
765 eq = self.ndiffAssertEqual
766 hstr = """\
767from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
768\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
769\tid 17k4h5-00034i-00
770\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
771 h = Header(hstr, maxlinelen=78, header_name='Received',
772 continuation_ws='\t')
773 eq(h.encode(), """\
774from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
775 helo=cthulhu.gerg.ca)
776\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
777\tid 17k4h5-00034i-00
778\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
779
780 def test_long_8bit_header(self):
781 eq = self.ndiffAssertEqual
782 msg = Message()
783 h = Header('Britische Regierung gibt', 'iso-8859-1',
784 header_name='Subject')
785 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +0000786 eq(h.encode(maxlinelen=76), """\
787=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
788 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000789 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +0000790 eq(msg.as_string(maxheaderlen=76), """\
791Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
792 =?iso-8859-1?q?hore-Windkraftprojekte?=
793
794""")
795 eq(msg.as_string(maxheaderlen=0), """\
796Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000797
798""")
799
800 def test_long_8bit_header_no_charset(self):
801 eq = self.ndiffAssertEqual
802 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +0000803 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
804 'f\xfcr Offshore-Windkraftprojekte '
805 '<a-very-long-address@example.com>')
806 msg['Reply-To'] = header_string
807 self.assertRaises(UnicodeEncodeError, msg.as_string)
808 msg = Message()
809 msg['Reply-To'] = Header(header_string, 'utf-8',
810 header_name='Reply-To')
811 eq(msg.as_string(maxheaderlen=78), """\
812Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
813 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000814
815""")
816
817 def test_long_to_header(self):
818 eq = self.ndiffAssertEqual
819 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
820 '<someone@eecs.umich.edu>,'
821 '"Someone Test #B" <someone@umich.edu>, '
822 '"Someone Test #C" <someone@eecs.umich.edu>, '
823 '"Someone Test #D" <someone@eecs.umich.edu>')
824 msg = Message()
825 msg['To'] = to
826 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +0000827To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000828 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +0000829 "Someone Test #C" <someone@eecs.umich.edu>,
830 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000831
832''')
833
834 def test_long_line_after_append(self):
835 eq = self.ndiffAssertEqual
836 s = 'This is an example of string which has almost the limit of header length.'
837 h = Header(s)
838 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +0000839 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000840This is an example of string which has almost the limit of header length.
841 Add another line.""")
842
843 def test_shorter_line_with_append(self):
844 eq = self.ndiffAssertEqual
845 s = 'This is a shorter line.'
846 h = Header(s)
847 h.append('Add another sentence. (Surprise?)')
848 eq(h.encode(),
849 'This is a shorter line. Add another sentence. (Surprise?)')
850
851 def test_long_field_name(self):
852 eq = self.ndiffAssertEqual
853 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +0000854 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
855 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
856 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
857 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000858 h = Header(gs, 'iso-8859-1', header_name=fn)
859 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +0000860 eq(h.encode(maxlinelen=76), """\
861=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
862 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
863 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
864 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000865
866 def test_long_received_header(self):
867 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
868 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
869 'Wed, 05 Mar 2003 18:10:18 -0700')
870 msg = Message()
871 msg['Received-1'] = Header(h, continuation_ws='\t')
872 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000873 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000874 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000875Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
876 Wed, 05 Mar 2003 18:10:18 -0700
877Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
878 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000879
880""")
881
882 def test_string_headerinst_eq(self):
883 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
884 'tu-muenchen.de> (David Bremner\'s message of '
885 '"Thu, 6 Mar 2003 13:58:21 +0100")')
886 msg = Message()
887 msg['Received-1'] = Header(h, header_name='Received-1',
888 continuation_ws='\t')
889 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000890 # XXX This should be splitting on spaces not commas.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000891 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000892Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
893 6 Mar 2003 13:58:21 +0100\")
894Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
895 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000896
897""")
898
899 def test_long_unbreakable_lines_with_continuation(self):
900 eq = self.ndiffAssertEqual
901 msg = Message()
902 t = """\
903iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
904 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
905 msg['Face-1'] = t
906 msg['Face-2'] = Header(t, header_name='Face-2')
Barry Warsawbef9d212007-08-31 10:55:37 +0000907 # XXX This splitting is all wrong. It the first value line should be
908 # snug against the field name.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000909 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +0000910Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000911 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000912 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +0000913Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +0000914 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000915 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
916
917""")
918
919 def test_another_long_multiline_header(self):
920 eq = self.ndiffAssertEqual
921 m = ('Received: from siimage.com '
922 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +0000923 'Microsoft SMTPSVC(5.0.2195.4905); '
924 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000925 msg = email.message_from_string(m)
926 eq(msg.as_string(maxheaderlen=78), '''\
Barry Warsawbef9d212007-08-31 10:55:37 +0000927Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
928 Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000929
930''')
931
932 def test_long_lines_with_different_header(self):
933 eq = self.ndiffAssertEqual
934 h = ('List-Unsubscribe: '
935 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
936 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
937 '?subject=unsubscribe>')
938 msg = Message()
939 msg['List'] = h
940 msg['List'] = Header(h, header_name='List')
941 eq(msg.as_string(maxheaderlen=78), """\
942List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000943 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000944List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000945 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000946
947""")
948
949
Ezio Melotti19f2aeb2010-11-21 01:30:29 +0000950
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000951# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # A message whose body starts with "From ", the line the
        # generator's mangle_from_ option acts on.
        msg = Message()
        msg['From'] = 'aaa@bbb.org'
        msg.set_payload('From the desk of A.A.A.:\nBlah blah blah\n')
        self.msg = msg

    def _flatten(self, mangle):
        # Serialize self.msg with the given mangle_from_ setting.
        out = StringIO()
        Generator(out, mangle_from_=mangle).flatten(self.msg)
        return out.getvalue()

    def test_mangled_from(self):
        self.assertEqual(self._flatten(True),
                         'From: aaa@bbb.org\n'
                         '\n'
                         '>From the desk of A.A.A.:\n'
                         'Blah blah blah\n')

    def test_dont_mangle_from(self):
        self.assertEqual(self._flatten(False),
                         'From: aaa@bbb.org\n'
                         '\n'
                         'From the desk of A.A.A.:\n'
                         'Blah blah blah\n')
982
983
Ezio Melotti19f2aeb2010-11-21 01:30:29 +0000984
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000985# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        with open(findfile('audiotest.au', datadir), 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # Decoding the stored payload must give back the original bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b).
        same = self.assertIs
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # Unique sentinel: getting this exact object back proves the
        # failobj was returned.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        same(self._au.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._au.get_param('foobar', missing), missing)
        same(self._au.get_param('attachment', missing,
                                header='foobar'), missing)
1029
1030
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001031
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001032# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Load the sample GIF from the test data directory and wrap it.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # Decoding the stored payload must give back the original bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b).
        same = self.assertIs
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # Unique sentinel: getting this exact object back proves the
        # failobj was returned.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        same(self._im.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._im.get_param('foobar', missing), missing)
        same(self._im.get_param('attachment', missing,
                                header='foobar'), missing)
1070
1071
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001072
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001073# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        # Bytes passed without an explicit subtype get the generic octet
        # stream type and a base64 transfer encoding.
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(msg.get_content_type(), 'application/octet-stream')
        self.assertEqual(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        # Named 'raw' rather than 'bytes' so the builtin is not shadowed.
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(raw)
        # Strip surrounding whitespace so the check does not depend on
        # whether the encoder terminates the base64 text with a newline.
        self.assertEqual(msg.get_payload().strip(), '+vv8/f7/')
        self.assertEqual(msg.get_payload(decode=True), raw)
1087
1088
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001089
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001090# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # Unique sentinel: getting this exact object back proves the
        # failobj was returned.  assertIs shows both operands on failure.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(
            self._msg.get_param('charset', missing, header='foobar'),
            missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn prints the searched text when the check fails.
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1141
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001142
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001143
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001144# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    # Serialized fragments shared by the generator tests below.
    _OUTER_HEADERS = (
        'Content-Type: multipart/mixed; boundary="BOUNDARY"\n'
        'MIME-Version: 1.0\n'
        'Subject: A subject\n'
        'To: aperson@dom.ain\n'
        'From: bperson@dom.ain\n')
    _NO_PARTS = (
        '--BOUNDARY\n'
        '\n'
        '--BOUNDARY--')
    _HELLO_PART = (
        '--BOUNDARY\n'
        'Content-Type: text/plain; charset="us-ascii"\n'
        'MIME-Version: 1.0\n'
        'Content-Transfer-Encoding: 7bit\n'
        '\n'
        'hello world\n'
        '--BOUNDARY--')

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last field is the DST flag; 0 selects the standard-time
        # offset, anything else the alternate one.
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # A positive value is a negative offset in the Date header.
        sign = '-' if tzsecs > 0 else '+'
        # Format the magnitude as HHMM.  The old '%04d' % (tzsecs / 36)
        # printed a second sign for negative values and gave wrong minutes
        # for offsets that are not whole hours.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def _new_outer(self, attach_text=False):
        # Build the multipart/mixed container used by the generator tests,
        # optionally holding one 'hello world' text part.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        if attach_text:
            outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        return outer

    def test_hierarchy(self):
        eq = self.assertEqual
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The payload must hold the very objects attached in setUp().
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing then regenerating must reproduce the text exactly.
        text = (self._OUTER_HEADERS +
                '\n'
                '\n'
                '--BOUNDARY\n'
                '\n'
                '\n'
                '--BOUNDARY--\n')
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._new_outer()
        self.ndiffAssertEqual(
            outer.as_string(),
            self._OUTER_HEADERS + '\n' + self._NO_PARTS)

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._new_outer()
        outer.preamble = ''
        outer.epilogue = ''
        self.ndiffAssertEqual(
            outer.as_string(),
            self._OUTER_HEADERS + '\n\n' + self._NO_PARTS + '\n')

    def test_one_part_in_a_multipart(self):
        outer = self._new_outer(attach_text=True)
        self.ndiffAssertEqual(
            outer.as_string(),
            self._OUTER_HEADERS + '\n' + self._HELLO_PART)

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        outer = self._new_outer(attach_text=True)
        outer.preamble = ''
        self.ndiffAssertEqual(
            outer.as_string(),
            self._OUTER_HEADERS + '\n\n' + self._HELLO_PART)

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        outer = self._new_outer(attach_text=True)
        outer.preamble = None
        self.ndiffAssertEqual(
            outer.as_string(),
            self._OUTER_HEADERS + '\n' + self._HELLO_PART)

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._new_outer(attach_text=True)
        outer.epilogue = None
        self.ndiffAssertEqual(
            outer.as_string(),
            self._OUTER_HEADERS + '\n' + self._HELLO_PART)

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._new_outer(attach_text=True)
        outer.epilogue = ''
        self.ndiffAssertEqual(
            outer.as_string(),
            self._OUTER_HEADERS + '\n' + self._HELLO_PART + '\n')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        outer = self._new_outer(attach_text=True)
        outer.epilogue = '\n'
        self.ndiffAssertEqual(
            outer.as_string(),
            self._OUTER_HEADERS + '\n' + self._HELLO_PART + '\n\n')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the nesting depth of this listing was reconstructed
        # (four spaces per level) -- confirm against msg_38.txt.
        self.ndiffAssertEqual(sfp.getvalue(),
                              'multipart/mixed\n'
                              '    multipart/mixed\n'
                              '        multipart/alternative\n'
                              '            text/plain\n'
                              '        text/plain\n'
                              '    text/plain\n'
                              '    text/plain\n')

    def test_nested_with_same_boundary(self):
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the nesting depth of this listing was reconstructed
        # (four spaces per level) -- confirm against msg_39.txt.
        self.ndiffAssertEqual(sfp.getvalue(),
                              'multipart/mixed\n'
                              '    multipart/mixed\n'
                              '        multipart/alternative\n'
                              '        application/octet-stream\n'
                              '        application/octet-stream\n'
                              '    text/plain\n')

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type must not trigger
        # multipart parsing; the message round-trips as-is.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        # NOTE(review): the amount of whitespace before XXXX is taken from
        # a rendering that collapses runs of spaces -- confirm the width.
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the very last text, with no newline
        # after it; the part's payload must still be parsed.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001516
1517
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001518
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001519# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    def test_parse_missing_minor_type(self):
        # msg_14.txt is expected to be reported as text/plain.
        msg = self._msgobj('msg_14.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertTrue(isinstance(inner.defects[0],
                                   errors.StartBoundaryNotFoundDefect))

    def test_multipart_no_boundary(self):
        # The payload is expected to stay a plain string, with two defects
        # recorded in this order.
        msg = self._msgobj('msg_25.txt')
        self.assertTrue(isinstance(msg.get_payload(), str))
        self.assertEqual(len(msg.defects), 2)
        first, second = msg.defects[0], msg.defects[1]
        self.assertTrue(isinstance(first,
                                   errors.NoBoundaryInMultipartDefect))
        self.assertTrue(isinstance(second,
                                   errors.MultipartInvariantViolationDefect))

    def test_invalid_content_type(self):
        check = self.assertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        check(msg.get_content_maintype(), 'text')
        check(msg.get_content_subtype(), 'plain')
        check(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        check(msg.get_content_maintype(), 'text')
        check(msg.get_content_subtype(), 'plain')
        check(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        out = StringIO()
        Generator(out).flatten(msg)
        self.ndiffAssertEqual(out.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # The whole body is expected back as one string payload.
        msg = self._msgobj('msg_31.txt')
        expected = ('--BOUNDARY\n'
                    'Content-Type: text/plain\n'
                    '\n'
                    'message 1\n'
                    '\n'
                    '--BOUNDARY\n'
                    'Content-Type: text/plain\n'
                    '\n'
                    'message 2\n'
                    '\n'
                    '--BOUNDARY--\n')
        self.ndiffAssertEqual(msg.get_payload(), expected)

    def test_no_separating_blank_line(self):
        msg = self._msgobj('msg_35.txt')
        expected = ("From: aperson@dom.ain\n"
                    "To: bperson@dom.ain\n"
                    "Subject: here's something interesting\n"
                    "\n"
                    "counter to RFC 2822, there's no separating newline "
                    "here\n")
        self.ndiffAssertEqual(msg.as_string(), expected)

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        first, second = msg.defects[0], msg.defects[1]
        self.assertTrue(isinstance(first,
                                   errors.NoBoundaryInMultipartDefect))
        self.assertTrue(isinstance(second,
                                   errors.MultipartInvariantViolationDefect))

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        embedded = outer.get_payload(1)
        bad = embedded.get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertTrue(isinstance(bad.defects[0],
                                   errors.StartBoundaryNotFoundDefect))

    def test_first_line_is_continuation_header(self):
        # The first line looks like a header continuation.  It is expected
        # to be recorded as a defect, with the rest becoming the payload.
        source = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(source)
        self.assertEqual(msg.keys(), [])
        self.assertEqual(msg.get_payload(), 'Line 2\nLine 3')
        self.assertEqual(len(msg.defects), 1)
        defect = msg.defects[0]
        self.assertTrue(isinstance(
            defect, errors.FirstHeaderLineIsContinuationDefect))
        self.assertEqual(defect.line, ' Line 1\n')
1630
1631
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001632
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001633# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # Checks decode_header()/make_header() against RFC 2047 encoded words.

    def test_rfc2047_multiline(self):
        # The header value is folded over two lines; the plain text on both
        # sides of the fold is expected to decode as one chunk.
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(chunks)
        self.assertEqual(
            str(header),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Andr\xe9', 'iso-8859-1'),
                                  (b'Pirard <pirard@dom.ain>', None)])
        self.assertEqual(str(make_header(chunks)),
                         'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        raw = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
               '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The', None), (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
        self.assertEqual(str(make_header(chunks)),
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words that are not delimited by whitespace are expected to
        # be left alone: the whole value comes back as one undecoded chunk.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm', None), (b'\xf6', 'iso-8859-1'),
            (b'rg', None), (b'\xe5', 'iso-8859-1'),
            (b'sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # (base64 text with correct, short or missing padding, decoded
        # bytes); only complete bytes are tested.
        cases = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
            ]
        for encoded, expected in cases:
            self.assertEqual(decode_header(template % encoded),
                             [(expected, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.  '=zz' is not a valid hex escape and is expected to
        # be kept literally in the decoded bytes.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(raw),
                         [(b'andr\xe9=zz', 'iso-8659-1')])
1697
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001698
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001699# Test the MIMEMessage class
1700class TestMIMEMessage(TestEmailBase):
1701 def setUp(self):
1702 with openfile('msg_11.txt') as fp:
1703 self._text = fp.read()
1704
1705 def test_type_error(self):
1706 self.assertRaises(TypeError, MIMEMessage, 'a plain string')
1707
1708 def test_valid_argument(self):
1709 eq = self.assertEqual
Georg Brandlab91fde2009-08-13 08:51:18 +00001710 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001711 subject = 'A sub-message'
1712 m = Message()
1713 m['Subject'] = subject
1714 r = MIMEMessage(m)
1715 eq(r.get_content_type(), 'message/rfc822')
1716 payload = r.get_payload()
1717 unless(isinstance(payload, list))
1718 eq(len(payload), 1)
1719 subpart = payload[0]
1720 unless(subpart is m)
1721 eq(subpart['subject'], subject)
1722
1723 def test_bad_multipart(self):
1724 eq = self.assertEqual
1725 msg1 = Message()
1726 msg1['Subject'] = 'subpart 1'
1727 msg2 = Message()
1728 msg2['Subject'] = 'subpart 2'
1729 r = MIMEMessage(msg1)
1730 self.assertRaises(errors.MultipartConversionError, r.attach, msg2)
1731
1732 def test_generate(self):
1733 # First craft the message to be encapsulated
1734 m = Message()
1735 m['Subject'] = 'An enclosed message'
1736 m.set_payload('Here is the body of the message.\n')
1737 r = MIMEMessage(m)
1738 r['Subject'] = 'The enclosing message'
1739 s = StringIO()
1740 g = Generator(s)
1741 g.flatten(r)
1742 self.assertEqual(s.getvalue(), """\
1743Content-Type: message/rfc822
1744MIME-Version: 1.0
1745Subject: The enclosing message
1746
1747Subject: An enclosed message
1748
1749Here is the body of the message.
1750""")
1751
1752 def test_parse_message_rfc822(self):
1753 eq = self.assertEqual
Georg Brandlab91fde2009-08-13 08:51:18 +00001754 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001755 msg = self._msgobj('msg_11.txt')
1756 eq(msg.get_content_type(), 'message/rfc822')
1757 payload = msg.get_payload()
1758 unless(isinstance(payload, list))
1759 eq(len(payload), 1)
1760 submsg = payload[0]
Georg Brandlab91fde2009-08-13 08:51:18 +00001761 self.assertTrue(isinstance(submsg, Message))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001762 eq(submsg['subject'], 'An enclosed message')
1763 eq(submsg.get_payload(), 'Here is the body of the message.\n')
1764
# Checks the parsed structure of msg_16.txt: a multipart/report with three
# subparts (text/plain notice, message/delivery-status, message/rfc822).
1765 def test_dsn(self):
1766 eq = self.assertEqual
Georg Brandlab91fde2009-08-13 08:51:18 +00001767 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001768 # msg 16 is a Delivery Status Notification, see RFC 1894
1769 msg = self._msgobj('msg_16.txt')
1770 eq(msg.get_content_type(), 'multipart/report')
1771 unless(msg.is_multipart())
1772 eq(len(msg.get_payload()), 3)
1773 # Subpart 1 is a text/plain, human readable section
1774 subpart = msg.get_payload(0)
1775 eq(subpart.get_content_type(), 'text/plain')
# NOTE(review): the indented lines inside the expected payload below are
# compared character for character; confirm their leading whitespace
# against data/msg_16.txt before reformatting this literal.
1776 eq(subpart.get_payload(), """\
1777This report relates to a message you sent with the following header fields:
1778
1779 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
1780 Date: Sun, 23 Sep 2001 20:10:55 -0700
1781 From: "Ian T. Henry" <henryi@oxy.edu>
1782 To: SoCal Raves <scr@socal-raves.org>
1783 Subject: [scr] yeah for Ians!!
1784
1785Your message cannot be delivered to the following recipients:
1786
1787 Recipient address: jangel1@cougar.noc.ucla.edu
1788 Reason: recipient reached disk quota
1789
1790""")
1791 # Subpart 2 contains the machine parsable DSN information. It
1792 # consists of two blocks of headers, represented by two nested Message
1793 # objects.
1794 subpart = msg.get_payload(1)
1795 eq(subpart.get_content_type(), 'message/delivery-status')
1796 eq(len(subpart.get_payload()), 2)
1797 # message/delivery-status should treat each block as a bunch of
1798 # headers, i.e. a bunch of Message objects.
1799 dsn1 = subpart.get_payload(0)
1800 unless(isinstance(dsn1, Message))
1801 eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
# A parameter that is present without a value is reported as '' ...
1802 eq(dsn1.get_param('dns', header='reporting-mta'), '')
1803 # Try a missing one <wink>
1804 eq(dsn1.get_param('nsd', header='reporting-mta'), None)
1805 dsn2 = subpart.get_payload(1)
1806 unless(isinstance(dsn2, Message))
1807 eq(dsn2['action'], 'failed')
1808 eq(dsn2.get_params(header='original-recipient'),
1809 [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
1810 eq(dsn2.get_param('rfc822', header='final-recipient'), '')
1811 # Subpart 3 is the original message
1812 subpart = msg.get_payload(2)
1813 eq(subpart.get_content_type(), 'message/rfc822')
1814 payload = subpart.get_payload()
1815 unless(isinstance(payload, list))
1816 eq(len(payload), 1)
1817 subsubpart = payload[0]
1818 unless(isinstance(subsubpart, Message))
1819 eq(subsubpart.get_content_type(), 'text/plain')
1820 eq(subsubpart['message-id'],
1821 '<002001c144a6$8752e060$56104586@oxy.edu>')
1822
1823 def test_epilogue(self):
1824 eq = self.ndiffAssertEqual
1825 with openfile('msg_21.txt') as fp:
1826 text = fp.read()
1827 msg = Message()
1828 msg['From'] = 'aperson@dom.ain'
1829 msg['To'] = 'bperson@dom.ain'
1830 msg['Subject'] = 'Test'
1831 msg.preamble = 'MIME message'
1832 msg.epilogue = 'End of MIME message\n'
1833 msg1 = MIMEText('One')
1834 msg2 = MIMEText('Two')
1835 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
1836 msg.attach(msg1)
1837 msg.attach(msg2)
1838 sfp = StringIO()
1839 g = Generator(sfp)
1840 g.flatten(msg)
1841 eq(sfp.getvalue(), text)
1842
1843 def test_no_nl_preamble(self):
1844 eq = self.ndiffAssertEqual
1845 msg = Message()
1846 msg['From'] = 'aperson@dom.ain'
1847 msg['To'] = 'bperson@dom.ain'
1848 msg['Subject'] = 'Test'
1849 msg.preamble = 'MIME message'
1850 msg.epilogue = ''
1851 msg1 = MIMEText('One')
1852 msg2 = MIMEText('Two')
1853 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
1854 msg.attach(msg1)
1855 msg.attach(msg2)
1856 eq(msg.as_string(), """\
1857From: aperson@dom.ain
1858To: bperson@dom.ain
1859Subject: Test
1860Content-Type: multipart/mixed; boundary="BOUNDARY"
1861
1862MIME message
1863--BOUNDARY
1864Content-Type: text/plain; charset="us-ascii"
1865MIME-Version: 1.0
1866Content-Transfer-Encoding: 7bit
1867
1868One
1869--BOUNDARY
1870Content-Type: text/plain; charset="us-ascii"
1871MIME-Version: 1.0
1872Content-Transfer-Encoding: 7bit
1873
1874Two
1875--BOUNDARY--
1876""")
1877
1878 def test_default_type(self):
1879 eq = self.assertEqual
1880 with openfile('msg_30.txt') as fp:
1881 msg = email.message_from_file(fp)
1882 container1 = msg.get_payload(0)
1883 eq(container1.get_default_type(), 'message/rfc822')
1884 eq(container1.get_content_type(), 'message/rfc822')
1885 container2 = msg.get_payload(1)
1886 eq(container2.get_default_type(), 'message/rfc822')
1887 eq(container2.get_content_type(), 'message/rfc822')
1888 container1a = container1.get_payload(0)
1889 eq(container1a.get_default_type(), 'text/plain')
1890 eq(container1a.get_content_type(), 'text/plain')
1891 container2a = container2.get_payload(0)
1892 eq(container2a.get_default_type(), 'text/plain')
1893 eq(container2a.get_content_type(), 'text/plain')
1894
1895 def test_default_type_with_explicit_container_type(self):
1896 eq = self.assertEqual
1897 with openfile('msg_28.txt') as fp:
1898 msg = email.message_from_file(fp)
1899 container1 = msg.get_payload(0)
1900 eq(container1.get_default_type(), 'message/rfc822')
1901 eq(container1.get_content_type(), 'message/rfc822')
1902 container2 = msg.get_payload(1)
1903 eq(container2.get_default_type(), 'message/rfc822')
1904 eq(container2.get_content_type(), 'message/rfc822')
1905 container1a = container1.get_payload(0)
1906 eq(container1a.get_default_type(), 'text/plain')
1907 eq(container1a.get_content_type(), 'text/plain')
1908 container2a = container2.get_payload(0)
1909 eq(container2a.get_default_type(), 'text/plain')
1910 eq(container2a.get_content_type(), 'text/plain')
1911
1912 def test_default_type_non_parsed(self):
1913 eq = self.assertEqual
1914 neq = self.ndiffAssertEqual
1915 # Set up container
1916 container = MIMEMultipart('digest', 'BOUNDARY')
1917 container.epilogue = ''
1918 # Set up subparts
1919 subpart1a = MIMEText('message 1\n')
1920 subpart2a = MIMEText('message 2\n')
1921 subpart1 = MIMEMessage(subpart1a)
1922 subpart2 = MIMEMessage(subpart2a)
1923 container.attach(subpart1)
1924 container.attach(subpart2)
1925 eq(subpart1.get_content_type(), 'message/rfc822')
1926 eq(subpart1.get_default_type(), 'message/rfc822')
1927 eq(subpart2.get_content_type(), 'message/rfc822')
1928 eq(subpart2.get_default_type(), 'message/rfc822')
1929 neq(container.as_string(0), '''\
1930Content-Type: multipart/digest; boundary="BOUNDARY"
1931MIME-Version: 1.0
1932
1933--BOUNDARY
1934Content-Type: message/rfc822
1935MIME-Version: 1.0
1936
1937Content-Type: text/plain; charset="us-ascii"
1938MIME-Version: 1.0
1939Content-Transfer-Encoding: 7bit
1940
1941message 1
1942
1943--BOUNDARY
1944Content-Type: message/rfc822
1945MIME-Version: 1.0
1946
1947Content-Type: text/plain; charset="us-ascii"
1948MIME-Version: 1.0
1949Content-Transfer-Encoding: 7bit
1950
1951message 2
1952
1953--BOUNDARY--
1954''')
1955 del subpart1['content-type']
1956 del subpart1['mime-version']
1957 del subpart2['content-type']
1958 del subpart2['mime-version']
1959 eq(subpart1.get_content_type(), 'message/rfc822')
1960 eq(subpart1.get_default_type(), 'message/rfc822')
1961 eq(subpart2.get_content_type(), 'message/rfc822')
1962 eq(subpart2.get_default_type(), 'message/rfc822')
1963 neq(container.as_string(0), '''\
1964Content-Type: multipart/digest; boundary="BOUNDARY"
1965MIME-Version: 1.0
1966
1967--BOUNDARY
1968
1969Content-Type: text/plain; charset="us-ascii"
1970MIME-Version: 1.0
1971Content-Transfer-Encoding: 7bit
1972
1973message 1
1974
1975--BOUNDARY
1976
1977Content-Type: text/plain; charset="us-ascii"
1978MIME-Version: 1.0
1979Content-Transfer-Encoding: 7bit
1980
1981message 2
1982
1983--BOUNDARY--
1984''')
1985
1986 def test_mime_attachments_in_constructor(self):
1987 eq = self.assertEqual
1988 text1 = MIMEText('')
1989 text2 = MIMEText('')
1990 msg = MIMEMultipart(_subparts=(text1, text2))
1991 eq(len(msg.get_payload()), 2)
1992 eq(msg.get_payload(0), text1)
1993 eq(msg.get_payload(1), text2)
1994
Christian Heimes587c2bf2008-01-19 16:21:02 +00001995 def test_default_multipart_constructor(self):
1996 msg = MIMEMultipart()
1997 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001998
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001999
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From line, since it may
# contain a changed date.
class TestIdempotent(TestEmailBase):
    def _msgobj(self, filename):
        # Return a (parsed message, raw text) pair for a data file.
        with openfile(filename) as fixture:
            raw = fixture.read()
        return email.message_from_string(raw), raw

    def _idempotent(self, msg, text):
        # Flatten msg without header wrapping and compare it with text.
        out = StringIO()
        Generator(out, maxheaderlen=0).flatten(msg)
        self.ndiffAssertEqual(text, out.getvalue())

    def test_parse_text_message(self):
        parsed, raw = self._msgobj('msg_01.txt')
        self.assertEqual(parsed.get_content_type(), 'text/plain')
        self.assertEqual(parsed.get_content_maintype(), 'text')
        self.assertEqual(parsed.get_content_subtype(), 'plain')
        self.assertEqual(parsed.get_params()[1], ('charset', 'us-ascii'))
        self.assertEqual(parsed.get_param('charset'), 'us-ascii')
        self.assertEqual(parsed.preamble, None)
        self.assertEqual(parsed.epilogue, None)
        self._idempotent(parsed, raw)

    def test_parse_untyped_message(self):
        parsed, raw = self._msgobj('msg_03.txt')
        self.assertEqual(parsed.get_content_type(), 'text/plain')
        self.assertEqual(parsed.get_params(), None)
        self.assertEqual(parsed.get_param('charset'), None)
        self._idempotent(parsed, raw)

    # Each of the following tests only checks that the named data file
    # survives a parse/flatten round trip unchanged.

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_content_type(self):
        report, raw = self._msgobj('msg_05.txt')
        self.assertEqual(report.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(report.get_params())
        self.assertEqual(params['report-type'], 'delivery-status')
        self.assertEqual(params['boundary'],
                         'D1690A7AC1.996856090/mail.example.com')
        self.assertEqual(report.preamble,
                         'This is a MIME-encapsulated message.\n')
        self.assertEqual(report.epilogue, '\n')
        self.assertEqual(len(report.get_payload()), 3)
        # Make sure the subparts are what we expect
        for index in (0, 1):
            text_part = report.get_payload(index)
            self.assertEqual(text_part.get_content_type(), 'text/plain')
            self.assertEqual(text_part.get_payload(), 'Yadda yadda yadda\n')
        wrapper = report.get_payload(2)
        self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
        self.assertTrue(isinstance(wrapper, Message))
        parts = wrapper.get_payload()
        self.assertTrue(isinstance(parts, list))
        self.assertEqual(len(parts), 1)
        enclosed = parts[0]
        self.assertTrue(isinstance(enclosed, Message))
        self.assertEqual(enclosed.get_payload(), 'Yadda yadda yadda\n')

    def test_parser(self):
        outer, raw = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        self.assertEqual(outer.get_content_type(), 'message/rfc822')
        # The payload must be a list of exactly one sub-Message, and that
        # sub-message must be text/plain with a string payload.
        parts = outer.get_payload()
        self.assertTrue(isinstance(parts, list))
        self.assertEqual(len(parts), 1)
        enclosed = parts[0]
        self.assertTrue(isinstance(enclosed, Message))
        self.assertEqual(enclosed.get_content_type(), 'text/plain')
        self.assertTrue(isinstance(enclosed.get_payload(), str))
        self.assertEqual(enclosed.get_payload(), '\n')
2151
2152
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002153
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002154# Test various other bits of the package's functionality
2155class TestMiscellaneous(TestEmailBase):
2156 def test_message_from_string(self):
2157 with openfile('msg_01.txt') as fp:
2158 text = fp.read()
2159 msg = email.message_from_string(text)
2160 s = StringIO()
2161 # Don't wrap/continue long headers since we're trying to test
2162 # idempotency.
2163 g = Generator(s, maxheaderlen=0)
2164 g.flatten(msg)
2165 self.assertEqual(text, s.getvalue())
2166
2167 def test_message_from_file(self):
2168 with openfile('msg_01.txt') as fp:
2169 text = fp.read()
2170 fp.seek(0)
2171 msg = email.message_from_file(fp)
2172 s = StringIO()
2173 # Don't wrap/continue long headers since we're trying to test
2174 # idempotency.
2175 g = Generator(s, maxheaderlen=0)
2176 g.flatten(msg)
2177 self.assertEqual(text, s.getvalue())
2178
2179 def test_message_from_string_with_class(self):
Georg Brandlab91fde2009-08-13 08:51:18 +00002180 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002181 with openfile('msg_01.txt') as fp:
2182 text = fp.read()
2183
2184 # Create a subclass
2185 class MyMessage(Message):
2186 pass
2187
2188 msg = email.message_from_string(text, MyMessage)
2189 unless(isinstance(msg, MyMessage))
2190 # Try something more complicated
2191 with openfile('msg_02.txt') as fp:
2192 text = fp.read()
2193 msg = email.message_from_string(text, MyMessage)
2194 for subpart in msg.walk():
2195 unless(isinstance(subpart, MyMessage))
2196
2197 def test_message_from_file_with_class(self):
Georg Brandlab91fde2009-08-13 08:51:18 +00002198 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002199 # Create a subclass
2200 class MyMessage(Message):
2201 pass
2202
2203 with openfile('msg_01.txt') as fp:
2204 msg = email.message_from_file(fp, MyMessage)
2205 unless(isinstance(msg, MyMessage))
2206 # Try something more complicated
2207 with openfile('msg_02.txt') as fp:
2208 msg = email.message_from_file(fp, MyMessage)
2209 for subpart in msg.walk():
2210 unless(isinstance(subpart, MyMessage))
2211
2212 def test__all__(self):
2213 module = __import__('email')
2214 # Can't use sorted() here due to Python 2.3 compatibility
2215 all = module.__all__[:]
2216 all.sort()
2217 self.assertEqual(all, [
2218 'base64mime', 'charset', 'encoders', 'errors', 'generator',
2219 'header', 'iterators', 'message', 'message_from_file',
2220 'message_from_string', 'mime', 'parser',
2221 'quoprimime', 'utils',
2222 ])
2223
2224 def test_formatdate(self):
2225 now = time.time()
2226 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2227 time.gmtime(now)[:6])
2228
2229 def test_formatdate_localtime(self):
2230 now = time.time()
2231 self.assertEqual(
2232 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2233 time.localtime(now)[:6])
2234
2235 def test_formatdate_usegmt(self):
2236 now = time.time()
2237 self.assertEqual(
2238 utils.formatdate(now, localtime=False),
2239 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2240 self.assertEqual(
2241 utils.formatdate(now, localtime=False, usegmt=True),
2242 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2243
2244 def test_parsedate_none(self):
2245 self.assertEqual(utils.parsedate(''), None)
2246
2247 def test_parsedate_compact(self):
2248 # The FWS after the comma is optional
2249 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2250 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2251
2252 def test_parsedate_no_dayofweek(self):
2253 eq = self.assertEqual
2254 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2255 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2256
2257 def test_parsedate_compact_no_dayofweek(self):
2258 eq = self.assertEqual
2259 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2260 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2261
2262 def test_parsedate_acceptable_to_time_functions(self):
2263 eq = self.assertEqual
2264 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2265 t = int(time.mktime(timetup))
2266 eq(time.localtime(t)[:6], timetup[:6])
2267 eq(int(time.strftime('%Y', timetup)), 2003)
2268 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2269 t = int(time.mktime(timetup[:9]))
2270 eq(time.localtime(t)[:6], timetup[:6])
2271 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2272
R. David Murray1061f182010-08-25 01:55:24 +00002273 def test_parsedate_y2k(self):
2274 """Test for parsing a date with a two-digit year.
2275
2276 Parsing a date with a two-digit year should return the correct
2277 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2278 obsoletes RFC822) requires four-digit years.
2279
2280 """
2281 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2282 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2283 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2284 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2285
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002286 def test_parseaddr_empty(self):
2287 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2288 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2289
2290 def test_noquote_dump(self):
2291 self.assertEqual(
2292 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2293 'A Silly Person <person@dom.ain>')
2294
2295 def test_escape_dump(self):
2296 self.assertEqual(
2297 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2298 r'"A \(Very\) Silly Person" <person@dom.ain>')
2299 a = r'A \(Special\) Person'
2300 b = 'person@dom.ain'
2301 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2302
2303 def test_escape_backslashes(self):
2304 self.assertEqual(
2305 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2306 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2307 a = r'Arthur \Backslash\ Foobar'
2308 b = 'person@dom.ain'
2309 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2310
2311 def test_name_with_dot(self):
2312 x = 'John X. Doe <jxd@example.com>'
2313 y = '"John X. Doe" <jxd@example.com>'
2314 a, b = ('John X. Doe', 'jxd@example.com')
2315 self.assertEqual(utils.parseaddr(x), (a, b))
2316 self.assertEqual(utils.parseaddr(y), (a, b))
2317 # formataddr() quotes the name if there's a dot in it
2318 self.assertEqual(utils.formataddr((a, b)), y)
2319
R. David Murray7f8199a2010-10-02 16:04:44 +00002320 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2321 # issue 10005. Note that in the third test the second pair of
2322 # backslashes is not actually a quoted pair because it is not inside a
2323 # comment or quoted string: the address being parsed has a quoted
2324 # string containing a quoted backslash, followed by 'example' and two
2325 # backslashes, followed by another quoted string containing a space and
2326 # the word 'example'. parseaddr copies those two backslashes
2327 # literally. Per rfc5322 this is not technically correct since a \ may
2328 # not appear in an address outside of a quoted string. It is probably
2329 # a sensible Postel interpretation, though.
2330 eq = self.assertEqual
2331 eq(utils.parseaddr('""example" example"@example.com'),
2332 ('', '""example" example"@example.com'))
2333 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2334 ('', '"\\"example\\" example"@example.com'))
2335 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2336 ('', '"\\\\"example\\\\" example"@example.com'))
2337
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002338 def test_multiline_from_comment(self):
2339 x = """\
2340Foo
2341\tBar <foo@example.com>"""
2342 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2343
2344 def test_quote_dump(self):
2345 self.assertEqual(
2346 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2347 r'"A Silly; Person" <person@dom.ain>')
2348
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002349 def test_charset_richcomparisons(self):
2350 eq = self.assertEqual
Georg Brandlab91fde2009-08-13 08:51:18 +00002351 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002352 cset1 = Charset()
2353 cset2 = Charset()
2354 eq(cset1, 'us-ascii')
2355 eq(cset1, 'US-ASCII')
2356 eq(cset1, 'Us-AsCiI')
2357 eq('us-ascii', cset1)
2358 eq('US-ASCII', cset1)
2359 eq('Us-AsCiI', cset1)
2360 ne(cset1, 'usascii')
2361 ne(cset1, 'USASCII')
2362 ne(cset1, 'UsAsCiI')
2363 ne('usascii', cset1)
2364 ne('USASCII', cset1)
2365 ne('UsAsCiI', cset1)
2366 eq(cset1, cset2)
2367 eq(cset2, cset1)
2368
2369 def test_getaddresses(self):
2370 eq = self.assertEqual
2371 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2372 'Bud Person <bperson@dom.ain>']),
2373 [('Al Person', 'aperson@dom.ain'),
2374 ('Bud Person', 'bperson@dom.ain')])
2375
2376 def test_getaddresses_nasty(self):
2377 eq = self.assertEqual
2378 eq(utils.getaddresses(['foo: ;']), [('', '')])
2379 eq(utils.getaddresses(
2380 ['[]*-- =~$']),
2381 [('', ''), ('', ''), ('', '*--')])
2382 eq(utils.getaddresses(
2383 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2384 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2385
2386 def test_getaddresses_embedded_comment(self):
2387 """Test proper handling of a nested comment"""
2388 eq = self.assertEqual
2389 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2390 eq(addrs[0][1], 'foo@bar.com')
2391
2392 def test_utils_quote_unquote(self):
2393 eq = self.assertEqual
2394 msg = Message()
2395 msg.add_header('content-disposition', 'attachment',
2396 filename='foo\\wacky"name')
2397 eq(msg.get_filename(), 'foo\\wacky"name')
2398
2399 def test_get_body_encoding_with_bogus_charset(self):
2400 charset = Charset('not a charset')
2401 self.assertEqual(charset.get_body_encoding(), 'base64')
2402
2403 def test_get_body_encoding_with_uppercase_charset(self):
2404 eq = self.assertEqual
2405 msg = Message()
2406 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2407 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2408 charsets = msg.get_charsets()
2409 eq(len(charsets), 1)
2410 eq(charsets[0], 'utf-8')
2411 charset = Charset(charsets[0])
2412 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002413 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002414 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2415 eq(msg.get_payload(decode=True), b'hello world')
2416 eq(msg['content-transfer-encoding'], 'base64')
2417 # Try another one
2418 msg = Message()
2419 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2420 charsets = msg.get_charsets()
2421 eq(len(charsets), 1)
2422 eq(charsets[0], 'us-ascii')
2423 charset = Charset(charsets[0])
2424 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2425 msg.set_payload('hello world', charset=charset)
2426 eq(msg.get_payload(), 'hello world')
2427 eq(msg['content-transfer-encoding'], '7bit')
2428
2429 def test_charsets_case_insensitive(self):
2430 lc = Charset('us-ascii')
2431 uc = Charset('US-ASCII')
2432 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2433
2434 def test_partial_falls_inside_message_delivery_status(self):
2435 eq = self.ndiffAssertEqual
2436 # The Parser interface provides chunks of data to FeedParser in 8192
2437 # byte gulps. SF bug #1076485 found one of those chunks inside
2438 # message/delivery-status header block, which triggered an
2439 # unreadline() of NeedMoreData.
2440 msg = self._msgobj('msg_43.txt')
 # Write the parsed message's structure into an in-memory buffer and
 # compare it with the expected outline (one content type per line).
2441 sfp = StringIO()
2442 iterators._structure(msg, sfp)
2443 eq(sfp.getvalue(), """\
2444multipart/report
2445 text/plain
2446 message/delivery-status
2447 text/plain
2448 text/plain
2449 text/plain
2450 text/plain
2451 text/plain
2452 text/plain
2453 text/plain
2454 text/plain
2455 text/plain
2456 text/plain
2457 text/plain
2458 text/plain
2459 text/plain
2460 text/plain
2461 text/plain
2462 text/plain
2463 text/plain
2464 text/plain
2465 text/plain
2466 text/plain
2467 text/plain
2468 text/plain
2469 text/plain
2470 text/plain
2471 text/plain
2472 text/plain
2473 text/rfc822-headers
2474""")
2475
2476
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002477
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002478# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for the email.iterators helpers and feedparser line buffering."""

    def test_body_line_iterator(self):
        # First a simple non-multipart message: joining the iterated lines
        # must reproduce the payload.
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        self.assertEqual(len(lines), 6)
        self.ndiffAssertEqual(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart, compared against the text
        # stored in msg_19.txt.
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        self.assertEqual(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            self.ndiffAssertEqual(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        # Only the maintype is given, so every text/* subpart is visited.
        msg = self._msgobj('msg_04.txt')
        payloads = [part.get_payload()
                    for part in iterators.typed_subpart_iterator(msg, 'text')]
        self.assertEqual(len(payloads), 2)
        self.assertEqual(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        # Both maintype and subtype are given: only text/plain matches.
        msg = self._msgobj('msg_03.txt')
        payloads = [part.get_payload()
                    for part in iterators.typed_subpart_iterator(
                        msg, 'text', 'plain')]
        self.assertEqual(len(payloads), 1)
        self.assertEqual(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry pairs a chunk to push with the number of complete
        # lines that must become readable right after that push.
        chunks = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        lines_read = []
        expected_total = 0
        for chunk, expected in chunks:
            bsf.push(chunk)
            expected_total += expected
            available = 0
            while True:
                line = bsf.readline()
                # NeedMoreData is a sentinel object, so test identity.
                if line is NeedMoreData:
                    break
                lines_read.append(line)
                available += 1
            # assertEqual (rather than assertTrue(a == b)) reports both
            # values when the counts disagree.
            self.assertEqual(expected, available)
        self.assertEqual(len(lines_read), expected_total)
        # Nothing may be lost or invented: the lines read back must
        # concatenate to exactly what was pushed.
        self.assertEqual(''.join([chunk for chunk, _ in chunks]),
                         ''.join(lines_read))
2565
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002566
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002567
class TestParsers(TestEmailBase):
    """Parser and HeaderParser behavior on regular and awkward input."""

    def test_header_parser(self):
        # HeaderParser reads only the headers of a complex multipart MIME
        # document, so the body is left as one unparsed string.
        with openfile('msg_02.txt') as fp:
            parsed = HeaderParser().parse(fp)
        self.assertEqual(parsed['from'], 'ppp-request@zzz.org')
        self.assertEqual(parsed['to'], 'ppp@zzz.org')
        self.assertEqual(parsed.get_content_type(), 'multipart/mixed')
        self.assertFalse(parsed.is_multipart())
        self.assertTrue(isinstance(parsed.get_payload(), str))

    def test_whitespace_continuation(self):
        # The line after the Subject: header holds a single space, so it is
        # not empty and ends up as part of the Subject value.
        parsed = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        self.assertEqual(parsed['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(parsed['message-id'], 'spam')
        self.assertEqual(parsed.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        # Same situation as above, except that the Subject: header is the
        # last one before the body.
        parsed = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        self.assertEqual(parsed['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(parsed['message-id'], 'spam')
        self.assertEqual(parsed.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        # The file is opened with newline='\n'; the first part's payload is
        # expected to still carry its CRLF line endings.
        with openfile('msg_26.txt', newline='\n') as fp:
            parsed = Parser().parse(fp)
        self.assertEqual(len(parsed.get_payload()), 2)
        text_part = parsed.get_payload(0)
        self.assertEqual(text_part.get_content_type(), 'text/plain')
        self.assertEqual(text_part.get_payload(),
                         'Simple email with attachment.\r\n\r\n')
        attachment = parsed.get_payload(1)
        self.assertEqual(attachment.get_content_type(), 'application/riscos')

    def test_multipart_digest_with_extra_mime_headers(self):
        check = self.assertEqual
        with openfile('msg_28.txt') as fp:
            digest = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        check(digest.is_multipart(), 1)
        check(len(digest.get_payload()), 2)
        # Both message/rfc822 wrappers have the same shape and differ only
        # in the text of the single text/plain message they hold.
        for index, body in enumerate(('message 1\n', 'message 2\n')):
            wrapper = digest.get_payload(index)
            check(wrapper.get_content_type(), 'message/rfc822')
            check(wrapper.is_multipart(), 1)
            check(len(wrapper.get_payload()), 1)
            inner = wrapper.get_payload(0)
            check(inner.is_multipart(), 0)
            check(inner.get_content_type(), 'text/plain')
            self.ndiffAssertEqual(inner.get_payload(), body)

    def test_three_lines(self):
        # From a bug report by Andrew McNamara: three header lines joined
        # without a trailing newline after the last one.
        header_lines = ['From: Andrew Person <aperson@dom.ain',
                        'Subject: Test',
                        'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        parsed = email.message_from_string(NL.join(header_lines))
        self.assertEqual(parsed['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        first_value = 'text'
        second_value = 'more text'
        source = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            first_value, second_value)
        parsed = email.message_from_string(source)
        self.assertEqual(parsed.get('Header'), first_value)
        self.assertEqual(parsed.get('Next-Header'), second_value)

    def test_rfc2822_header_syntax(self):
        # Field names made of unusual printable characters are still
        # recognized as headers.
        source = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        parsed = email.message_from_string(source)
        self.assertEqual(len(parsed), 3)
        self.assertEqual(sorted(parsed), ['!"#QUX;~', '>From', 'From'])
        self.assertEqual(parsed.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        # The first line has a space before any colon, so it is not a
        # header line and no headers are collected at all.
        source = ('>From foo@example.com 11:25:53\n'
                  'From: bar\n!"#QUX;~: zoo\n\nbody')
        parsed = email.message_from_string(source)
        self.assertEqual(len(parsed.keys()), 0)

    def test_rfc2822_one_character_header(self):
        # Single-character field names are valid headers.
        source = 'A: first header\nB: second header\nCC: third header\n\nbody'
        parsed = email.message_from_string(source)
        self.assertEqual(sorted(parsed.keys()), ['A', 'B', 'CC'])
        self.assertEqual(parsed.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part
        # ending with "\r\n\n".
        source = ''.join([
            "From: foo@bar.com\n",
            "To: baz\n",
            "Mime-Version: 1.0\n",
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n",
            "\n",
            "--BOUNDARY\n",
            "Content-Type: text/plain\n",
            "\n",
            "body ending with CRLF newline\r\n",
            "\n",
            "--BOUNDARY--\n",
        ])
        parsed = email.message_from_string(source)
        first_body = parsed.get_payload(0).get_payload()
        self.assertTrue(first_body.endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002718
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002719
class TestBase64(unittest.TestCase):
    """Checks for the email.base64mime helper functions."""

    def test_len(self):
        check = self.assertEqual
        check(base64mime.header_length('hello'),
              len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Every started group of three input characters takes four
            # output characters: 0 -> 0, 1..3 -> 4, 4..6 -> 8, ... 13..14 -> 20.
            expected = (size + 2) // 3 * 4
            check(base64mime.header_length('x' * size), expected)

    def test_decode(self):
        for encoded, raw in (('', b''), ('aGVsbG8=', b'hello')):
            self.assertEqual(base64mime.decode(encoded), raw)

    def test_encode(self):
        encode = base64mime.body_encode
        check = self.assertEqual
        check(encode(b''), b'')
        check(encode(b'hello'), 'aGVsbG8=\n')
        # A newline inside the input is encoded like any other byte.
        check(encode(b'hello\n'), 'aGVsbG8K\n')
        twenty_words = b'xxxx ' * 20
        # maxlinelen limits the length of each output line.
        check(encode(twenty_words, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # eol selects the line terminator of the output.
        check(encode(twenty_words, maxlinelen=40, eol='\r\n'),
              """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        check = self.assertEqual
        encode = base64mime.header_encode
        check(encode('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        check(encode('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # An explicit charset shows up in the encoded word; without one the
        # default iso-8859-1 is used again.
        check(encode('hello', charset='iso-8859-2'),
              '=?iso-8859-2?b?aGVsbG8=?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002770
2771
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002772
class TestQuopri(unittest.TestCase):
    """Checks for the email.quoprimime helper functions."""

    def setUp(self):
        # Byte values that may appear unencoded in a header: ASCII letters,
        # digits and the five punctuation characters below.
        self.hlit = (list(range(ord('a'), ord('z') + 1)) +
                     list(range(ord('A'), ord('Z') + 1)) +
                     list(range(ord('0'), ord('9') + 1)) +
                     list(b'!*+-/'))
        # Every other byte value must be encoded in a header.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        assert len(self.hlit) + len(self.hnon) == 256
        # Byte values that may appear unencoded in a body: space through
        # tilde except '=', followed by tab.
        self.blit = [c for c in range(ord(' '), ord('~') + 1)
                     if c != ord('=')] + [ord('\t')]
        # Every other byte value must be encoded in a body.
        self.bnon = [c for c in range(256) if c not in self.blit]
        assert len(self.blit) + len(self.bnon) == 256

    def test_quopri_header_check(self):
        for octet in self.hlit:
            self.assertFalse(
                quoprimime.header_check(octet),
                'Should not be header quopri encoded: %s' % chr(octet))
        for octet in self.hnon:
            self.assertTrue(
                quoprimime.header_check(octet),
                'Should be header quopri encoded: %s' % chr(octet))

    def test_quopri_body_check(self):
        for octet in self.blit:
            self.assertFalse(
                quoprimime.body_check(octet),
                'Should not be body quopri encoded: %s' % chr(octet))
        for octet in self.bnon:
            self.assertTrue(
                quoprimime.body_check(octet),
                'Should be body quopri encoded: %s' % chr(octet))

    def test_header_quopri_len(self):
        check = self.assertEqual
        # header_length() does not include the RFC 2047 chrome;
        # =?xxx?q?...?= means 10 extra characters.
        chrome = 10
        for raw, expected in ((b'hello', 5), (b'h@e@l@l@o@', 20)):
            check(quoprimime.header_length(raw), expected)
            check(len(quoprimime.header_encode(raw, charset='xxx')),
                  quoprimime.header_length(raw) + chrome)
        for octet in self.hlit:
            check(quoprimime.header_length(bytes([octet])), 1,
                  'expected length 1 for %r' % chr(octet))
        for octet in self.hnon:
            # Space is special; it's encoded to _
            if octet == ord(' '):
                continue
            check(quoprimime.header_length(bytes([octet])), 3,
                  'expected length 3 for %r' % chr(octet))
        check(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        for octets, expected in ((self.blit, 1), (self.bnon, 3)):
            for octet in octets:
                self.assertEqual(quoprimime.body_length(bytes([octet])),
                                 expected)

    def test_quote_unquote_idempotent(self):
        # unquote() must undo quote() for every single character.
        for char in map(chr, range(256)):
            self.assertEqual(quoprimime.unquote(quoprimime.quote(char)), char)

    def test_header_encode(self):
        check = self.assertEqual
        encode = quoprimime.header_encode
        check(encode(b'hello'), '=?iso-8859-1?q?hello?=')
        check(encode(b'hello', charset='iso-8859-2'),
              '=?iso-8859-2?q?hello?=')
        check(encode(b'hello\nworld'), '=?iso-8859-1?q?hello=0Aworld?=')
        # A non-ASCII byte is written as =XX.
        check(encode(b'hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')

    def test_decode(self):
        check = self.assertEqual
        decode = quoprimime.decode
        check(decode(''), '')
        check(decode('hello'), 'hello')
        # The second argument is the eol string used in the decoded text.
        check(decode('hello', 'X'), 'hello')
        check(decode('hello\nworld', 'X'), 'helloXworld')

    def test_encode(self):
        check = self.assertEqual
        encode = quoprimime.body_encode
        check(encode(''), '')
        check(encode('hello'), 'hello')
        # A CRLF in the input comes out as the default eol.
        check(encode('hello\r\nworld'), 'hello\nworld')
        twenty_words = 'xxxx ' * 20
        # maxlinelen forces soft line breaks ("=" at end of line).
        check(encode(twenty_words, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # eol selects the line terminator written after each soft break.
        check(encode(twenty_words, maxlinelen=40, eol='\r\n'),
              """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        # An empty line inside the text survives encoding.
        check(encode("""\
one line

two line"""), """\
one line

two line""")
2889
2890
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002891
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002892# Test the Charset class
2893class TestCharset(unittest.TestCase):
2894 def tearDown(self):
2895 from email import charset as CharsetModule
2896 try:
2897 del CharsetModule.CHARSETS['fake']
2898 except KeyError:
2899 pass
2900
Guido van Rossum9604e662007-08-30 03:46:43 +00002901 def test_codec_encodeable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002902 eq = self.assertEqual
2903 # Make sure us-ascii = no Unicode conversion
2904 c = Charset('us-ascii')
Guido van Rossum9604e662007-08-30 03:46:43 +00002905 eq(c.header_encode('Hello World!'), 'Hello World!')
2906 # Test 8-bit idempotency with us-ascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002907 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
Guido van Rossum9604e662007-08-30 03:46:43 +00002908 self.assertRaises(UnicodeError, c.header_encode, s)
2909 c = Charset('utf-8')
2910 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002911
2912 def test_body_encode(self):
2913 eq = self.assertEqual
2914 # Try a charset with QP body encoding
2915 c = Charset('iso-8859-1')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00002916 eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002917 # Try a charset with Base64 body encoding
2918 c = Charset('utf-8')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002919 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002920 # Try a charset with None body encoding
2921 c = Charset('us-ascii')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00002922 eq('hello world', c.body_encode('hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002923 # Try the convert argument, where input codec != output codec
2924 c = Charset('euc-jp')
2925 # With apologies to Tokio Kikuchi ;)
Barry Warsawbef9d212007-08-31 10:55:37 +00002926 # XXX FIXME
2927## try:
2928## eq('\x1b$B5FCO;~IW\x1b(B',
2929## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
2930## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
2931## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
2932## except LookupError:
2933## # We probably don't have the Japanese codecs installed
2934## pass
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002935 # Testing SF bug #625509, which we have to fake, since there are no
2936 # built-in encodings where the header encoding is QP but the body
2937 # encoding is not.
2938 from email import charset as CharsetModule
2939 CharsetModule.add_charset('fake', CharsetModule.QP, None)
2940 c = Charset('fake')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00002941 eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002942
2943 def test_unicode_charset_name(self):
2944 charset = Charset('us-ascii')
2945 self.assertEqual(str(charset), 'us-ascii')
2946 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
2947
2948
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002949
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002950# Test multilingual MIME headers.
2951class TestHeader(TestEmailBase):
2952 def test_simple(self):
2953 eq = self.ndiffAssertEqual
2954 h = Header('Hello World!')
2955 eq(h.encode(), 'Hello World!')
2956 h.append(' Goodbye World!')
2957 eq(h.encode(), 'Hello World! Goodbye World!')
2958
2959 def test_simple_surprise(self):
2960 eq = self.ndiffAssertEqual
2961 h = Header('Hello World!')
2962 eq(h.encode(), 'Hello World!')
2963 h.append('Goodbye World!')
2964 eq(h.encode(), 'Hello World! Goodbye World!')
2965
2966 def test_header_needs_no_decoding(self):
2967 h = 'no decoding needed'
2968 self.assertEqual(decode_header(h), [(h, None)])
2969
2970 def test_long(self):
2971 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
2972 maxlinelen=76)
2973 for l in h.encode(splitchars=' ').split('\n '):
Georg Brandlab91fde2009-08-13 08:51:18 +00002974 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002975
2976 def test_multilingual(self):
2977 eq = self.ndiffAssertEqual
2978 g = Charset("iso-8859-1")
2979 cz = Charset("iso-8859-2")
2980 utf8 = Charset("utf-8")
2981 g_head = (b'Die Mieter treten hier ein werden mit einem '
2982 b'Foerderband komfortabel den Korridor entlang, '
2983 b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
2984 b'gegen die rotierenden Klingen bef\xf6rdert. ')
2985 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
2986 b'd\xf9vtipu.. ')
2987 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
2988 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
2989 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
2990 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
2991 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
2992 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
2993 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
2994 '\u3044\u307e\u3059\u3002')
2995 h = Header(g_head, g)
2996 h.append(cz_head, cz)
2997 h.append(utf8_head, utf8)
Guido van Rossum9604e662007-08-30 03:46:43 +00002998 enc = h.encode(maxlinelen=76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002999 eq(enc, """\
Guido van Rossum9604e662007-08-30 03:46:43 +00003000=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
3001 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
3002 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
3003 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003004 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
3005 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
3006 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
3007 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
Guido van Rossum9604e662007-08-30 03:46:43 +00003008 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
3009 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
3010 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
3011 decoded = decode_header(enc)
3012 eq(len(decoded), 3)
3013 eq(decoded[0], (g_head, 'iso-8859-1'))
3014 eq(decoded[1], (cz_head, 'iso-8859-2'))
3015 eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003016 ustr = str(h)
Guido van Rossum9604e662007-08-30 03:46:43 +00003017 eq(ustr,
3018 (b'Die Mieter treten hier ein werden mit einem Foerderband '
3019 b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
3020 b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
3021 b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
3022 b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
3023 b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
3024 b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
3025 b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
3026 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
3027 b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
3028 b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
3029 b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
3030 b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
3031 b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
3032 b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
3033 b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
3034 ).decode('utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003035 # Test make_header()
3036 newh = make_header(decode_header(enc))
Guido van Rossum9604e662007-08-30 03:46:43 +00003037 eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003038
3039 def test_empty_header_encode(self):
3040 h = Header()
3041 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00003042
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003043 def test_header_ctor_default_args(self):
3044 eq = self.ndiffAssertEqual
3045 h = Header()
3046 eq(h, '')
3047 h.append('foo', Charset('iso-8859-1'))
Guido van Rossum9604e662007-08-30 03:46:43 +00003048 eq(h, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003049
3050 def test_explicit_maxlinelen(self):
3051 eq = self.ndiffAssertEqual
3052 hstr = ('A very long line that must get split to something other '
3053 'than at the 76th character boundary to test the non-default '
3054 'behavior')
3055 h = Header(hstr)
3056 eq(h.encode(), '''\
3057A very long line that must get split to something other than at the 76th
3058 character boundary to test the non-default behavior''')
3059 eq(str(h), hstr)
3060 h = Header(hstr, header_name='Subject')
3061 eq(h.encode(), '''\
3062A very long line that must get split to something other than at the
3063 76th character boundary to test the non-default behavior''')
3064 eq(str(h), hstr)
3065 h = Header(hstr, maxlinelen=1024, header_name='Subject')
3066 eq(h.encode(), hstr)
3067 eq(str(h), hstr)
3068
Guido van Rossum9604e662007-08-30 03:46:43 +00003069 def test_quopri_splittable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003070 eq = self.ndiffAssertEqual
3071 h = Header(charset='iso-8859-1', maxlinelen=20)
Guido van Rossum9604e662007-08-30 03:46:43 +00003072 x = 'xxxx ' * 20
3073 h.append(x)
3074 s = h.encode()
3075 eq(s, """\
3076=?iso-8859-1?q?xxx?=
3077 =?iso-8859-1?q?x_?=
3078 =?iso-8859-1?q?xx?=
3079 =?iso-8859-1?q?xx?=
3080 =?iso-8859-1?q?_x?=
3081 =?iso-8859-1?q?xx?=
3082 =?iso-8859-1?q?x_?=
3083 =?iso-8859-1?q?xx?=
3084 =?iso-8859-1?q?xx?=
3085 =?iso-8859-1?q?_x?=
3086 =?iso-8859-1?q?xx?=
3087 =?iso-8859-1?q?x_?=
3088 =?iso-8859-1?q?xx?=
3089 =?iso-8859-1?q?xx?=
3090 =?iso-8859-1?q?_x?=
3091 =?iso-8859-1?q?xx?=
3092 =?iso-8859-1?q?x_?=
3093 =?iso-8859-1?q?xx?=
3094 =?iso-8859-1?q?xx?=
3095 =?iso-8859-1?q?_x?=
3096 =?iso-8859-1?q?xx?=
3097 =?iso-8859-1?q?x_?=
3098 =?iso-8859-1?q?xx?=
3099 =?iso-8859-1?q?xx?=
3100 =?iso-8859-1?q?_x?=
3101 =?iso-8859-1?q?xx?=
3102 =?iso-8859-1?q?x_?=
3103 =?iso-8859-1?q?xx?=
3104 =?iso-8859-1?q?xx?=
3105 =?iso-8859-1?q?_x?=
3106 =?iso-8859-1?q?xx?=
3107 =?iso-8859-1?q?x_?=
3108 =?iso-8859-1?q?xx?=
3109 =?iso-8859-1?q?xx?=
3110 =?iso-8859-1?q?_x?=
3111 =?iso-8859-1?q?xx?=
3112 =?iso-8859-1?q?x_?=
3113 =?iso-8859-1?q?xx?=
3114 =?iso-8859-1?q?xx?=
3115 =?iso-8859-1?q?_x?=
3116 =?iso-8859-1?q?xx?=
3117 =?iso-8859-1?q?x_?=
3118 =?iso-8859-1?q?xx?=
3119 =?iso-8859-1?q?xx?=
3120 =?iso-8859-1?q?_x?=
3121 =?iso-8859-1?q?xx?=
3122 =?iso-8859-1?q?x_?=
3123 =?iso-8859-1?q?xx?=
3124 =?iso-8859-1?q?xx?=
3125 =?iso-8859-1?q?_?=""")
3126 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003127 h = Header(charset='iso-8859-1', maxlinelen=40)
3128 h.append('xxxx ' * 20)
Guido van Rossum9604e662007-08-30 03:46:43 +00003129 s = h.encode()
3130 eq(s, """\
3131=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
3132 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
3133 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
3134 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
3135 =?iso-8859-1?q?_xxxx_xxxx_?=""")
3136 eq(x, str(make_header(decode_header(s))))
3137
3138 def test_base64_splittable(self):
3139 eq = self.ndiffAssertEqual
3140 h = Header(charset='koi8-r', maxlinelen=20)
3141 x = 'xxxx ' * 20
3142 h.append(x)
3143 s = h.encode()
3144 eq(s, """\
3145=?koi8-r?b?eHh4?=
3146 =?koi8-r?b?eCB4?=
3147 =?koi8-r?b?eHh4?=
3148 =?koi8-r?b?IHh4?=
3149 =?koi8-r?b?eHgg?=
3150 =?koi8-r?b?eHh4?=
3151 =?koi8-r?b?eCB4?=
3152 =?koi8-r?b?eHh4?=
3153 =?koi8-r?b?IHh4?=
3154 =?koi8-r?b?eHgg?=
3155 =?koi8-r?b?eHh4?=
3156 =?koi8-r?b?eCB4?=
3157 =?koi8-r?b?eHh4?=
3158 =?koi8-r?b?IHh4?=
3159 =?koi8-r?b?eHgg?=
3160 =?koi8-r?b?eHh4?=
3161 =?koi8-r?b?eCB4?=
3162 =?koi8-r?b?eHh4?=
3163 =?koi8-r?b?IHh4?=
3164 =?koi8-r?b?eHgg?=
3165 =?koi8-r?b?eHh4?=
3166 =?koi8-r?b?eCB4?=
3167 =?koi8-r?b?eHh4?=
3168 =?koi8-r?b?IHh4?=
3169 =?koi8-r?b?eHgg?=
3170 =?koi8-r?b?eHh4?=
3171 =?koi8-r?b?eCB4?=
3172 =?koi8-r?b?eHh4?=
3173 =?koi8-r?b?IHh4?=
3174 =?koi8-r?b?eHgg?=
3175 =?koi8-r?b?eHh4?=
3176 =?koi8-r?b?eCB4?=
3177 =?koi8-r?b?eHh4?=
3178 =?koi8-r?b?IA==?=""")
3179 eq(x, str(make_header(decode_header(s))))
3180 h = Header(charset='koi8-r', maxlinelen=40)
3181 h.append(x)
3182 s = h.encode()
3183 eq(s, """\
3184=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
3185 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
3186 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
3187 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
3188 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
3189 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
3190 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003191
3192 def test_us_ascii_header(self):
3193 eq = self.assertEqual
3194 s = 'hello'
3195 x = decode_header(s)
3196 eq(x, [('hello', None)])
3197 h = make_header(x)
3198 eq(s, h.encode())
3199
3200 def test_string_charset(self):
3201 eq = self.assertEqual
3202 h = Header()
3203 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00003204 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003205
3206## def test_unicode_error(self):
3207## raises = self.assertRaises
3208## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
3209## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
3210## h = Header()
3211## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
3212## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
3213## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
3214
3215 def test_utf8_shortest(self):
3216 eq = self.assertEqual
3217 h = Header('p\xf6stal', 'utf-8')
3218 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
3219 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
3220 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
3221
3222 def test_bad_8bit_header(self):
3223 raises = self.assertRaises
3224 eq = self.assertEqual
3225 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
3226 raises(UnicodeError, Header, x)
3227 h = Header()
3228 raises(UnicodeError, h.append, x)
3229 e = x.decode('utf-8', 'replace')
3230 eq(str(Header(x, errors='replace')), e)
3231 h.append(x, errors='replace')
3232 eq(str(h), e)
3233
3234 def test_encoded_adjacent_nonencoded(self):
3235 eq = self.assertEqual
3236 h = Header()
3237 h.append('hello', 'iso-8859-1')
3238 h.append('world')
3239 s = h.encode()
3240 eq(s, '=?iso-8859-1?q?hello?= world')
3241 h = make_header(decode_header(s))
3242 eq(h.encode(), s)
3243
3244 def test_whitespace_eater(self):
3245 eq = self.assertEqual
3246 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
3247 parts = decode_header(s)
3248 eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
3249 hdr = make_header(parts)
3250 eq(hdr.encode(),
3251 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
3252
3253 def test_broken_base64_header(self):
3254 raises = self.assertRaises
R. David Murraye06528c2010-08-03 23:35:44 +00003255 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003256 raises(errors.HeaderParseError, decode_header, s)
3257
3258
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00003259
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003260# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    """Tests for RFC 2231 header parameter encoding and decoding."""

    def test_get_param(self):
        """get_param('title') on msg_29.txt, with and without unquoting."""
        msg = self._msgobj('msg_29.txt')
        unquoted = msg.get_param('title')
        self.assertEqual(
            unquoted,
            ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        quoted = msg.get_param('title', unquote=False)
        self.assertEqual(
            quoted,
            ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        """set_param() with charset/language, read back and flattened."""
        check = self.ndiffAssertEqual
        title = 'This is even more ***fun*** isn\'t it!'
        msg = Message()
        # Charset only: the language slot reads back as an empty string.
        msg.set_param('title', title, charset='us-ascii')
        check(msg.get_param('title'), ('us-ascii', '', title))
        msg.set_param('title', title, charset='us-ascii', language='en')
        check(msg.get_param('title'), ('us-ascii', 'en', title))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', title, charset='us-ascii', language='en')
        flattened = msg.as_string(maxheaderlen=78)
        check(flattened, """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")

    def test_del_param(self):
        """del_param() removes one parameter and leaves the others in place."""
        check = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        flattened = msg.as_string(maxheaderlen=78)
        check(flattened, """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        """get_content_charset() on msg_32.txt returns 'us-ascii'."""
        msg = self._msgobj('msg_32.txt')
        self.assertEqual(msg.get_content_charset(), 'us-ascii')

    def test_rfc2231_no_language_or_charset(self):
        """Continued but unencoded NAME segments join into a plain string."""
        text = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        parsed = email.message_from_string(text)
        name = parsed.get_param('NAME')
        self.assertFalse(isinstance(name, tuple))
        self.assertEqual(
            name,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        """Empty charset and language fields in an encoded filename."""
        text = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(text)
        self.assertEqual(parsed.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        """Same input and expectation as the non-'_encoded' filename test."""
        text = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(text)
        self.assertEqual(parsed.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        """Only the segment marked with a trailing '*' is percent-decoded."""
        text = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(text)
        self.assertEqual(
            parsed.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        """Segments without a trailing '*' keep their percent escapes."""
        text = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(text)
        self.assertEqual(
            parsed.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        """Empty charset and language fields in an encoded boundary."""
        text = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        parsed = email.message_from_string(text)
        self.assertEqual(parsed.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        """Continued charset parameter; the expected value is lower case."""
        # This is a nonsensical charset value, but tests the code anyway
        text = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        parsed = email.message_from_string(text)
        self.assertEqual(parsed.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        """A bogus charset name on the first segment still yields the filename."""
        text = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(text)
        self.assertEqual(parsed.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        """Bogus encoding name plus non-ascii bytes in the charset value."""
        text = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        parsed = email.message_from_string(text)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(parsed.get_content_charset(), None)

    def test_rfc2231_bad_character_in_charset(self):
        """Valid encoding name but non-ascii bytes in the charset value."""
        text = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        parsed = email.message_from_string(text)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(parsed.get_content_charset(), None)

    def test_rfc2231_bad_character_in_filename(self):
        """A stray %E2 in an ascii filename shows up as U+FFFD."""
        text = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        parsed = email.message_from_string(text)
        self.assertEqual(parsed.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        """An unknown charset label on the filename still yields the name."""
        text = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        parsed = email.message_from_string(text)
        self.assertEqual(parsed.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        """One apostrophe in extended segments is not a charset delimiter."""
        text = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        parsed = email.message_from_string(text)
        charset, language, value = parsed.get_param('name')
        self.assertEqual(charset, None)
        self.assertEqual(language, None)
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        """One apostrophe in plain continued segments gives a plain string."""
        text = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        parsed = email.message_from_string(text)
        name = parsed.get_param('name')
        self.assertFalse(isinstance(name, tuple))
        self.assertEqual(name, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        """Charset and language are split off; a later apostrophe stays in the value."""
        text = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        parsed = email.message_from_string(text)
        charset, language, value = parsed.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        """Without the '*' marker the charset'lang' prefix stays literal."""
        text = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        parsed = email.message_from_string(text)
        name = parsed.get_param('name')
        self.assertFalse(isinstance(name, tuple))
        self.assertEqual(name, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        """An ordinary quoted parameter is returned as a plain string."""
        text = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        parsed = email.message_from_string(text)
        self.assertEqual(parsed.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        """Encoded first segment followed by a mix of plain and encoded ones."""
        text = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        parsed = email.message_from_string(text)
        charset, language, value = parsed.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        """Plain first segment followed by encoded ones."""
        text = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        parsed = email.message_from_string(text)
        charset, language, value = parsed.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, 'My Document For You')
3569
3570
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00003571
R. David Murrayfa606922010-01-16 18:41:00 +00003572# Tests to ensure that signed parts of an email are completely preserved, as
3573# required by RFC1847 section 2.1. Note that these are incomplete, because the
3574# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    """Check that the first MIME part of msg_45.txt survives regeneration."""

    def _msg_and_obj(self, filename):
        """Return ``(raw_text, parsed_message)`` for the named data file."""
        with openfile(findfile(filename)) as fp:
            raw_text = fp.read()
        parsed = email.message_from_string(raw_text)
        return raw_text, parsed

    def _signed_parts_eq(self, original, result):
        """Assert that the first boundary-delimited part is equal in both texts."""
        # Extract the first mime part of each message
        import re
        first_part = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        expected = first_part.search(original).group(2)
        actual = first_part.search(result).group(2)
        self.assertEqual(actual, expected)

    def test_long_headers_as_string(self):
        """as_string() with default settings."""
        original, msg = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(original, msg.as_string())

    def test_long_headers_as_string_maxheaderlen(self):
        """as_string() with ``maxheaderlen=60``."""
        original, msg = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(original, msg.as_string(maxheaderlen=60))

    def test_long_headers_flatten(self):
        """Generator.flatten() into an in-memory buffer."""
        original, msg = self._msg_and_obj('msg_45.txt')
        buffer = StringIO()
        generator = Generator(buffer)
        generator.flatten(msg)
        self._signed_parts_eq(original, buffer.getvalue())
3607
3608
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00003609
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003610def _testclasses():
3611 mod = sys.modules[__name__]
3612 return [getattr(mod, name) for name in dir(mod) if name.startswith('Test')]
3613
3614
def suite():
    """Build one TestSuite holding the tests of every ``Test*`` class.

    Returns:
        A ``unittest.TestSuite`` containing one sub-suite per class
        returned by ``_testclasses()``.
    """
    all_tests = unittest.TestSuite()
    # loadTestsFromTestCase() is the supported equivalent of the obsolete
    # unittest.makeSuite() helper, which newer Python releases removed.
    load = unittest.defaultTestLoader.loadTestsFromTestCase
    for testclass in _testclasses():
        all_tests.addTest(load(testclass))
    return all_tests
3620
3621
def test_main():
    """Run each ``Test*`` class through ``run_unittest``, one call per class."""
    classes = _testclasses()
    for cls in classes:
        run_unittest(cls)
3625
3626
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00003627
# When executed as a script, run the tests returned by suite().
if __name__ == '__main__':
    unittest.main(defaultTest='suite')