blob: ee991eef86b08097978ea29f6c3e0be44a5ffa1d [file] [log] [blame]
Benjamin Petersonffeda292010-01-09 18:48:46 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
6import sys
7import time
8import base64
9import difflib
10import unittest
11import warnings
12
13from io import StringIO
14from itertools import chain
15
16import email
17
18from email.charset import Charset
19from email.header import Header, decode_header, make_header
20from email.parser import Parser, HeaderParser
21from email.generator import Generator, DecodedGenerator
22from email.message import Message
23from email.mime.application import MIMEApplication
24from email.mime.audio import MIMEAudio
25from email.mime.text import MIMEText
26from email.mime.image import MIMEImage
27from email.mime.base import MIMEBase
28from email.mime.message import MIMEMessage
29from email.mime.multipart import MIMEMultipart
30from email import utils
31from email import errors
32from email import encoders
33from email import iterators
34from email import base64mime
35from email import quoprimime
36
Benjamin Petersonee8712c2008-05-20 21:35:26 +000037from test.support import findfile, run_unittest
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038from email.test import __file__ as landmark
39
40
# String constants shared by the tests in this module.
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
44
45
Ezio Melotti19f2aeb2010-11-21 01:30:29 +000046
def openfile(filename, *args, **kws):
    # Open *filename* from the 'data' directory that sits next to the
    # email.test package (located through its __file__, imported as
    # `landmark`).  Extra arguments are handed straight to open().
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
50
51
Ezio Melotti19f2aeb2010-11-21 01:30:29 +000052
Guido van Rossum8b3febe2007-08-30 01:15:14 +000053# Base test class
class TestEmailBase(unittest.TestCase):
    # Common helpers for the email test cases in this module.

    def ndiffAssertEqual(self, first, second):
        """Fail with a line-by-line ndiff unless first and second are equal."""
        if first != second:
            # Compare the repr() of each line so that whitespace and
            # escape differences show up in the diff.
            first_lines, second_lines = (
                [repr(row) for row in str(value).splitlines()]
                for value in (first, second))
            delta = difflib.ndiff(first_lines, second_lines)
            raise self.failureException(NL + NL.join(delta))

    def _msgobj(self, filename):
        # Parse the named test data file into a message object.
        path = findfile(filename)
        with openfile(path) as handle:
            message = email.message_from_file(handle)
        return message
68
69
Ezio Melotti19f2aeb2010-11-21 01:30:29 +000070
Guido van Rossum8b3febe2007-08-30 01:15:14 +000071# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    # Exercises the header, parameter, charset and payload accessors of
    # email.message.Message.  Comments are used instead of docstrings so
    # that unittest keeps reporting each test by its method name.

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A header that is absent yields the supplied fallback object.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        # The data file is read inside a `with` block so the handle is
        # closed as soon as the text has been loaded.
        with openfile(findfile('msg_46.txt')) as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        for name in ('from', 'From', 'FROM', 'to', 'To', 'TO'):
            self.assertIn(name, msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the first line is the envelope "From " line;
        # everything after it must match the plain rendering.
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # There is no Content-Type header, which is the default lookup.
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        for name in ('header', 'Header', 'HEADER'):
            self.assertIn(name, msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        # Undecodable base64 is handed back as the raw payload bytes.
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        # \xdf is LATIN SMALL LETTER SHARP S; written as an escape so the
        # literal cannot be damaged by a source-encoding mismatch.  Its
        # UTF-8 form is the %C3%9F expected below.
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fu\xdfballer.ppt")
        self.assertEqual(
            'attachment; filename*="utf-8\'\'Fu%C3%9Fballer.ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        # Same sharp-s filename, supplied as a (charset, language, value)
        # triple; in iso-8859-1 the character is the single byte %DF.
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fu\xdfballer.ppt'))
        self.assertEqual(
            'attachment; filename*="iso-8859-1\'\'Fu%DFballer.ppt"',
            msg['Content-Disposition'])
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000530
Ezio Melotti19f2aeb2010-11-21 01:30:29 +0000531
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000532# Test the email.encoders module
533class TestEncoders(unittest.TestCase):
534 def test_encode_empty_payload(self):
535 eq = self.assertEqual
536 msg = Message()
537 msg.set_charset('us-ascii')
538 eq(msg['content-transfer-encoding'], '7bit')
539
540 def test_default_cte(self):
541 eq = self.assertEqual
Ezio Melottic30bb7d2010-04-22 11:58:06 +0000542 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000543 msg = MIMEText('hello world')
544 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic30bb7d2010-04-22 11:58:06 +0000545 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000546 msg = MIMEText('hello \xf8 world')
547 eq(msg['content-transfer-encoding'], '8bit')
548 # And now with a different charset
549 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
550 eq(msg['content-transfer-encoding'], 'quoted-printable')
551
R. David Murrayf870d872010-05-06 01:53:03 +0000552 def test_encode7or8bit(self):
553 # Make sure a charset whose input character set is 8bit but
554 # whose output character set is 7bit gets a transfer-encoding
555 # of 7bit.
556 eq = self.assertEqual
R. David Murrayd2d08c62010-06-03 02:05:47 +0000557 msg = MIMEText('æ–‡', _charset='euc-jp')
R. David Murrayf870d872010-05-06 01:53:03 +0000558 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000559
Ezio Melotti19f2aeb2010-11-21 01:30:29 +0000560
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000561# Test long header wrapping
562class TestLongHeaders(TestEmailBase):
563 def test_split_long_continuation(self):
564 eq = self.ndiffAssertEqual
565 msg = email.message_from_string("""\
566Subject: bug demonstration
567\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
568\tmore text
569
570test
571""")
572 sfp = StringIO()
573 g = Generator(sfp)
574 g.flatten(msg)
575 eq(sfp.getvalue(), """\
576Subject: bug demonstration
577\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
578\tmore text
579
580test
581""")
582
583 def test_another_long_almost_unsplittable_header(self):
584 eq = self.ndiffAssertEqual
585 hstr = """\
586bug demonstration
587\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
588\tmore text"""
589 h = Header(hstr, continuation_ws='\t')
590 eq(h.encode(), """\
591bug demonstration
592\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
593\tmore text""")
594 h = Header(hstr.replace('\t', ' '))
595 eq(h.encode(), """\
596bug demonstration
597 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
598 more text""")
599
600 def test_long_nonstring(self):
601 eq = self.ndiffAssertEqual
602 g = Charset("iso-8859-1")
603 cz = Charset("iso-8859-2")
604 utf8 = Charset("utf-8")
605 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
606 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
607 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
608 b'bef\xf6rdert. ')
609 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
610 b'd\xf9vtipu.. ')
611 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
612 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
613 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
614 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
615 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
616 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
617 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
618 '\u3044\u307e\u3059\u3002')
619 h = Header(g_head, g, header_name='Subject')
620 h.append(cz_head, cz)
621 h.append(utf8_head, utf8)
622 msg = Message()
623 msg['Subject'] = h
624 sfp = StringIO()
625 g = Generator(sfp)
626 g.flatten(msg)
627 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000628Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
629 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
630 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
631 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
632 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
633 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
634 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
635 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
636 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
637 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
638 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000639
640""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000641 eq(h.encode(maxlinelen=76), """\
642=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
643 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
644 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
645 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
646 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
647 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
648 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
649 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
650 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
651 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
652 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000653
654 def test_long_header_encode(self):
655 eq = self.ndiffAssertEqual
656 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
657 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
658 header_name='X-Foobar-Spoink-Defrobnit')
659 eq(h.encode(), '''\
660wasnipoop; giraffes="very-long-necked-animals";
661 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
662
663 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
664 eq = self.ndiffAssertEqual
665 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
666 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
667 header_name='X-Foobar-Spoink-Defrobnit',
668 continuation_ws='\t')
669 eq(h.encode(), '''\
670wasnipoop; giraffes="very-long-necked-animals";
671 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
672
673 def test_long_header_encode_with_tab_continuation(self):
674 eq = self.ndiffAssertEqual
675 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
676 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
677 header_name='X-Foobar-Spoink-Defrobnit',
678 continuation_ws='\t')
679 eq(h.encode(), '''\
680wasnipoop; giraffes="very-long-necked-animals";
681\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
682
683 def test_header_splitter(self):
684 eq = self.ndiffAssertEqual
685 msg = MIMEText('')
686 # It'd be great if we could use add_header() here, but that doesn't
687 # guarantee an order of the parameters.
688 msg['X-Foobar-Spoink-Defrobnit'] = (
689 'wasnipoop; giraffes="very-long-necked-animals"; '
690 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
691 sfp = StringIO()
692 g = Generator(sfp)
693 g.flatten(msg)
694 eq(sfp.getvalue(), '''\
695Content-Type: text/plain; charset="us-ascii"
696MIME-Version: 1.0
697Content-Transfer-Encoding: 7bit
698X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
699 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
700
701''')
702
703 def test_no_semis_header_splitter(self):
704 eq = self.ndiffAssertEqual
705 msg = Message()
706 msg['From'] = 'test@dom.ain'
707 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
708 msg.set_payload('Test')
709 sfp = StringIO()
710 g = Generator(sfp)
711 g.flatten(msg)
712 eq(sfp.getvalue(), """\
713From: test@dom.ain
714References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
715 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
716
717Test""")
718
719 def test_no_split_long_header(self):
720 eq = self.ndiffAssertEqual
721 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000722 h = Header(hstr)
723 # These come on two lines because Headers are really field value
724 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000725 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000726References:
727 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
728 h = Header('x' * 80)
729 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000730
731 def test_splitting_multiple_long_lines(self):
732 eq = self.ndiffAssertEqual
733 hstr = """\
734from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
735\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
736\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
737"""
738 h = Header(hstr, continuation_ws='\t')
739 eq(h.encode(), """\
740from babylon.socal-raves.org (localhost [127.0.0.1]);
741 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
742 for <mailman-admin@babylon.socal-raves.org>;
743 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
744\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
745 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
746 for <mailman-admin@babylon.socal-raves.org>;
747 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
748\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
749 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
750 for <mailman-admin@babylon.socal-raves.org>;
751 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
752
753 def test_splitting_first_line_only_is_long(self):
754 eq = self.ndiffAssertEqual
755 hstr = """\
756from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
757\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
758\tid 17k4h5-00034i-00
759\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
760 h = Header(hstr, maxlinelen=78, header_name='Received',
761 continuation_ws='\t')
762 eq(h.encode(), """\
763from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
764 helo=cthulhu.gerg.ca)
765\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
766\tid 17k4h5-00034i-00
767\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
768
769 def test_long_8bit_header(self):
770 eq = self.ndiffAssertEqual
771 msg = Message()
772 h = Header('Britische Regierung gibt', 'iso-8859-1',
773 header_name='Subject')
774 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +0000775 eq(h.encode(maxlinelen=76), """\
776=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
777 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000778 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +0000779 eq(msg.as_string(maxheaderlen=76), """\
780Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
781 =?iso-8859-1?q?hore-Windkraftprojekte?=
782
783""")
784 eq(msg.as_string(maxheaderlen=0), """\
785Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000786
787""")
788
789 def test_long_8bit_header_no_charset(self):
790 eq = self.ndiffAssertEqual
791 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +0000792 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
793 'f\xfcr Offshore-Windkraftprojekte '
794 '<a-very-long-address@example.com>')
795 msg['Reply-To'] = header_string
796 self.assertRaises(UnicodeEncodeError, msg.as_string)
797 msg = Message()
798 msg['Reply-To'] = Header(header_string, 'utf-8',
799 header_name='Reply-To')
800 eq(msg.as_string(maxheaderlen=78), """\
801Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
802 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000803
804""")
805
806 def test_long_to_header(self):
807 eq = self.ndiffAssertEqual
808 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
809 '<someone@eecs.umich.edu>,'
810 '"Someone Test #B" <someone@umich.edu>, '
811 '"Someone Test #C" <someone@eecs.umich.edu>, '
812 '"Someone Test #D" <someone@eecs.umich.edu>')
813 msg = Message()
814 msg['To'] = to
815 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +0000816To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000817 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +0000818 "Someone Test #C" <someone@eecs.umich.edu>,
819 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000820
821''')
822
823 def test_long_line_after_append(self):
824 eq = self.ndiffAssertEqual
825 s = 'This is an example of string which has almost the limit of header length.'
826 h = Header(s)
827 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +0000828 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000829This is an example of string which has almost the limit of header length.
830 Add another line.""")
831
832 def test_shorter_line_with_append(self):
833 eq = self.ndiffAssertEqual
834 s = 'This is a shorter line.'
835 h = Header(s)
836 h.append('Add another sentence. (Surprise?)')
837 eq(h.encode(),
838 'This is a shorter line. Add another sentence. (Surprise?)')
839
840 def test_long_field_name(self):
841 eq = self.ndiffAssertEqual
842 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +0000843 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
844 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
845 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
846 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000847 h = Header(gs, 'iso-8859-1', header_name=fn)
848 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +0000849 eq(h.encode(maxlinelen=76), """\
850=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
851 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
852 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
853 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000854
855 def test_long_received_header(self):
856 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
857 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
858 'Wed, 05 Mar 2003 18:10:18 -0700')
859 msg = Message()
860 msg['Received-1'] = Header(h, continuation_ws='\t')
861 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000862 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000863 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000864Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
865 Wed, 05 Mar 2003 18:10:18 -0700
866Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
867 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000868
869""")
870
871 def test_string_headerinst_eq(self):
872 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
873 'tu-muenchen.de> (David Bremner\'s message of '
874 '"Thu, 6 Mar 2003 13:58:21 +0100")')
875 msg = Message()
876 msg['Received-1'] = Header(h, header_name='Received-1',
877 continuation_ws='\t')
878 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000879 # XXX This should be splitting on spaces not commas.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000880 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000881Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
882 6 Mar 2003 13:58:21 +0100\")
883Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
884 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000885
886""")
887
888 def test_long_unbreakable_lines_with_continuation(self):
889 eq = self.ndiffAssertEqual
890 msg = Message()
891 t = """\
892iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
893 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
894 msg['Face-1'] = t
895 msg['Face-2'] = Header(t, header_name='Face-2')
Barry Warsawbef9d212007-08-31 10:55:37 +0000896 # XXX This splitting is all wrong. It the first value line should be
897 # snug against the field name.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000898 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +0000899Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000900 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000901 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +0000902Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +0000903 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000904 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
905
906""")
907
908 def test_another_long_multiline_header(self):
909 eq = self.ndiffAssertEqual
910 m = ('Received: from siimage.com '
911 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +0000912 'Microsoft SMTPSVC(5.0.2195.4905); '
913 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000914 msg = email.message_from_string(m)
915 eq(msg.as_string(maxheaderlen=78), '''\
Barry Warsawbef9d212007-08-31 10:55:37 +0000916Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
917 Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000918
919''')
920
921 def test_long_lines_with_different_header(self):
922 eq = self.ndiffAssertEqual
923 h = ('List-Unsubscribe: '
924 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
925 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
926 '?subject=unsubscribe>')
927 msg = Message()
928 msg['List'] = h
929 msg['List'] = Header(h, header_name='List')
930 eq(msg.as_string(maxheaderlen=78), """\
931List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000932 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000933List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000934 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000935
936""")
937
938
Ezio Melotti19f2aeb2010-11-21 01:30:29 +0000939
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000940# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # The fixture is a message whose body starts with "From "; the two tests
    # flatten it with mangle_from_ switched on and off.
    def setUp(self):
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def test_mangled_from(self):
        # With mangling on, the body line starting with "From " gets a ">"
        # prefix.
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        self.assertEqual(out.getvalue(), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        # With mangling off, the body is written out unchanged.
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        self.assertEqual(out.getvalue(), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")
971
972
Ezio Melotti19f2aeb2010-11-21 01:30:29 +0000973
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000974# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        with open(findfile('audiotest.au', datadir), 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is base64 text; decoding it must give back the raw
        # bytes read in setUp().
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list serves as a sentinel: identity with it shows that
        # get_param() handed back the failobj itself.  assertIs reports both
        # operands on failure, unlike assertTrue(a is b).
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1018
1019
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001020
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001021# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Raw GIF bytes from the test data directory, plus a MIMEImage built
        # from them with the subtype left to be guessed.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is base64 text; decoding it must give back the raw
        # bytes read in setUp().
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list serves as a sentinel: identity with it shows that
        # get_param() handed back the failobj itself.  assertIs reports both
        # operands on failure, unlike assertTrue(a is b).
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1059
1060
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001061
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001062# Test the basic MIMEApplication class
1063class TestMIMEApplication(unittest.TestCase):
1064 def test_headers(self):
1065 eq = self.assertEqual
Barry Warsaw8b2af272007-08-31 03:04:26 +00001066 msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001067 eq(msg.get_content_type(), 'application/octet-stream')
1068 eq(msg['content-transfer-encoding'], 'base64')
1069
1070 def test_body(self):
1071 eq = self.assertEqual
Barry Warsaw8c571042007-08-30 19:17:18 +00001072 bytes = b'\xfa\xfb\xfc\xfd\xfe\xff'
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001073 msg = MIMEApplication(bytes)
R. David Murray99147c42010-06-04 16:15:34 +00001074 eq(msg.get_payload(), '+vv8/f7/')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001075 eq(msg.get_payload(decode=True), bytes)
1076
1077
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001078
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001079# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list serves as a sentinel: identity with it shows that
        # get_param() handed back the failobj itself.  assertIs reports both
        # operands on failure, unlike assertTrue(a is b).
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing,
                                          header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        # Performs the same checks as test_charset.
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1130
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001131
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001132
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001133# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    def setUp(self):
        # Build a multipart/mixed container holding a short text part and a
        # GIF attachment, and keep references to all three for the tests.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # time.timezone and time.altzone are seconds *west* of UTC, so a
        # positive value corresponds to a negative numeric zone.
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        if tzsecs > 0:
            sign = '-'
        else:
            sign = '+'
        # Format the magnitude as hhmm.  Using abs() keeps a second sign out
        # of the digits for zones east of UTC, and splitting into hours and
        # minutes keeps non-whole-hour offsets correct.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing and re-flattening this text must reproduce it exactly.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): expected nesting is written with four spaces per
        # level -- confirm against the output of iterators._structure().
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): expected nesting is written with four spaces per
        # level -- confirm against the output of iterators._structure().
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type: the message must
        # flatten back to the text below.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the very last thing in the text, with no
        # newline after it.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001505
1506
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001507
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001508# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    def test_parse_missing_minor_type(self):
        parsed = self._msgobj('msg_14.txt')
        self.assertEqual(parsed.get_content_type(), 'text/plain')
        self.assertEqual(parsed.get_content_maintype(), 'text')
        self.assertEqual(parsed.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        outer = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = outer.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        first_defect = inner.defects[0]
        self.assertTrue(isinstance(first_defect,
                                   errors.StartBoundaryNotFoundDefect))

    def test_multipart_no_boundary(self):
        parsed = self._msgobj('msg_25.txt')
        self.assertTrue(isinstance(parsed.get_payload(), str))
        self.assertEqual(len(parsed.defects), 2)
        no_boundary, invariant = parsed.defects[0], parsed.defects[1]
        self.assertTrue(isinstance(no_boundary,
                                   errors.NoBoundaryInMultipartDefect))
        self.assertTrue(isinstance(invariant,
                                   errors.MultipartInvariantViolationDefect))

    def test_invalid_content_type(self):
        check = self.assertEqual
        message = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        message['Content-Type'] = 'text'
        check(message.get_content_maintype(), 'text')
        check(message.get_content_subtype(), 'plain')
        check(message.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del message['content-type']
        message['Content-Type'] = 'foo'
        check(message.get_content_maintype(), 'text')
        check(message.get_content_subtype(), 'plain')
        check(message.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        out = StringIO()
        Generator(out).flatten(message)
        self.ndiffAssertEqual(out.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # The whole body, boundary lines included, is expected back as one
        # string payload.
        parsed = self._msgobj('msg_31.txt')
        self.ndiffAssertEqual(parsed.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        parsed = self._msgobj('msg_35.txt')
        self.ndiffAssertEqual(parsed.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        parsed = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(parsed, 'defects'))
        self.assertEqual(len(parsed.defects), 2)
        no_boundary, invariant = parsed.defects[0], parsed.defects[1]
        self.assertTrue(isinstance(no_boundary,
                                   errors.NoBoundaryInMultipartDefect))
        self.assertTrue(isinstance(invariant,
                                   errors.MultipartInvariantViolationDefect))

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        embedded = outer.get_payload(1)
        bad = embedded.get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertTrue(isinstance(bad.defects[0],
                                   errors.StartBoundaryNotFoundDefect))

    def test_first_line_is_continuation_header(self):
        # The text starts with a continuation line: no headers are expected,
        # and one defect that records the offending line.
        check = self.assertEqual
        text = ' Line 1\nLine 2\nLine 3'
        parsed = email.message_from_string(text)
        check(parsed.keys(), [])
        check(parsed.get_payload(), 'Line 2\nLine 3')
        check(len(parsed.defects), 1)
        defect = parsed.defects[0]
        self.assertTrue(isinstance(defect,
                                   errors.FirstHeaderLineIsContinuationDefect))
        check(defect.line, ' Line 1\n')
1619
1620
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001621
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001622# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """Exercise decode_header() and make_header() on RFC 2047 encoded words."""

    def test_rfc2047_multiline(self):
        """Decode a folded header, rebuild it, and re-encode at 76 columns."""
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        # The same encoded word appears twice in the input.
        encoded_word = (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re:', None),
            encoded_word,
            (b'baz foo bar', None),
            encoded_word,
            ])
        rebuilt = make_header(chunks)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(rebuilt.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        """Decode a Q-encoded name followed by plain text, then rebuild it."""
        chunks = decode_header(
            '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>')
        self.assertEqual(chunks, [
            (b'Andr\xe9', 'iso-8859-1'),
            (b'Pirard <pirard@dom.ain>', None),
            ])
        self.assertEqual(str(make_header(chunks)),
                         'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        """Decode alternating plain and B-encoded runs, then rebuild them."""
        raw = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
               '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None),
            (b'lazy dog', 'iso-8859-1'),
            ])
        self.assertEqual(str(make_header(chunks)),
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        """Encoded words glued to surrounding text come back undecoded."""
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        """Whitespace-delimited encoded words are decoded individually."""
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm', None),
            (b'\xf6', 'iso-8859-1'),
            (b'rg', None),
            (b'\xe5', 'iso-8859-1'),
            (b'sbord', None),
            ])

    def test_rfc2047_B_bad_padding(self):
        """B-encoded words decode even when the '=' padding is short."""
        template = '=?iso-8859-1?B?%s?='
        # Only inputs that decode to whole bytes are tried.
        cases = (
            ('dm==', b'v'),
            ('dm=', b'v'),
            ('dm', b'v'),
            ('dmk=', b'vi'),
            ('dmk', b'vi'),
            )
        for encoded, decoded in cases:
            self.assertEqual(decode_header(template % encoded),
                             [(decoded, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        """An invalid '=zz' escape in a Q-encoded word is kept literally."""
        # issue 10004.
        # NOTE(review): 'iso-8659-1' looks like a typo for 'iso-8859-1'; it
        # is used on both sides of the assertion, so it is left untouched.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        expected = [(b'andr\xe9=zz', 'iso-8659-1')]
        self.assertEqual(decode_header(raw), expected)
1686
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001687
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001688# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for message/rfc822 containers, both built and parsed."""

    def setUp(self):
        """Read the text of msg_11.txt into self._text."""
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def _two_part_message(self, epilogue):
        """Return a multipart/mixed Message holding 'One' and 'Two'.

        The preamble is always 'MIME message'; *epilogue* is stored as
        given.  From/To/Subject are set before Content-Type so the headers
        flatten in the order the expected texts list them.
        """
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = epilogue
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        for body in ('One', 'Two'):
            msg.attach(MIMEText(body))
        return msg

    def _check_digest_default_types(self, filename):
        """Check the content types of the two-part message in *filename*.

        Both top-level parts must report message/rfc822 and the message
        each one wraps must report text/plain, for get_default_type() and
        get_content_type() alike.
        """
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        containers = [msg.get_payload(0), msg.get_payload(1)]
        for container in containers:
            eq(container.get_default_type(), 'message/rfc822')
            eq(container.get_content_type(), 'message/rfc822')
        for container in containers:
            inner = container.get_payload(0)
            eq(inner.get_default_type(), 'text/plain')
            eq(inner.get_content_type(), 'text/plain')

    def test_type_error(self):
        """MIMEMessage() rejects a plain string with TypeError."""
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        """MIMEMessage(m) wraps *m* itself as its one-element payload."""
        eq = self.assertEqual
        unless = self.assertTrue
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        unless(isinstance(payload, list))
        eq(len(payload), 1)
        subpart = payload[0]
        # The very same object must be stored, not a copy.
        unless(subpart is m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        """Attaching a second message to a MIMEMessage raises an error."""
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        """Flattening a MIMEMessage emits outer headers then the inner one."""
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        Generator(s).flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        """Parsing msg_11.txt gives a message/rfc822 with one sub-Message."""
        eq = self.assertEqual
        unless = self.assertTrue
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        unless(isinstance(payload, list))
        eq(len(payload), 1)
        submsg = payload[0]
        unless(isinstance(submsg, Message))
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        """Walk the three subparts of the delivery status notification."""
        eq = self.assertEqual
        unless = self.assertTrue
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        unless(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the leading whitespace of the indented lines below
        # must match data/msg_16.txt exactly -- confirm against that file.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested
        # Message objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        unless(isinstance(dsn1, Message))
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        unless(isinstance(dsn2, Message))
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        unless(isinstance(payload, list))
        eq(len(payload), 1)
        subsubpart = payload[0]
        unless(isinstance(subsubpart, Message))
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        """A built message with an epilogue flattens to msg_21.txt's text."""
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = self._two_part_message('End of MIME message\n')
        sfp = StringIO()
        Generator(sfp).flatten(msg)
        self.ndiffAssertEqual(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        """A preamble without a trailing newline still gets its own line."""
        msg = self._two_part_message('')
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        """Check default and actual content types of the parts of msg_30."""
        self._check_digest_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        """Check default and actual content types of the parts of msg_28."""
        self._check_digest_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        """Digest subparts stay message/rfc822 after their headers go."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1 = MIMEMessage(MIMEText('message 1\n'))
        subpart2 = MIMEMessage(MIMEText('message 2\n'))
        container.attach(subpart1)
        container.attach(subpart2)
        for subpart in (subpart1, subpart2):
            eq(subpart.get_content_type(), 'message/rfc822')
            eq(subpart.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # Strip the explicit headers; the reported types must not change.
        for subpart in (subpart1, subpart2):
            del subpart['content-type']
            del subpart['mime-version']
        for subpart in (subpart1, subpart2):
            eq(subpart.get_content_type(), 'message/rfc822')
            eq(subpart.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        """Parts passed as _subparts become the payload, in order."""
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        """MIMEMultipart() with no arguments is already a multipart."""
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001987
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00001988
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):
    """Parse sample messages and check they regenerate to the same text."""

    def _msgobj(self, filename):
        """Return (parsed message, raw text) for the named data file."""
        with openfile(filename) as fp:
            raw = fp.read()
        return email.message_from_string(raw), raw

    def _idempotent(self, msg, text):
        """Assert that flattening *msg* without header wrapping gives *text*."""
        out = StringIO()
        # maxheaderlen=0 turns off header wrapping in the generator.
        Generator(out, maxheaderlen=0).flatten(msg)
        self.ndiffAssertEqual(text, out.getvalue())

    def test_parse_text_message(self):
        """Check msg_01.txt's type and charset, then round-trip it."""
        check = self.assertEqual
        parsed, raw = self._msgobj('msg_01.txt')
        check(parsed.get_content_type(), 'text/plain')
        check(parsed.get_content_maintype(), 'text')
        check(parsed.get_content_subtype(), 'plain')
        check(parsed.get_params()[1], ('charset', 'us-ascii'))
        check(parsed.get_param('charset'), 'us-ascii')
        check(parsed.preamble, None)
        check(parsed.epilogue, None)
        self._idempotent(parsed, raw)

    def test_parse_untyped_message(self):
        """msg_03.txt reports text/plain and has no parameters."""
        check = self.assertEqual
        parsed, raw = self._msgobj('msg_03.txt')
        check(parsed.get_content_type(), 'text/plain')
        check(parsed.get_params(), None)
        check(parsed.get_param('charset'), None)
        self._idempotent(parsed, raw)

    def test_simple_multipart(self):
        """Round-trip msg_04.txt."""
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        """Round-trip msg_02.txt."""
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        """Round-trip msg_27.txt."""
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        """Round-trip msg_28.txt."""
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        """Round-trip msg_06.txt."""
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        """Round-trip msg_05.txt."""
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        """Round-trip msg_16.txt."""
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        """Round-trip msg_21.txt."""
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        """Round-trip msg_23.txt."""
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        """Round-trip msg_24.txt."""
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        """Round-trip msg_31.txt."""
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        """Round-trip msg_32.txt."""
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        """Round-trip msg_33.txt."""
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        """Round-trip msg_34.txt."""
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        """Round-trip msg_12a.txt."""
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        """Round-trip msg_36.txt."""
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_content_type(self):
        """Inspect the parameters and three subparts of msg_05.txt."""
        check = self.assertEqual
        ok = self.assertTrue
        report, _ = self._msgobj('msg_05.txt')
        check(report.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(report.get_params())
        check(params['report-type'], 'delivery-status')
        check(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        check(report.preamble, 'This is a MIME-encapsulated message.\n')
        check(report.epilogue, '\n')
        check(len(report.get_payload()), 3)
        # Make sure the subparts are what we expect
        first = report.get_payload(0)
        check(first.get_content_type(), 'text/plain')
        check(first.get_payload(), 'Yadda yadda yadda\n')
        second = report.get_payload(1)
        check(second.get_content_type(), 'text/plain')
        check(second.get_payload(), 'Yadda yadda yadda\n')
        third = report.get_payload(2)
        check(third.get_content_type(), 'message/rfc822')
        ok(isinstance(third, Message))
        wrapped = third.get_payload()
        ok(isinstance(wrapped, list))
        check(len(wrapped), 1)
        innermost = wrapped[0]
        ok(isinstance(innermost, Message))
        check(innermost.get_payload(), 'Yadda yadda yadda\n')

    def test_parser(self):
        """msg_06.txt is a message/rfc822 wrapping one text/plain message."""
        check = self.assertEqual
        ok = self.assertTrue
        outer, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        check(outer.get_content_type(), 'message/rfc822')
        # The payload must be a list of exactly one sub-Message, and that
        # sub-Message must be text/plain.
        parts = outer.get_payload()
        ok(isinstance(parts, list))
        check(len(parts), 1)
        inner = parts[0]
        ok(isinstance(inner, Message))
        check(inner.get_content_type(), 'text/plain')
        ok(isinstance(inner.get_payload(), str))
        check(inner.get_payload(), '\n')
2140
2141
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002142
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002143# Test various other bits of the package's functionality
2144class TestMiscellaneous(TestEmailBase):
2145 def test_message_from_string(self):
2146 with openfile('msg_01.txt') as fp:
2147 text = fp.read()
2148 msg = email.message_from_string(text)
2149 s = StringIO()
2150 # Don't wrap/continue long headers since we're trying to test
2151 # idempotency.
2152 g = Generator(s, maxheaderlen=0)
2153 g.flatten(msg)
2154 self.assertEqual(text, s.getvalue())
2155
2156 def test_message_from_file(self):
2157 with openfile('msg_01.txt') as fp:
2158 text = fp.read()
2159 fp.seek(0)
2160 msg = email.message_from_file(fp)
2161 s = StringIO()
2162 # Don't wrap/continue long headers since we're trying to test
2163 # idempotency.
2164 g = Generator(s, maxheaderlen=0)
2165 g.flatten(msg)
2166 self.assertEqual(text, s.getvalue())
2167
2168 def test_message_from_string_with_class(self):
Georg Brandlab91fde2009-08-13 08:51:18 +00002169 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002170 with openfile('msg_01.txt') as fp:
2171 text = fp.read()
2172
2173 # Create a subclass
2174 class MyMessage(Message):
2175 pass
2176
2177 msg = email.message_from_string(text, MyMessage)
2178 unless(isinstance(msg, MyMessage))
2179 # Try something more complicated
2180 with openfile('msg_02.txt') as fp:
2181 text = fp.read()
2182 msg = email.message_from_string(text, MyMessage)
2183 for subpart in msg.walk():
2184 unless(isinstance(subpart, MyMessage))
2185
2186 def test_message_from_file_with_class(self):
Georg Brandlab91fde2009-08-13 08:51:18 +00002187 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002188 # Create a subclass
2189 class MyMessage(Message):
2190 pass
2191
2192 with openfile('msg_01.txt') as fp:
2193 msg = email.message_from_file(fp, MyMessage)
2194 unless(isinstance(msg, MyMessage))
2195 # Try something more complicated
2196 with openfile('msg_02.txt') as fp:
2197 msg = email.message_from_file(fp, MyMessage)
2198 for subpart in msg.walk():
2199 unless(isinstance(subpart, MyMessage))
2200
2201 def test__all__(self):
2202 module = __import__('email')
2203 # Can't use sorted() here due to Python 2.3 compatibility
2204 all = module.__all__[:]
2205 all.sort()
2206 self.assertEqual(all, [
2207 'base64mime', 'charset', 'encoders', 'errors', 'generator',
2208 'header', 'iterators', 'message', 'message_from_file',
2209 'message_from_string', 'mime', 'parser',
2210 'quoprimime', 'utils',
2211 ])
2212
2213 def test_formatdate(self):
2214 now = time.time()
2215 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2216 time.gmtime(now)[:6])
2217
2218 def test_formatdate_localtime(self):
2219 now = time.time()
2220 self.assertEqual(
2221 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2222 time.localtime(now)[:6])
2223
2224 def test_formatdate_usegmt(self):
2225 now = time.time()
2226 self.assertEqual(
2227 utils.formatdate(now, localtime=False),
2228 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2229 self.assertEqual(
2230 utils.formatdate(now, localtime=False, usegmt=True),
2231 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2232
2233 def test_parsedate_none(self):
2234 self.assertEqual(utils.parsedate(''), None)
2235
2236 def test_parsedate_compact(self):
2237 # The FWS after the comma is optional
2238 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2239 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2240
2241 def test_parsedate_no_dayofweek(self):
2242 eq = self.assertEqual
2243 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2244 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2245
2246 def test_parsedate_compact_no_dayofweek(self):
2247 eq = self.assertEqual
2248 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2249 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2250
2251 def test_parsedate_acceptable_to_time_functions(self):
2252 eq = self.assertEqual
2253 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2254 t = int(time.mktime(timetup))
2255 eq(time.localtime(t)[:6], timetup[:6])
2256 eq(int(time.strftime('%Y', timetup)), 2003)
2257 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2258 t = int(time.mktime(timetup[:9]))
2259 eq(time.localtime(t)[:6], timetup[:6])
2260 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2261
R. David Murray1061f182010-08-25 01:55:24 +00002262 def test_parsedate_y2k(self):
2263 """Test for parsing a date with a two-digit year.
2264
2265 Parsing a date with a two-digit year should return the correct
2266 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2267 obsoletes RFC822) requires four-digit years.
2268
2269 """
2270 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2271 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2272 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2273 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2274
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002275 def test_parseaddr_empty(self):
2276 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2277 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2278
2279 def test_noquote_dump(self):
2280 self.assertEqual(
2281 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2282 'A Silly Person <person@dom.ain>')
2283
2284 def test_escape_dump(self):
2285 self.assertEqual(
2286 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2287 r'"A \(Very\) Silly Person" <person@dom.ain>')
2288 a = r'A \(Special\) Person'
2289 b = 'person@dom.ain'
2290 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2291
2292 def test_escape_backslashes(self):
2293 self.assertEqual(
2294 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2295 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2296 a = r'Arthur \Backslash\ Foobar'
2297 b = 'person@dom.ain'
2298 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2299
2300 def test_name_with_dot(self):
2301 x = 'John X. Doe <jxd@example.com>'
2302 y = '"John X. Doe" <jxd@example.com>'
2303 a, b = ('John X. Doe', 'jxd@example.com')
2304 self.assertEqual(utils.parseaddr(x), (a, b))
2305 self.assertEqual(utils.parseaddr(y), (a, b))
2306 # formataddr() quotes the name if there's a dot in it
2307 self.assertEqual(utils.formataddr((a, b)), y)
2308
R. David Murray7f8199a2010-10-02 16:04:44 +00002309 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2310 # issue 10005. Note that in the third test the second pair of
2311 # backslashes is not actually a quoted pair because it is not inside a
2312 # comment or quoted string: the address being parsed has a quoted
2313 # string containing a quoted backslash, followed by 'example' and two
2314 # backslashes, followed by another quoted string containing a space and
2315 # the word 'example'. parseaddr copies those two backslashes
2316 # literally. Per rfc5322 this is not technically correct since a \ may
2317 # not appear in an address outside of a quoted string. It is probably
2318 # a sensible Postel interpretation, though.
2319 eq = self.assertEqual
2320 eq(utils.parseaddr('""example" example"@example.com'),
2321 ('', '""example" example"@example.com'))
2322 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2323 ('', '"\\"example\\" example"@example.com'))
2324 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2325 ('', '"\\\\"example\\\\" example"@example.com'))
2326
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002327 def test_multiline_from_comment(self):
2328 x = """\
2329Foo
2330\tBar <foo@example.com>"""
2331 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2332
2333 def test_quote_dump(self):
2334 self.assertEqual(
2335 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2336 r'"A Silly; Person" <person@dom.ain>')
2337
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002338 def test_charset_richcomparisons(self):
2339 eq = self.assertEqual
Georg Brandlab91fde2009-08-13 08:51:18 +00002340 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002341 cset1 = Charset()
2342 cset2 = Charset()
2343 eq(cset1, 'us-ascii')
2344 eq(cset1, 'US-ASCII')
2345 eq(cset1, 'Us-AsCiI')
2346 eq('us-ascii', cset1)
2347 eq('US-ASCII', cset1)
2348 eq('Us-AsCiI', cset1)
2349 ne(cset1, 'usascii')
2350 ne(cset1, 'USASCII')
2351 ne(cset1, 'UsAsCiI')
2352 ne('usascii', cset1)
2353 ne('USASCII', cset1)
2354 ne('UsAsCiI', cset1)
2355 eq(cset1, cset2)
2356 eq(cset2, cset1)
2357
2358 def test_getaddresses(self):
2359 eq = self.assertEqual
2360 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2361 'Bud Person <bperson@dom.ain>']),
2362 [('Al Person', 'aperson@dom.ain'),
2363 ('Bud Person', 'bperson@dom.ain')])
2364
2365 def test_getaddresses_nasty(self):
2366 eq = self.assertEqual
2367 eq(utils.getaddresses(['foo: ;']), [('', '')])
2368 eq(utils.getaddresses(
2369 ['[]*-- =~$']),
2370 [('', ''), ('', ''), ('', '*--')])
2371 eq(utils.getaddresses(
2372 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2373 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2374
2375 def test_getaddresses_embedded_comment(self):
2376 """Test proper handling of a nested comment"""
2377 eq = self.assertEqual
2378 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2379 eq(addrs[0][1], 'foo@bar.com')
2380
2381 def test_utils_quote_unquote(self):
2382 eq = self.assertEqual
2383 msg = Message()
2384 msg.add_header('content-disposition', 'attachment',
2385 filename='foo\\wacky"name')
2386 eq(msg.get_filename(), 'foo\\wacky"name')
2387
2388 def test_get_body_encoding_with_bogus_charset(self):
2389 charset = Charset('not a charset')
2390 self.assertEqual(charset.get_body_encoding(), 'base64')
2391
2392 def test_get_body_encoding_with_uppercase_charset(self):
2393 eq = self.assertEqual
2394 msg = Message()
2395 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2396 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2397 charsets = msg.get_charsets()
2398 eq(len(charsets), 1)
2399 eq(charsets[0], 'utf-8')
2400 charset = Charset(charsets[0])
2401 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002402 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002403 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2404 eq(msg.get_payload(decode=True), b'hello world')
2405 eq(msg['content-transfer-encoding'], 'base64')
2406 # Try another one
2407 msg = Message()
2408 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2409 charsets = msg.get_charsets()
2410 eq(len(charsets), 1)
2411 eq(charsets[0], 'us-ascii')
2412 charset = Charset(charsets[0])
2413 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2414 msg.set_payload('hello world', charset=charset)
2415 eq(msg.get_payload(), 'hello world')
2416 eq(msg['content-transfer-encoding'], '7bit')
2417
2418 def test_charsets_case_insensitive(self):
2419 lc = Charset('us-ascii')
2420 uc = Charset('US-ASCII')
2421 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2422
2423 def test_partial_falls_inside_message_delivery_status(self):
2424 eq = self.ndiffAssertEqual
2425 # The Parser interface provides chunks of data to FeedParser in 8192
2426 # byte gulps. SF bug #1076485 found one of those chunks inside
2427 # message/delivery-status header block, which triggered an
2428 # unreadline() of NeedMoreData.
2429 msg = self._msgobj('msg_43.txt')
2430 sfp = StringIO()
2431 iterators._structure(msg, sfp)
2432 eq(sfp.getvalue(), """\
2433multipart/report
2434 text/plain
2435 message/delivery-status
2436 text/plain
2437 text/plain
2438 text/plain
2439 text/plain
2440 text/plain
2441 text/plain
2442 text/plain
2443 text/plain
2444 text/plain
2445 text/plain
2446 text/plain
2447 text/plain
2448 text/plain
2449 text/plain
2450 text/plain
2451 text/plain
2452 text/plain
2453 text/plain
2454 text/plain
2455 text/plain
2456 text/plain
2457 text/plain
2458 text/plain
2459 text/plain
2460 text/plain
2461 text/plain
2462 text/rfc822-headers
2463""")
2464
2465
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002466
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002467# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for email.iterators plus one BufferedSubFile line-splitting test."""

    def test_body_line_iterator(self):
        """Joined output of body_line_iterator() matches the expected text."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        """typed_subpart_iterator(msg, 'text') yields two subparts of msg_04."""
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        subparts = list(iterators.typed_subpart_iterator(msg, 'text'))
        eq(len(subparts), 2)
        payloads = [subpart.get_payload() for subpart in subparts]
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        """typed_subpart_iterator(msg, 'text', 'plain') yields one part of msg_03."""
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        subparts = list(iterators.typed_subpart_iterator(msg, 'text', 'plain'))
        eq(len(subparts), 1)
        payloads = [subpart.get_payload() for subpart in subparts]
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry is (text pushed, number of lines expected to become
        # readable immediately after that push).
        imt = [
            ("a\r \n",  2),
            ("b",       0),
            ("c\n",     1),
            ("",        0),
            ("d\r\n",   1),
            ("e\r",     0),
            ("\nf",     1),
            ("\r\n",    1),
        ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            # Drain every line that is available after this push; readline()
            # hands back NeedMoreData once the buffer holds no complete line.
            for ol in iter(bsf.readline, NeedMoreData):
                om.append(ol)
                n1 += 1
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: output re-joins to the input.
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
2554
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002555
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002556
class TestParsers(TestEmailBase):
    """Tests for Parser, HeaderParser and the message_from_*() helpers."""

    def test_header_parser(self):
        """HeaderParser reads the headers and leaves the body as one string."""
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertTrue(isinstance(msg.get_payload(), str))

    def test_whitespace_continuation(self):
        """A whitespace-only line after a header is treated as a continuation."""
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        """Same as test_whitespace_continuation with Subject as last header."""
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        """Parts of a CRLF-delimited multipart keep their CRLF line endings."""
        eq = self.assertEqual
        # newline='\n' stops the text layer from translating the CRLFs.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_multipart_digest_with_extra_mime_headers(self):
        """Each multipart/digest part is a message/rfc822 wrapping text/plain."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        self.assertTrue(part1.is_multipart())
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        self.assertFalse(part1a.is_multipart())
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        self.assertTrue(part2.is_multipart())
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        self.assertFalse(part2a.is_multipart())
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        """The last header is parsed when the text has no trailing newline."""
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        """Header values parsed from CRLF-terminated lines carry no CR or LF."""
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        """Header names made of unusual printable characters are accepted."""
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        """A first line with a space before any colon yields no headers."""
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        """Single-character header names are parsed like any other."""
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        """A body part ending in CRLF + LF keeps its CRLF."""
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002707
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002708
class TestBase64(unittest.TestCase):
    """Tests for the helper functions in email.base64mime."""

    def test_len(self):
        """header_length() equals the length of the base64 encoding."""
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Every started group of three input characters takes four
            # output characters: 0 -> 0, 1..3 -> 4, 4..6 -> 8, ...
            bsize = 4 * ((size + 2) // 3)
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        """decode() returns bytes for empty and non-empty input."""
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        """body_encode() honours its maxlinelen and eol arguments."""
        eq = self.assertEqual
        eq(base64mime.body_encode(b''), b'')
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # Test input that itself ends with a newline
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        """header_encode() wraps the base64 text in an encoded-word."""
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002759
2760
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002761
class TestQuopri(unittest.TestCase):
    """Tests for the helper functions in email.quoprimime."""

    def setUp(self):
        """Build the literal / must-encode byte sets for headers and bodies."""
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # Use a real assertion so the sanity check survives python -O.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        """header_check() is false for hlit bytes and true for hnon bytes."""
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        """body_check() is false for blit bytes and true for bnon bytes."""
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        """header_length() counts encoded characters without RFC 2047 chrome."""
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        """body_length() is 1 for each blit byte and 3 for each bnon byte."""
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        """unquote(quote(c)) gives back c for every code point below 256."""
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def test_header_encode(self):
        """header_encode() produces a 'q' encoded-word, default iso-8859-1."""
        eq = self.assertEqual
        he = quoprimime.header_encode
        eq(he(b'hello'), '=?iso-8859-1?q?hello?=')
        eq(he(b'hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
        eq(he(b'hello\nworld'), '=?iso-8859-1?q?hello=0Aworld?=')
        # Test a non-ASCII character
        eq(he(b'hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')

    def test_decode(self):
        """decode() passes plain text through and applies the eol argument."""
        eq = self.assertEqual
        eq(quoprimime.decode(''), '')
        eq(quoprimime.decode('hello'), 'hello')
        eq(quoprimime.decode('hello', 'X'), 'hello')
        eq(quoprimime.decode('hello\nworld', 'X'), 'helloXworld')

    def test_encode(self):
        """body_encode() honours its maxlinelen and eol arguments."""
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # Test input containing a CRLF line ending
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        eq(quoprimime.body_encode("""\
one line

two line"""), """\
one line

two line""")
2878
2879
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002880
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002881# Test the Charset class
2882class TestCharset(unittest.TestCase):
2883 def tearDown(self):
2884 from email import charset as CharsetModule
2885 try:
2886 del CharsetModule.CHARSETS['fake']
2887 except KeyError:
2888 pass
2889
Guido van Rossum9604e662007-08-30 03:46:43 +00002890 def test_codec_encodeable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002891 eq = self.assertEqual
2892 # Make sure us-ascii = no Unicode conversion
2893 c = Charset('us-ascii')
Guido van Rossum9604e662007-08-30 03:46:43 +00002894 eq(c.header_encode('Hello World!'), 'Hello World!')
2895 # Test 8-bit idempotency with us-ascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002896 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
Guido van Rossum9604e662007-08-30 03:46:43 +00002897 self.assertRaises(UnicodeError, c.header_encode, s)
2898 c = Charset('utf-8')
2899 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002900
2901 def test_body_encode(self):
2902 eq = self.assertEqual
2903 # Try a charset with QP body encoding
2904 c = Charset('iso-8859-1')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00002905 eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002906 # Try a charset with Base64 body encoding
2907 c = Charset('utf-8')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002908 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002909 # Try a charset with None body encoding
2910 c = Charset('us-ascii')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00002911 eq('hello world', c.body_encode('hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002912 # Try the convert argument, where input codec != output codec
2913 c = Charset('euc-jp')
2914 # With apologies to Tokio Kikuchi ;)
Barry Warsawbef9d212007-08-31 10:55:37 +00002915 # XXX FIXME
2916## try:
2917## eq('\x1b$B5FCO;~IW\x1b(B',
2918## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
2919## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
2920## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
2921## except LookupError:
2922## # We probably don't have the Japanese codecs installed
2923## pass
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002924 # Testing SF bug #625509, which we have to fake, since there are no
2925 # built-in encodings where the header encoding is QP but the body
2926 # encoding is not.
2927 from email import charset as CharsetModule
2928 CharsetModule.add_charset('fake', CharsetModule.QP, None)
2929 c = Charset('fake')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00002930 eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002931
2932 def test_unicode_charset_name(self):
2933 charset = Charset('us-ascii')
2934 self.assertEqual(str(charset), 'us-ascii')
2935 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
2936
2937
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00002938
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002939# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    """Tests for email.header.Header, decode_header() and make_header()."""

    def test_simple(self):
        """Appending a chunk that starts with a space joins with one space."""
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        eq(h.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        """Appending a chunk without a leading space still inserts one."""
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        """decode_header() returns plain text as one (text, None) pair."""
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        """No line of a folded long header exceeds maxlinelen."""
        h = Header(
            "I am the very model of a modern Major-General; I've information "
            "vegetable, animal, and mineral; I know the kings of England, "
            "and I quote the fights historical from Marathon to Waterloo, in "
            "order categorical; I'm very well acquainted, too, with matters "
            "mathematical; I understand equations, both the simple and "
            "quadratical; about binomial theorem I'm teeming with a lot o' "
            "news, with many cheerful facts about the square of the "
            "hypotenuse.",
            maxlinelen=76)
        for line in h.encode(splitchars=' ').split('\n '):
            # Pass the line as the message so a failure shows the offender.
            self.assertTrue(len(line) <= 76, line)

    def test_multilingual(self):
        """A header with three charsets round-trips through encode/decode."""
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        """An empty Header encodes to the empty string."""
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        """A Header built with no arguments compares equal to ''."""
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        """Folding position depends on header_name and on maxlinelen."""
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        """Quoted-printable encoded-words are split to fit maxlinelen."""
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        # Append the same text that the round-trip check below compares with.
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        """Base64 encoded-words are split to fit maxlinelen."""
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        """Plain ASCII survives decode_header() / make_header() unchanged."""
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        """append() accepts the charset as a plain string name."""
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        """utf-8 headers use whichever of q / b encoding is shorter."""
        eq = self.assertEqual
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        """Undecodable bytes raise UnicodeError unless errors='replace'."""
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_encoded_adjacent_nonencoded(self):
        """An encoded chunk followed by a plain chunk round-trips."""
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        """Whitespace between adjacent encoded-words is dropped on decoding."""
        eq = self.assertEqual
        s = ('Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= '
             '=?koi8-r?q?=CA?= zz.')
        parts = decode_header(s)
        eq(parts, [(b'Subject:', None),
                   (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 '
                    b'\xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'),
                   (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        """decode_header() raises HeaderParseError on malformed base64."""
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)
3246
3247
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00003248
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003249# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    def test_get_param(self):
        # An RFC 2231 encoded parameter comes back as a
        # (charset, language, value) triple; unquote=False keeps the quotes.
        msg = self._msgobj('msg_29.txt')
        self.assertEqual(
            msg.get_param('title'),
            ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        self.assertEqual(
            msg.get_param('title', unquote=False),
            ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        # set_param() with a charset stores an RFC 2231 value; the language
        # slot reads back as '' unless a language is supplied.
        title = 'This is even more ***fun*** isn\'t it!'
        msg = Message()
        msg.set_param('title', title, charset='us-ascii')
        self.ndiffAssertEqual(msg.get_param('title'),
                              ('us-ascii', '', title))
        msg.set_param('title', title, charset='us-ascii', language='en')
        self.ndiffAssertEqual(msg.get_param('title'),
                              ('us-ascii', 'en', title))
        # On a parsed message the parameter must flatten as an encoded
        # title* continuation line of Content-Type.
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', title, charset='us-ascii', language='en')
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")

    def test_del_param(self):
        # Deleting one parameter must leave the other RFC 2231 parameter in
        # place; the expected output has the charset value quoted.
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        msg = self._msgobj('msg_32.txt')
        self.assertEqual(msg.get_content_charset(), 'us-ascii')

    def test_rfc2231_no_language_or_charset(self):
        # Unencoded continuations (NAME*0, NAME*1) are joined into a plain
        # string, not a (charset, language, value) tuple.
        text = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        param = email.message_from_string(text).get_param('NAME')
        self.assertFalse(isinstance(param, tuple))
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        # Empty charset and language fields ('') in the first segment.
        text = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        # NOTE(review): the message text and the expected value here are the
        # same as in test_rfc2231_no_language_or_charset_in_filename.
        text = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        # Only segment 1 is marked encoded (trailing '*'), so only its
        # %-escapes are expected to be decoded.
        text = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # No segment is marked encoded, so every %-escape stays literal.
        text = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        text = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        text = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        # An unrecognised charset name ('bogus') must not prevent the
        # filename from being returned.
        text = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        text = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(text)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_charset(self):
        text = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(text)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_filename(self):
        # The trailing %E2 is not valid in the declared ascii charset; the
        # expected filename ends with U+FFFD in its place.
        text = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        text = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        # A lone apostrophe in an encoded segment must not be taken for the
        # charset'language'value separators.
        text = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(text)
        charset, language, value = msg.get_param('name')
        self.assertEqual(charset, None)
        self.assertEqual(language, None)
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        text = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        param = email.message_from_string(text).get_param('name')
        self.assertFalse(isinstance(param, tuple))
        self.assertEqual(param, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        # In an encoded segment the first two apostrophes delimit charset and
        # language; the third one belongs to the value.
        text = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(text)
        charset, language, value = msg.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        # In unencoded segments the apostrophes are ordinary characters.
        text = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        param = email.message_from_string(text).get_param('name')
        self.assertFalse(isinstance(param, tuple))
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        text = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        text = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(text)
        charset, language, value = msg.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        text = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(text)
        charset, language, value = msg.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, 'My Document For You')
3558
3559
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00003560
R. David Murrayfa606922010-01-16 18:41:00 +00003561# Tests to ensure that signed parts of an email are completely preserved, as
3562# required by RFC1847 section 2.1. Note that these are incomplete, because the
3563# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):

    def _msg_and_obj(self, filename):
        # Return (raw text, parsed Message) for a file in the test data
        # directory.  openfile() already joins the name onto that directory,
        # so the name is handed over unchanged: routing it through findfile()
        # first could turn it into an absolute path found elsewhere on
        # sys.path, which would make the join ignore the data directory.
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_mime_part(self, text, label):
        # Return the text between the first '--boundary' line in *text* and
        # the next line consisting of the same '--boundary'.  *label* names
        # the input in the failure message.
        import re
        repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        found = repart.search(text)
        if found is None:
            # Report a normal test failure instead of an AttributeError on
            # None when the text holds no delimited part.
            self.fail('no MIME part found in the %s message' % label)
        return found.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require the
        # regenerated one to be identical to the original one.
        inpart = self._first_mime_part(original, 'original')
        outpart = self._first_mime_part(result, 'generated')
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(original, msg.as_string())

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(original, msg.as_string(maxheaderlen=60))

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        self._signed_parts_eq(original, fp.getvalue())
3596
3597
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00003598
def _testclasses():
    """Return this module's TestCase classes whose names start with 'Test'.

    The classes come back in the name order produced by dir().  Module
    attributes that merely share the 'Test' prefix but are not
    unittest.TestCase subclasses are left out, because the callers hand
    every returned object to the unittest machinery.
    """
    mod = sys.modules[__name__]
    candidates = (getattr(mod, name) for name in dir(mod)
                  if name.startswith('Test'))
    # isinstance() first: issubclass() raises TypeError on a non-class.
    return [obj for obj in candidates
            if isinstance(obj, type) and issubclass(obj, unittest.TestCase)]
3602
3603
def suite():
    """Return one TestSuite holding the tests of every class from _testclasses()."""
    loader = unittest.defaultTestLoader
    tests = unittest.TestSuite()
    for testclass in _testclasses():
        # loadTestsFromTestCase() is what the deprecated unittest.makeSuite()
        # did with its default arguments; makeSuite() is gone in Python 3.13.
        tests.addTest(loader.loadTestsFromTestCase(testclass))
    return tests
3609
3610
def test_main():
    """Run each class returned by _testclasses() through run_unittest(), one call per class."""
    classes = _testclasses()
    for testclass in classes:
        run_unittest(testclass)
3614
3615
Ezio Melotti19f2aeb2010-11-21 01:30:29 +00003616
# Script entry point: unittest resolves the name 'suite' in this module and
# runs what it returns.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')