# -*- coding: koi8-r -*-

import unittest
from test.support import TESTFN, unlink, unload, rmtree
import importlib
import os
import sys
import subprocess

class SourceEncodingTest(unittest.TestCase):
    """Checks how compile()/exec()/eval()/import handle source encodings.

    Test methods are documented with ``#`` comments rather than docstrings
    so that unittest keeps reporting them by method name.
    """

    def test_pep263(self):
        # NOTE: the two non-ASCII literals below are only meaningful when this
        # file is decoded with the koi8-r codec named in its coding line; the
        # expected values are the UTF-8 bytes of the decoded text.  Do not
        # "fix" or re-encode the literals.
        self.assertEqual(
            "ðÉÔÏÎ".encode("utf-8"),
            b'\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd'
        )
        # A backslash in front of a non-ASCII character must be kept as a
        # literal backslash followed by that character.
        self.assertEqual(
            "\ð".encode("utf-8"),
            b'\\\xd0\x9f'
        )

    def test_compilestring(self):
        # see #1882
        # The coding comment sits on the second line of a bytes source; the
        # UTF-8 pair \xc3\xb3 must come out as the single character U+00F3.
        code = compile(b"\n# coding: utf-8\nu = '\xc3\xb3'\n", "dummy", "exec")
        namespace = {}
        exec(code, namespace)
        self.assertEqual(namespace['u'], '\xf3')

    def test_issue2301(self):
        # The offending line reported on the SyntaxError must already be
        # decoded from the declared cp932 encoding.
        with self.assertRaises(SyntaxError) as cm:
            compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec")
        self.assertEqual(cm.exception.text, "print '\u5e74'\n")

    def test_issue4626(self):
        # A str source that carries a latin-1 coding comment and uses the
        # same non-ASCII character as an identifier and as a string value.
        code = compile("# coding=latin-1\n\u00c6 = '\u00c6'", "dummy", "exec")
        namespace = {}
        exec(code, namespace)
        self.assertEqual(namespace['\xc6'], '\xc6')

    def test_issue3297(self):
        # 'a' receives the non-BMP character itself, 'b' receives it through
        # a \U escape inside the compiled source; both must be the same str.
        code = compile("a, b = '\U0001010F', '\\U0001010F'", "dummy", "exec")
        namespace = {}
        exec(code, namespace)
        self.assertEqual(namespace['a'], namespace['b'])
        self.assertEqual(len(namespace['a']), len(namespace['b']))
        self.assertEqual(ascii(namespace['a']), ascii(namespace['b']))

    def test_issue7820(self):
        # Ensure that check_bom() restores all bytes in the right order if
        # check_bom() fails in pydebug mode: a buffer starts with the first
        # byte of a valid BOM, but next bytes are different

        # one byte in common with the UTF-16-LE BOM
        self.assertRaises(SyntaxError, eval, b'\xff\x20')

        # two bytes in common with the UTF-8 BOM
        self.assertRaises(SyntaxError, eval, b'\xef\xbb\x20')

    def test_20731(self):
        # Run the sibling script 'coding20731.py' in a child interpreter; it
        # must exit cleanly and must not report a SyntaxError on stderr.
        script = os.path.join(os.path.dirname(__file__), 'coding20731.py')
        with subprocess.Popen([sys.executable, script],
                              stderr=subprocess.PIPE) as sub:
            err = sub.communicate()[1]
        self.assertEqual(sub.returncode, 0)
        self.assertNotIn(b'SyntaxError', err)

    def test_error_message(self):
        # These three sources must compile without raising.
        compile(b'# -*- coding: iso-8859-15 -*-\n', 'dummy', 'exec')
        compile(b'\xef\xbb\xbf\n', 'dummy', 'exec')
        compile(b'\xef\xbb\xbf# -*- coding: utf-8 -*-\n', 'dummy', 'exec')

        # An unknown codec name must appear in the error message.
        with self.assertRaisesRegex(SyntaxError, 'fake'):
            compile(b'# -*- coding: fake -*-\n', 'dummy', 'exec')

        # A UTF-8 BOM combined with a different coding comment must fail, and
        # the message must mention both the declared encoding and the BOM.
        with self.assertRaisesRegex(SyntaxError, 'iso-8859-15'):
            compile(b'\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n',
                    'dummy', 'exec')
        with self.assertRaisesRegex(SyntaxError, 'BOM'):
            compile(b'\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n',
                    'dummy', 'exec')
        with self.assertRaisesRegex(SyntaxError, 'fake'):
            compile(b'\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec')
        with self.assertRaisesRegex(SyntaxError, 'BOM'):
            compile(b'\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec')

    def test_bad_coding(self):
        module_name = 'bad_coding'
        self.verify_bad_module(module_name)

    def test_bad_coding2(self):
        module_name = 'bad_coding2'
        self.verify_bad_module(module_name)

    def verify_bad_module(self, module_name):
        # Helper: 'test.<module_name>' must raise SyntaxError both when it is
        # imported and when its raw bytes (read from the .py file next to
        # this one) are handed straight to compile().
        self.assertRaises(SyntaxError, __import__, 'test.' + module_name)

        path = os.path.dirname(__file__)
        filename = os.path.join(path, module_name + '.py')
        with open(filename, "rb") as fp:
            source = fp.read()
        self.assertRaises(SyntaxError, compile, source, filename, 'exec')

    def test_exec_valid_coding(self):
        # exec() on bytes must honour the coding comment: the cp949 pair
        # \xaa\xa7 has to decode to U+3047.
        namespace = {}
        exec(b'# coding: cp949\na = "\xaa\xa7"\n', namespace)
        self.assertEqual(namespace['a'], '\u3047')

    def test_file_parse(self):
        # issue1134: all encodings outside latin-1 and utf-8 fail on
        # multiline strings and long lines (>512 columns)
        unload(TESTFN)
        filename = TESTFN + ".py"
        sys.path.insert(0, os.curdir)
        try:
            # The file is created inside the try block so that the cleanup
            # below also runs if opening or writing it fails.
            with open(filename, "w", encoding="cp1252") as f:
                f.write("# -*- coding: cp1252 -*-\n")
                f.write("'''A short string\n")
                f.write("'''\n")
                f.write("'A very long string %s'\n" % ("X" * 1000))

            importlib.invalidate_caches()
            __import__(TESTFN)
        finally:
            del sys.path[0]
            unlink(filename)
            unlink(filename + "c")
            unlink(filename + "o")
            unload(TESTFN)
            rmtree('__pycache__')

    def test_error_from_string(self):
        # See http://bugs.python.org/issue6289
        source = "# coding: ascii\n\N{SNOWMAN}".encode('utf-8')
        with self.assertRaises(SyntaxError) as c:
            compile(source, "<string>", "exec")
        expected = "'ascii' codec can't decode byte 0xe2 in position 16: " \
                   "ordinal not in range(128)"
        # Only the start of the message is pinned; the full text is passed as
        # msg so a failure shows what was actually reported.
        self.assertTrue(c.exception.args[0].startswith(expected),
                        msg=c.exception.args[0])


if __name__ == "__main__":
    # Run this module's tests when the file is executed as a script.
    unittest.main()