# Source blob: bdb93457cfc40594709c4747a7b27cadeb8fd18d
"""
Test the implementation of PEP 540: the UTF-8 Mode.
"""
4
5import locale
Victor Stinner91106cd2017-12-13 12:29:09 +01006import sys
7import textwrap
8import unittest
Victor Stinner94540602017-12-16 04:54:22 +01009from test import support
Victor Stinner91106cd2017-12-13 12:29:09 +010010from test.support.script_helper import assert_python_ok, assert_python_failure
11
12
# Platform flags and locale names shared by the tests below.
# True when sys.platform reports 'win32'.
MS_WINDOWS = (sys.platform == 'win32')
# Values of LC_CTYPE / LC_ALL that the tests treat as "the POSIX locale".
POSIX_LOCALES = ('C', 'POSIX')
# True when sys.platform reports 'vxworks'.
VXWORKS = (sys.platform == "vxworks")
Victor Stinner937ee9e2018-06-26 02:11:06 +020016
class UTF8ModeTests(unittest.TestCase):
    """Check how -X utf8, PYTHONUTF8 and the locale drive the UTF-8 Mode.

    Each test runs a small snippet through get_output() and compares the
    text it prints against the expected value.
    """

    # Baseline keyword arguments merged into every get_output() call;
    # individual tests override entries by passing the same keyword.
    DEFAULT_ENV = {
        'PYTHONUTF8': '',
        'PYTHONLEGACYWINDOWSFSENCODING': '',
        'PYTHONCOERCECLOCALE': '0',
    }

    def posix_locale(self):
        """Return True if the current LC_CTYPE setting is in POSIX_LOCALES."""
        loc = locale.setlocale(locale.LC_CTYPE, None)
        return (loc in POSIX_LOCALES)

    def get_output(self, *args, failure=False, **kw):
        """Run the interpreter helper and return its decoded output.

        *args* are forwarded to assert_python_ok() (or to
        assert_python_failure() when *failure* is true) together with
        DEFAULT_ENV updated by *kw*.  Item 1 of the result is used on
        success and item 2 on failure (stdout and stderr respectively,
        per the script_helper documentation).  The bytes are decoded and
        trailing newline / carriage-return characters are stripped.
        """
        kw = dict(self.DEFAULT_ENV, **kw)
        if failure:
            out = assert_python_failure(*args, **kw)
            out = out[2]
        else:
            out = assert_python_ok(*args, **kw)
            out = out[1]
        return out.decode().rstrip("\n\r")

    @unittest.skipIf(MS_WINDOWS, 'Windows has no POSIX locale')
    def test_posix_locale(self):
        """sys.flags.utf8_mode is 1 when LC_ALL is a POSIX locale."""
        code = 'import sys; print(sys.flags.utf8_mode)'

        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                out = self.get_output('-c', code, LC_ALL=loc)
                self.assertEqual(out, '1')

    def test_xoption(self):
        """-X utf8, -X utf8=1 and -X utf8=0 set sys.flags.utf8_mode."""
        code = 'import sys; print(sys.flags.utf8_mode)'

        out = self.get_output('-X', 'utf8', '-c', code)
        self.assertEqual(out, '1')

        # undocumented but accepted syntax: -X utf8=1
        out = self.get_output('-X', 'utf8=1', '-c', code)
        self.assertEqual(out, '1')

        out = self.get_output('-X', 'utf8=0', '-c', code)
        self.assertEqual(out, '0')

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 Mode
            # and has the priority over -X utf8
            out = self.get_output('-X', 'utf8', '-c', code,
                                  PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(out, '0')

    def test_env_var(self):
        """PYTHONUTF8 sets sys.flags.utf8_mode; invalid values fail."""
        code = 'import sys; print(sys.flags.utf8_mode)'

        out = self.get_output('-c', code, PYTHONUTF8='1')
        self.assertEqual(out, '1')

        out = self.get_output('-c', code, PYTHONUTF8='0')
        self.assertEqual(out, '0')

        # -X utf8 has the priority over PYTHONUTF8
        out = self.get_output('-X', 'utf8=0', '-c', code, PYTHONUTF8='1')
        self.assertEqual(out, '0')

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
            # and has the priority over PYTHONUTF8
            out = self.get_output('-X', 'utf8', '-c', code, PYTHONUTF8='1',
                                  PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(out, '0')

        # Cannot test with the POSIX locale, since the POSIX locale enables
        # the UTF-8 mode
        if not self.posix_locale():
            # PYTHONUTF8 should be ignored if -E is used
            out = self.get_output('-E', '-c', code, PYTHONUTF8='1')
            self.assertEqual(out, '0')

        # invalid mode
        out = self.get_output('-c', code, PYTHONUTF8='xxx', failure=True)
        self.assertIn('invalid PYTHONUTF8 environment variable value',
                      out.rstrip())

    def test_filesystemencoding(self):
        """-X utf8 selects utf-8 as the filesystem encoding."""
        code = textwrap.dedent('''
            import sys
            print("{}/{}".format(sys.getfilesystemencoding(),
                                 sys.getfilesystemencodeerrors()))
        ''')

        # The expected error handler differs between Windows and the rest.
        if MS_WINDOWS:
            expected = 'utf-8/surrogatepass'
        else:
            expected = 'utf-8/surrogateescape'

        out = self.get_output('-X', 'utf8', '-c', code)
        self.assertEqual(out, expected)

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
            # and has the priority over -X utf8 and PYTHONUTF8
            out = self.get_output('-X', 'utf8', '-c', code,
                                  PYTHONUTF8='strict',
                                  PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(out, 'mbcs/replace')

    def test_stdio(self):
        """Check stdin/stdout/stderr encoding and errors under -X utf8."""
        code = textwrap.dedent('''
            import sys
            print(f"stdin: {sys.stdin.encoding}/{sys.stdin.errors}")
            print(f"stdout: {sys.stdout.encoding}/{sys.stdout.errors}")
            print(f"stderr: {sys.stderr.encoding}/{sys.stderr.errors}")
        ''')

        out = self.get_output('-X', 'utf8', '-c', code,
                              PYTHONIOENCODING='')
        self.assertEqual(out.splitlines(),
                         ['stdin: utf-8/surrogateescape',
                          'stdout: utf-8/surrogateescape',
                          'stderr: utf-8/backslashreplace'])

        # PYTHONIOENCODING has the priority over PYTHONUTF8
        out = self.get_output('-X', 'utf8', '-c', code,
                              PYTHONIOENCODING="latin1")
        self.assertEqual(out.splitlines(),
                         ['stdin: iso8859-1/strict',
                          'stdout: iso8859-1/strict',
                          'stderr: iso8859-1/backslashreplace'])

        out = self.get_output('-X', 'utf8', '-c', code,
                              PYTHONIOENCODING=":namereplace")
        self.assertEqual(out.splitlines(),
                         ['stdin: utf-8/namereplace',
                          'stdout: utf-8/namereplace',
                          'stderr: utf-8/backslashreplace'])

    def test_io(self):
        """open() without arguments reports UTF-8/strict when PYTHONUTF8=1."""
        code = textwrap.dedent('''
            import sys
            filename = sys.argv[1]
            with open(filename) as fp:
                print(f"{fp.encoding}/{fp.errors}")
        ''')
        # Any existing text file works; this test file is used.
        filename = __file__

        out = self.get_output('-c', code, filename, PYTHONUTF8='1')
        self.assertEqual(out, 'UTF-8/strict')

    def _check_io_encoding(self, module, encoding=None, errors=None):
        """Open a file with *module*.open() and check encoding/errors.

        *encoding* and *errors*, when given, are passed explicitly to
        open() in the child snippet and must be reported back unchanged;
        when omitted, 'UTF-8' and 'strict' are expected respectively.
        """
        filename = __file__

        # Encoding explicitly set
        args = []
        if encoding:
            args.append(f'encoding={encoding!r}')
        if errors:
            args.append(f'errors={errors!r}')
        code = textwrap.dedent('''
            import sys
            from %s import open
            filename = sys.argv[1]
            with open(filename, %s) as fp:
                print(f"{fp.encoding}/{fp.errors}")
        ''') % (module, ', '.join(args))
        out = self.get_output('-c', code, filename,
                              PYTHONUTF8='1')

        if not encoding:
            encoding = 'UTF-8'
        if not errors:
            errors = 'strict'
        self.assertEqual(out, f'{encoding}/{errors}')

    def check_io_encoding(self, module):
        """Run _check_io_encoding() with encoding, errors, and both set."""
        self._check_io_encoding(module, encoding="latin1")
        self._check_io_encoding(module, errors="namereplace")
        self._check_io_encoding(module,
                                encoding="latin1", errors="namereplace")

    def test_io_encoding(self):
        """Explicit open() arguments are honoured by the io module."""
        self.check_io_encoding('io')

    def test_pyio_encoding(self):
        """Explicit open() arguments are honoured by the _pyio module."""
        self.check_io_encoding('_pyio')

    def test_locale_getpreferredencoding(self):
        """locale.getpreferredencoding() reports UTF-8 under -X utf8."""
        code = 'import locale; print(locale.getpreferredencoding(False), locale.getpreferredencoding(True))'
        out = self.get_output('-X', 'utf8', '-c', code)
        self.assertEqual(out, 'UTF-8 UTF-8')

        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc)
                self.assertEqual(out, 'UTF-8 UTF-8')

    @unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
    def test_cmd_line(self):
        """Check how a non-ASCII command line argument lands in sys.argv."""
        arg = 'h\xe9\u20ac'.encode('utf-8')
        arg_utf8 = arg.decode('utf-8')
        arg_ascii = arg.decode('ascii', 'surrogateescape')
        code = 'import locale, sys; print("%s:%s" % (locale.getpreferredencoding(), ascii(sys.argv[1:])))'

        def check(utf8_opt, expected, **kw):
            out = self.get_output('-X', utf8_opt, '-c', code, arg, **kw)
            # Output is "<encoding>:<ascii(argv)>"; compare the argv part
            # and pass the full output as the failure message.
            args = out.partition(':')[2].rstrip()
            self.assertEqual(args, ascii(expected), out)

        check('utf8', [arg_utf8])
        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                check('utf8', [arg_utf8], LC_ALL=loc)

        # With the UTF-8 Mode explicitly disabled, the expected decoding
        # of the argument depends on the platform.
        if sys.platform == 'darwin' or support.is_android or VXWORKS:
            c_arg = arg_utf8
        elif sys.platform.startswith("aix"):
            c_arg = arg.decode('iso-8859-1')
        else:
            c_arg = arg_ascii
        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                check('utf8=0', [c_arg], LC_ALL=loc)

    def test_optim_level(self):
        """-O/-OO and -E are reported once each when combined with -X utf8."""
        # CPython: check that Py_Main() doesn't increment Py_OptimizeFlag
        # twice when -X utf8 requires to parse the configuration twice (when
        # the encoding changes after reading the configuration, the
        # configuration is read again with the new encoding).
        code = 'import sys; print(sys.flags.optimize)'
        out = self.get_output('-X', 'utf8', '-O', '-c', code)
        self.assertEqual(out, '1')
        out = self.get_output('-X', 'utf8', '-OO', '-c', code)
        self.assertEqual(out, '2')

        code = 'import sys; print(sys.flags.ignore_environment)'
        out = self.get_output('-X', 'utf8', '-E', '-c', code)
        self.assertEqual(out, '1')
252
Victor Stinner91106cd2017-12-13 12:29:09 +0100253
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()