blob: 35272b5c15aca700dd90486717fb85ac38565655 [file] [log] [blame]
# Tests the attempted automatic coercion of the C locale to a UTF-8 locale
2
Nick Coghlan18974c32017-06-30 00:48:14 +10003import locale
Nick Coghlan6ea41862017-06-11 13:16:15 +10004import os
Victor Stinner55e49802018-11-30 11:34:47 +01005import shutil
6import subprocess
Nick Coghlan6ea41862017-06-11 13:16:15 +10007import sys
8import sysconfig
Victor Stinner55e49802018-11-30 11:34:47 +01009import unittest
Nick Coghlan6ea41862017-06-11 13:16:15 +100010from collections import namedtuple
11
Victor Stinner7c2d5702018-11-21 12:21:25 +010012from test import support
Nick Coghlan6ea41862017-06-11 13:16:15 +100013from test.support.script_helper import (
14 run_python_until_end,
15 interpreter_requires_environment,
16)
17
# Set the list of ways we expect to be able to ask for the "C" locale
EXPECTED_C_LOCALE_EQUIVALENTS = ["C", "invalid.ascii"]

# Set our expectation for the default encoding used in the C locale
# for the filesystem encoding and the standard streams
EXPECTED_C_LOCALE_STREAM_ENCODING = "ascii"
EXPECTED_C_LOCALE_FS_ENCODING = "ascii"

# Set our expectation for the default locale used when none is specified
EXPECT_COERCION_IN_DEFAULT_LOCALE = True

# Locale names treated as coercion targets: test_PYTHONCOERCECLOCALE_set_to_one
# skips itself when the current LC_CTYPE locale is one of these.
# NOTE: this lists the same names as _C_UTF8_LOCALES; keep the two in sync.
TARGET_LOCALES = ["C.UTF-8", "C.utf8", "UTF-8"]

# Apply some platform dependent overrides
if sys.platform.startswith("linux"):
    if support.is_android:
        # Android defaults to using UTF-8 for all system interfaces
        EXPECTED_C_LOCALE_STREAM_ENCODING = "utf-8"
        EXPECTED_C_LOCALE_FS_ENCODING = "utf-8"
    else:
        # Linux distros typically alias the POSIX locale directly to the C
        # locale.
        # TODO: Once https://bugs.python.org/issue30672 is addressed, we'll be
        #       able to check this case unconditionally
        EXPECTED_C_LOCALE_EQUIVALENTS.append("POSIX")
elif sys.platform.startswith("aix"):
    # AIX uses iso8859-1 in the C locale, other *nix platforms use ASCII
    EXPECTED_C_LOCALE_STREAM_ENCODING = "iso8859-1"
    EXPECTED_C_LOCALE_FS_ENCODING = "iso8859-1"
elif sys.platform == "darwin":
    # FS encoding is UTF-8 on macOS
    EXPECTED_C_LOCALE_FS_ENCODING = "utf-8"
elif sys.platform == "cygwin":
    # Cygwin defaults to using C.UTF-8
    # TODO: Work out a robust dynamic test for this that doesn't rely on
    #       CPython's own locale handling machinery
    EXPECT_COERCION_IN_DEFAULT_LOCALE = False
Nick Coghlan45630992017-06-13 22:49:44 +100055
# Note that the above expectations are still wrong in some cases, such as:
# * Windows when PYTHONLEGACYWINDOWSFSENCODING is set
# * Any platform other than AIX that uses latin-1 in the C locale
# * Any Linux distro where POSIX isn't a simple alias for the C locale
# * Any Linux distro where the default locale is something other than "C"
#
# Options for dealing with this:
# * Don't set the PY_COERCE_C_LOCALE preprocessor definition on
#   such platforms (e.g. it isn't set on Windows)
# * Fix the test expectations to match the actual platform behaviour
Nick Coghlan45630992017-06-13 22:49:44 +100066
# In order to get the warning messages to match up as expected, the candidate
# order here must match the target locale order in Python/pylifecycle.c
_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8")

# There's no reliable cross-platform way of checking locale alias
# lists, so the only way of knowing which of these locales will work
# is to try them with locale.setlocale(). We do that in a subprocess
# in setUpModule() below to avoid altering the locale of the test runner.
#
# If the relevant locale module attributes exist, and we're not on a platform
# where we expect it to always succeed, we also check that
# `locale.nl_langinfo(locale.CODESET)` works, as if it fails, the interpreter
# will skip locale coercion for that particular target locale
_check_nl_langinfo_CODESET = bool(
    sys.platform not in ("darwin", "linux") and
    hasattr(locale, "nl_langinfo") and
    hasattr(locale, "CODESET")
)
85
def _set_locale_in_subprocess(locale_name):
    """Return True if a child interpreter can set LC_CTYPE to *locale_name*.

    The attempt is made in a subprocess (started with PYTHONCOERCECLOCALE set
    to an empty string) so the locale of the calling process is not touched.
    When ``_check_nl_langinfo_CODESET`` is true, the child also exits with a
    non-zero status if ``locale.nl_langinfo(locale.CODESET)`` is empty.
    """
    cmd_fmt = "import locale; print(locale.setlocale(locale.LC_CTYPE, '{}'))"
    if _check_nl_langinfo_CODESET:
        # If there's no valid CODESET, we expect coercion to be skipped
        cmd_fmt += "; import sys; sys.exit(not locale.nl_langinfo(locale.CODESET))"
    cmd = cmd_fmt.format(locale_name)
    # Only the exit status is of interest; the command line that
    # run_python_until_end() also returns is not used here.
    result, _ = run_python_until_end("-c", cmd, PYTHONCOERCECLOCALE='')
    return result.rc == 0
94
Nick Coghlan18974c32017-06-30 00:48:14 +100095
96
_fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all"
_EncodingDetails = namedtuple("EncodingDetails", _fields)


class EncodingDetails(_EncodingDetails):
    """Encoding and locale settings as reported by (or expected of) a child.

    Fields, in the order printed by CHILD_PROCESS_SCRIPT: the filesystem
    encoding, one "encoding:errors" string per standard stream, and the
    values of the LANG, LC_CTYPE and LC_ALL environment variables.
    """

    # XXX (ncoghlan): Using JSON for child state reporting may be less fragile
    # One print() per namedtuple field, so the child's stdout lines map
    # positionally onto the fields declared in _fields.
    CHILD_PROCESS_SCRIPT = ";".join([
        "import sys, os",
        "print(sys.getfilesystemencoding())",
        "print(sys.stdin.encoding + ':' + sys.stdin.errors)",
        "print(sys.stdout.encoding + ':' + sys.stdout.errors)",
        "print(sys.stderr.encoding + ':' + sys.stderr.errors)",
        "print(os.environ.get('LANG', 'not set'))",
        "print(os.environ.get('LC_CTYPE', 'not set'))",
        "print(os.environ.get('LC_ALL', 'not set'))",
    ])

    @classmethod
    def get_expected_details(cls, coercion_expected, fs_encoding, stream_encoding, env_vars):
        """Returns expected child process details for a given encoding

        Parameters:
            coercion_expected: if true, LC_CTYPE is expected to have been
              replaced by CLI_COERCION_TARGET rather than taken from env_vars
            fs_encoding: expected filesystem encoding
            stream_encoding: expected encoding of the three standard streams
            env_vars: mapping of the environment variables passed to the child

        Returns a dict keyed by the EncodingDetails field names. The locale
        related values are lower-cased, matching the lower-casing applied to
        the child's output by _handle_output_variations().
        """
        _stream = stream_encoding + ":{}"
        # stdin and stdout should use surrogateescape either because the
        # coercion triggered, or because the C locale was detected
        stream_info = 2*[_stream.format("surrogateescape")]
        # stderr should always use backslashreplace
        stream_info.append(_stream.format("backslashreplace"))
        expected_lang = env_vars.get("LANG", "not set").lower()
        if coercion_expected:
            expected_lc_ctype = CLI_COERCION_TARGET.lower()
        else:
            expected_lc_ctype = env_vars.get("LC_CTYPE", "not set").lower()
        expected_lc_all = env_vars.get("LC_ALL", "not set").lower()
        env_info = expected_lang, expected_lc_ctype, expected_lc_all
        return dict(cls(fs_encoding, *stream_info, *env_info)._asdict())

    @staticmethod
    def _handle_output_variations(data):
        """Adjust the output to handle platform specific idiosyncrasies

        * Some platforms report ASCII as ANSI_X3.4-1968
        * Some platforms report ASCII as US-ASCII
        * Some platforms report UTF-8 instead of utf-8

        *data* is a bytes object; the normalised bytes are returned.
        """
        data = data.replace(b"ANSI_X3.4-1968", b"ascii")
        data = data.replace(b"US-ASCII", b"ascii")
        data = data.lower()
        return data

    @classmethod
    def get_child_details(cls, env_vars):
        """Retrieves fsencoding and standard stream details from a child process

        Returns (encoding_details, stderr_lines):

        - encoding_details: dict keyed by the EncodingDetails field names,
          built from the (normalised) lines the child printed on stdout
        - stderr_lines: result of calling splitlines() on the stderr output

        The child runs CHILD_PROCESS_SCRIPT with "-X utf8=0" and with
        *env_vars* passed as keyword arguments to run_python_until_end().
        If the child exits with a non-zero status, result.fail() is called.
        """
        result, py_cmd = run_python_until_end(
            "-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT,
            **env_vars
        )
        if result.rc != 0:
            result.fail(py_cmd)
        # All subprocess outputs in this test case should be pure ASCII
        adjusted_output = cls._handle_output_variations(result.out)
        stdout_lines = adjusted_output.decode("ascii").splitlines()
        child_encoding_details = dict(cls(*stdout_lines)._asdict())
        stderr_lines = result.err.decode("ascii").rstrip().splitlines()
        return child_encoding_details, stderr_lines
168
169
# Details of the shared library warning emitted at runtime
LEGACY_LOCALE_WARNING = (
    "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
    "encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
    "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
    "locales is recommended."
)

# Details of the CLI locale coercion warning emitted at runtime
# (the placeholder is filled in with the coercion target locale name)
CLI_COERCION_WARNING_FMT = (
    "Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale "
    "or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior)."
)


# Module state filled in by setUpModule(); None means "not initialised yet"
AVAILABLE_TARGETS = None
CLI_COERCION_TARGET = None
CLI_COERCION_WARNING = None
Victor Stinner023564b2017-06-13 13:32:31 +0200188
def setUpModule():
    """Work out which coercion target locales are usable on this system.

    Populates the module globals AVAILABLE_TARGETS, CLI_COERCION_TARGET and
    CLI_COERCION_WARNING. The probing is done at most once: a second call
    returns immediately once AVAILABLE_TARGETS has been set.
    """
    global AVAILABLE_TARGETS
    global CLI_COERCION_TARGET
    global CLI_COERCION_WARNING

    if AVAILABLE_TARGETS is not None:
        # initialization already done
        return
    AVAILABLE_TARGETS = []

    # Find the target locales available in the current system
    for target_locale in _C_UTF8_LOCALES:
        if _set_locale_in_subprocess(target_locale):
            AVAILABLE_TARGETS.append(target_locale)

    if AVAILABLE_TARGETS:
        # Coercion is expected to use the first available target locale
        CLI_COERCION_TARGET = AVAILABLE_TARGETS[0]
        CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format(CLI_COERCION_TARGET)

    if support.verbose:
        # Dump the effective expectations to help diagnose failures
        print(f"AVAILABLE_TARGETS = {AVAILABLE_TARGETS!r}")
        print(f"EXPECTED_C_LOCALE_EQUIVALENTS = {EXPECTED_C_LOCALE_EQUIVALENTS!r}")
        print(f"EXPECTED_C_LOCALE_STREAM_ENCODING = {EXPECTED_C_LOCALE_STREAM_ENCODING!r}")
        print(f"EXPECTED_C_LOCALE_FS_ENCODING = {EXPECTED_C_LOCALE_FS_ENCODING!r}")
        print(f"EXPECT_COERCION_IN_DEFAULT_LOCALE = {EXPECT_COERCION_IN_DEFAULT_LOCALE!r}")
        print(f"_C_UTF8_LOCALES = {_C_UTF8_LOCALES!r}")
        print(f"_check_nl_langinfo_CODESET = {_check_nl_langinfo_CODESET!r}")
217
Victor Stinner023564b2017-06-13 13:32:31 +0200218
class _LocaleHandlingTestCase(unittest.TestCase):
    # Base class to check expected locale handling behaviour

    def _check_child_encoding_details(self,
                                      env_vars,
                                      expected_fs_encoding,
                                      expected_stream_encoding,
                                      expected_warnings,
                                      coercion_expected):
        """Check the C locale handling for the given process environment

        Parameters:
            env_vars: environment variables to set in the child process
            expected_fs_encoding: expected sys.getfilesystemencoding() result
            expected_stream_encoding: expected encoding for standard streams
            expected_warnings: list of stderr lines to expect; None is
              treated as an empty list (no stderr output expected)
            coercion_expected: whether the child's LC_CTYPE is expected to
              have been coerced to the target locale
        """
        result = EncodingDetails.get_child_details(env_vars)
        encoding_details, stderr_lines = result
        expected_details = EncodingDetails.get_expected_details(
            coercion_expected,
            expected_fs_encoding,
            expected_stream_encoding,
            env_vars
        )
        self.assertEqual(encoding_details, expected_details)
        if expected_warnings is None:
            expected_warnings = []
        self.assertEqual(stderr_lines, expected_warnings)
247
248
class LocaleConfigurationTests(_LocaleHandlingTestCase):
    # Test explicit external configuration via the process environment

    @classmethod
    def setUpClass(cls):
        # This relies on setUpModule() having been run, so it can't be
        # handled via the @unittest.skipUnless decorator
        if not AVAILABLE_TARGETS:
            raise unittest.SkipTest("No C-with-UTF-8 locale available")

    def test_external_target_locale_configuration(self):

        # Explicitly setting a target locale should give the same behaviour as
        # is seen when implicitly coercing to that target locale
        self.maxDiff = None

        expected_fs_encoding = "utf-8"
        expected_stream_encoding = "utf-8"

        # Start from an environment with every locale variable blanked out,
        # then set exactly one of them per subtest.
        base_var_dict = {
            "LANG": "",
            "LC_CTYPE": "",
            "LC_ALL": "",
            "PYTHONCOERCECLOCALE": "",
        }
        for env_var in ("LANG", "LC_CTYPE"):
            for locale_to_set in AVAILABLE_TARGETS:
                # XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as
                #                 expected, so skip that combination for now
                # See https://bugs.python.org/issue30672 for discussion
                if env_var == "LANG" and locale_to_set == "UTF-8":
                    continue

                with self.subTest(env_var=env_var,
                                  configured_locale=locale_to_set):
                    var_dict = base_var_dict.copy()
                    var_dict[env_var] = locale_to_set
                    self._check_child_encoding_details(var_dict,
                                                       expected_fs_encoding,
                                                       expected_stream_encoding,
                                                       expected_warnings=None,
                                                       coercion_expected=False)
Nick Coghlan6ea41862017-06-11 13:16:15 +1000291
292
293
@support.cpython_only
@unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"),
                     "C locale coercion disabled at build time")
class LocaleCoercionTests(_LocaleHandlingTestCase):
    # Test implicit reconfiguration of the environment during CLI startup

    def _check_c_locale_coercion(self,
                                 fs_encoding, stream_encoding,
                                 coerce_c_locale,
                                 expected_warnings=None,
                                 coercion_expected=True,
                                 **extra_vars):
        """Check the C locale handling for various configurations

        Parameters:
            fs_encoding: expected sys.getfilesystemencoding() result
            stream_encoding: expected encoding for standard streams
            coerce_c_locale: setting to use for PYTHONCOERCECLOCALE
              None: don't set the variable at all
              str: the value set in the child's environment
            expected_warnings: expected warning lines on stderr
            coercion_expected: whether LC_CTYPE is expected to be coerced
              to the target locale in the child process
            extra_vars: additional environment variables to set in subprocess
        """
        self.maxDiff = None

        if not AVAILABLE_TARGETS:
            # Locale coercion is disabled when there aren't any target locales
            fs_encoding = EXPECTED_C_LOCALE_FS_ENCODING
            stream_encoding = EXPECTED_C_LOCALE_STREAM_ENCODING
            coercion_expected = False
            if expected_warnings:
                expected_warnings = [LEGACY_LOCALE_WARNING]

        base_var_dict = {
            "LANG": "",
            "LC_CTYPE": "",
            "LC_ALL": "",
            "PYTHONCOERCECLOCALE": "",
        }
        base_var_dict.update(extra_vars)
        if coerce_c_locale is not None:
            base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale

        # Check behaviour for the default locale
        with self.subTest(default_locale=True,
                          PYTHONCOERCECLOCALE=coerce_c_locale):
            if EXPECT_COERCION_IN_DEFAULT_LOCALE:
                _expected_warnings = expected_warnings
                _coercion_expected = coercion_expected
            else:
                _expected_warnings = None
                _coercion_expected = False
            # On Android CLI_COERCION_WARNING is not printed when all the
            # locale environment variables are undefined or empty. When
            # this code path is run with environ['LC_ALL'] == 'C', then
            # LEGACY_LOCALE_WARNING is printed.
            if (support.is_android and
                    _expected_warnings == [CLI_COERCION_WARNING]):
                _expected_warnings = None
            self._check_child_encoding_details(base_var_dict,
                                               fs_encoding,
                                               stream_encoding,
                                               _expected_warnings,
                                               _coercion_expected)

        # Check behaviour for explicitly configured locales
        for locale_to_set in EXPECTED_C_LOCALE_EQUIVALENTS:
            for env_var in ("LANG", "LC_CTYPE"):
                with self.subTest(env_var=env_var,
                                  nominal_locale=locale_to_set,
                                  PYTHONCOERCECLOCALE=coerce_c_locale):
                    var_dict = base_var_dict.copy()
                    var_dict[env_var] = locale_to_set
                    # Check behaviour on successful coercion
                    self._check_child_encoding_details(var_dict,
                                                       fs_encoding,
                                                       stream_encoding,
                                                       expected_warnings,
                                                       coercion_expected)

    def test_PYTHONCOERCECLOCALE_not_set(self):
        # This should coerce to the first available target locale by default
        self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None)

    def test_PYTHONCOERCECLOCALE_not_zero(self):
        # *Any* string other than "0" is considered "set" for our purposes
        # and hence should result in the locale coercion being enabled
        for setting in ("", "1", "true", "false"):
            self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=setting)

    def test_PYTHONCOERCECLOCALE_set_to_warn(self):
        # PYTHONCOERCECLOCALE=warn enables runtime warnings for legacy locales
        self._check_c_locale_coercion("utf-8", "utf-8",
                                      coerce_c_locale="warn",
                                      expected_warnings=[CLI_COERCION_WARNING])

    def test_PYTHONCOERCECLOCALE_set_to_zero(self):
        # The setting "0" should result in the locale coercion being disabled
        self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
                                      EXPECTED_C_LOCALE_STREAM_ENCODING,
                                      coerce_c_locale="0",
                                      coercion_expected=False)
        # Setting LC_ALL=C shouldn't make any difference to the behaviour
        self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
                                      EXPECTED_C_LOCALE_STREAM_ENCODING,
                                      coerce_c_locale="0",
                                      LC_ALL="C",
                                      coercion_expected=False)

    def test_LC_ALL_set_to_C(self):
        # Setting LC_ALL should render the locale coercion ineffective
        self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
                                      EXPECTED_C_LOCALE_STREAM_ENCODING,
                                      coerce_c_locale=None,
                                      LC_ALL="C",
                                      coercion_expected=False)
        # And result in a warning about a lack of locale compatibility
        self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
                                      EXPECTED_C_LOCALE_STREAM_ENCODING,
                                      coerce_c_locale="warn",
                                      LC_ALL="C",
                                      expected_warnings=[LEGACY_LOCALE_WARNING],
                                      coercion_expected=False)

    def test_PYTHONCOERCECLOCALE_set_to_one(self):
        # skip the test if the LC_CTYPE locale is C or coerced
        old_loc = locale.setlocale(locale.LC_CTYPE, None)
        # Restore the runner's original LC_CTYPE once the test finishes
        self.addCleanup(locale.setlocale, locale.LC_CTYPE, old_loc)
        loc = locale.setlocale(locale.LC_CTYPE, "")
        if loc == "C":
            self.skipTest("test requires LC_CTYPE locale different than C")
        if loc in TARGET_LOCALES:
            self.skipTest("coerced LC_CTYPE locale: %s" % loc)

        # bpo-35336: PYTHONCOERCECLOCALE=1 must not coerce the LC_CTYPE locale
        # if it's not equal to "C"
        code = 'import locale; print(locale.setlocale(locale.LC_CTYPE, None))'
        env = dict(os.environ, PYTHONCOERCECLOCALE='1')
        cmd = subprocess.run([sys.executable, '-c', code],
                             stdout=subprocess.PIPE,
                             env=env,
                             text=True)
        self.assertEqual(cmd.stdout.rstrip(), loc)
438
439
def test_main():
    """Run both test classes, then reap any leftover child processes."""
    support.run_unittest(
        LocaleConfigurationTests,
        LocaleCoercionTests
    )
    support.reap_children()


if __name__ == "__main__":
    test_main()