Nick Coghlan | 6ea4186 | 2017-06-11 13:16:15 +1000 | [diff] [blame] | 1 | # Tests the attempted automatic coercion of the C locale to a UTF-8 locale |
| 2 | |
Nick Coghlan | 18974c3 | 2017-06-30 00:48:14 +1000 | [diff] [blame] | 3 | import locale |
Nick Coghlan | 6ea4186 | 2017-06-11 13:16:15 +1000 | [diff] [blame] | 4 | import os |
Victor Stinner | 55e4980 | 2018-11-30 11:34:47 +0100 | [diff] [blame] | 5 | import shutil |
| 6 | import subprocess |
Nick Coghlan | 6ea4186 | 2017-06-11 13:16:15 +1000 | [diff] [blame] | 7 | import sys |
| 8 | import sysconfig |
Victor Stinner | 55e4980 | 2018-11-30 11:34:47 +0100 | [diff] [blame] | 9 | import unittest |
Nick Coghlan | 6ea4186 | 2017-06-11 13:16:15 +1000 | [diff] [blame] | 10 | from collections import namedtuple |
| 11 | |
Victor Stinner | 7c2d570 | 2018-11-21 12:21:25 +0100 | [diff] [blame] | 12 | from test import support |
Nick Coghlan | 6ea4186 | 2017-06-11 13:16:15 +1000 | [diff] [blame] | 13 | from test.support.script_helper import ( |
| 14 | run_python_until_end, |
| 15 | interpreter_requires_environment, |
| 16 | ) |
| 17 | |
# Names through which we expect to be able to request the "C" locale
EXPECTED_C_LOCALE_EQUIVALENTS = ["C", "invalid.ascii"]

# Default encodings we expect in the C locale, for the standard streams
# and for the filesystem respectively
EXPECTED_C_LOCALE_STREAM_ENCODING = "ascii"
EXPECTED_C_LOCALE_FS_ENCODING = "ascii"

# Whether coercion is expected when no locale is specified at all
EXPECT_COERCION_IN_DEFAULT_LOCALE = True

TARGET_LOCALES = ["C.UTF-8", "C.utf8", "UTF-8"]

# Platform dependent overrides of the expectations above
if sys.platform.startswith("linux") and support.is_android:
    # Android defaults to using UTF-8 for all system interfaces
    EXPECTED_C_LOCALE_STREAM_ENCODING = EXPECTED_C_LOCALE_FS_ENCODING = "utf-8"
elif sys.platform.startswith("linux"):
    # Linux distros typically alias the POSIX locale directly to the C
    # locale.
    # TODO: Once https://bugs.python.org/issue30672 is addressed, we'll be
    #       able to check this case unconditionally
    EXPECTED_C_LOCALE_EQUIVALENTS.append("POSIX")
elif sys.platform.startswith("aix"):
    # AIX uses iso8859-1 in the C locale, other *nix platforms use ASCII
    EXPECTED_C_LOCALE_STREAM_ENCODING = EXPECTED_C_LOCALE_FS_ENCODING = "iso8859-1"
elif sys.platform == "darwin":
    # FS encoding is UTF-8 on macOS
    EXPECTED_C_LOCALE_FS_ENCODING = "utf-8"
elif sys.platform == "cygwin":
    # Cygwin defaults to using C.UTF-8
    # TODO: Work out a robust dynamic test for this that doesn't rely on
    #       CPython's own locale handling machinery
    EXPECT_COERCION_IN_DEFAULT_LOCALE = False
Nick Coghlan | 4563099 | 2017-06-13 22:49:44 +1000 | [diff] [blame] | 55 | |
Nick Coghlan | 9c19b02 | 2017-12-16 21:51:19 +1300 | [diff] [blame] | 56 | # Note that the above expectations are still wrong in some cases, such as: |
Nick Coghlan | 4563099 | 2017-06-13 22:49:44 +1000 | [diff] [blame] | 57 | # * Windows when PYTHONLEGACYWINDOWSFSENCODING is set |
Nick Coghlan | 9c19b02 | 2017-12-16 21:51:19 +1300 | [diff] [blame] | 58 | # * Any platform other than AIX that uses latin-1 in the C locale |
| 59 | # * Any Linux distro where POSIX isn't a simple alias for the C locale |
| 60 | # * Any Linux distro where the default locale is something other than "C" |
Nick Coghlan | eb81795 | 2017-06-18 12:29:42 +1000 | [diff] [blame] | 61 | # |
| 62 | # Options for dealing with this: |
Nick Coghlan | 9c19b02 | 2017-12-16 21:51:19 +1300 | [diff] [blame] | 63 | # * Don't set the PY_COERCE_C_LOCALE preprocessor definition on |
| 64 | # such platforms (e.g. it isn't set on Windows) |
Nick Coghlan | eb81795 | 2017-06-18 12:29:42 +1000 | [diff] [blame] | 65 | # * Fix the test expectations to match the actual platform behaviour |
Nick Coghlan | 4563099 | 2017-06-13 22:49:44 +1000 | [diff] [blame] | 66 | |
# In order to get the warning messages to match up as expected, the candidate
# order here must match the target locale order in Python/pylifecycle.c
_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8")

# Locale alias lists can't be checked reliably across platforms, so the
# only way to learn which of these locales will work is to try each one
# with locale.setlocale(). That is done in a subprocess, from setUpModule()
# below, so the locale of the test runner itself is never altered.
#
# The interpreter skips coercion to a target locale for which
# `locale.nl_langinfo(locale.CODESET)` fails, so that call is checked as
# well, provided the relevant locale module attributes exist and we are
# not on a platform where we expect it to always succeed.
_check_nl_langinfo_CODESET = (
    sys.platform not in ("darwin", "linux")
    and all(hasattr(locale, attr) for attr in ("nl_langinfo", "CODESET"))
)
| 85 | |
def _set_locale_in_subprocess(locale_name):
    """Return True if a child interpreter can set LC_CTYPE to *locale_name*.

    The child calls locale.setlocale(locale.LC_CTYPE, locale_name) and,
    when _check_nl_langinfo_CODESET is true, additionally exits with a
    non-zero status if locale.nl_langinfo(locale.CODESET) is empty.
    The result is True only when the child exits with status 0.
    """
    statements = [
        "import locale",
        "print(locale.setlocale(locale.LC_CTYPE, '{}'))".format(locale_name),
    ]
    if _check_nl_langinfo_CODESET:
        # If there's no valid CODESET, we expect coercion to be skipped
        statements += [
            "import sys",
            "sys.exit(not locale.nl_langinfo(locale.CODESET))",
        ]
    outcome, _ = run_python_until_end(
        "-c", "; ".join(statements), PYTHONCOERCECLOCALE=''
    )
    return outcome.rc == 0
| 94 | |
Nick Coghlan | 18974c3 | 2017-06-30 00:48:14 +1000 | [diff] [blame] | 95 | |
| 96 | |
_fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all"
_EncodingDetails = namedtuple("EncodingDetails", _fields)


class EncodingDetails(_EncodingDetails):
    # Record of the encoding and locale state reported by a child process:
    # the filesystem encoding, an "encoding:errors" string for each of the
    # three standard streams, and the LANG, LC_CTYPE and LC_ALL variables.

    # One print() per field, in field order, so the child's stdout lines can
    # be passed positionally to the namedtuple constructor.
    # XXX (ncoghlan): Using JSON for child state reporting may be less fragile
    CHILD_PROCESS_SCRIPT = ";".join([
        "import sys, os",
        "print(sys.getfilesystemencoding())",
        "print(sys.stdin.encoding + ':' + sys.stdin.errors)",
        "print(sys.stdout.encoding + ':' + sys.stdout.errors)",
        "print(sys.stderr.encoding + ':' + sys.stderr.errors)",
        "print(os.environ.get('LANG', 'not set'))",
        "print(os.environ.get('LC_CTYPE', 'not set'))",
        "print(os.environ.get('LC_ALL', 'not set'))",
    ])

    @classmethod
    def get_expected_details(cls, coercion_expected, fs_encoding, stream_encoding, env_vars):
        """Return the expected child process details as a dict.

        Parameters:
            coercion_expected: if true, LC_CTYPE is expected to have been
                set to CLI_COERCION_TARGET; otherwise it is taken from
                *env_vars* like the other variables
            fs_encoding: expected sys.getfilesystemencoding() result
            stream_encoding: expected encoding of the standard streams
            env_vars: mapping of the environment variables passed to the
                child; missing entries are reported as "not set"

        The dict is keyed by field name. Environment values are lowercased
        because get_child_details() lowercases the whole child output.
        """
        _stream = stream_encoding + ":{}"
        # stdin and stdout should use surrogateescape either because the
        # coercion triggered, or because the C locale was detected
        stream_info = 2*[_stream.format("surrogateescape")]
        # stderr should always use backslashreplace
        stream_info.append(_stream.format("backslashreplace"))
        expected_lang = env_vars.get("LANG", "not set").lower()
        if coercion_expected:
            expected_lc_ctype = CLI_COERCION_TARGET.lower()
        else:
            expected_lc_ctype = env_vars.get("LC_CTYPE", "not set").lower()
        expected_lc_all = env_vars.get("LC_ALL", "not set").lower()
        env_info = expected_lang, expected_lc_ctype, expected_lc_all
        return dict(cls(fs_encoding, *stream_info, *env_info)._asdict())

    @staticmethod
    def _handle_output_variations(data):
        """Adjust the output to handle platform specific idiosyncrasies

        * Some platforms report ASCII as ANSI_X3.4-1968
        * Some platforms report ASCII as US-ASCII
        * Some platforms report UTF-8 instead of utf-8

        *data* is a bytes object; the normalised bytes are returned.
        """
        # Lowercase first so the ASCII aliases are recognised regardless of
        # the case the platform happens to report them in.
        data = data.lower()
        data = data.replace(b"ansi_x3.4-1968", b"ascii")
        data = data.replace(b"us-ascii", b"ascii")
        return data

    @classmethod
    def get_child_details(cls, env_vars):
        """Retrieves fsencoding and standard stream details from a child process

        The child runs CHILD_PROCESS_SCRIPT with "-X utf8=0" and with
        *env_vars* passed on to run_python_until_end() as keyword arguments.

        Returns (encoding_details, stderr_lines):

        - encoding_details: dict keyed by EncodingDetails field name, built
          from the normalised stdout lines of the child
        - stderr_lines: result of calling splitlines() on the stderr output

        NOTE(review): this docstring used to state that the child is run in
        isolated mode when supported; that depends on run_python_until_end()
        and should be confirmed there.
        """
        result, py_cmd = run_python_until_end(
            "-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT,
            **env_vars
        )
        if result.rc != 0:
            result.fail(py_cmd)
        # All subprocess outputs in this test case should be pure ASCII
        adjusted_output = cls._handle_output_variations(result.out)
        stdout_lines = adjusted_output.decode("ascii").splitlines()
        child_encoding_details = dict(cls(*stdout_lines)._asdict())
        stderr_lines = result.err.decode("ascii").rstrip().splitlines()
        return child_encoding_details, stderr_lines
| 168 | |
| 169 | |
# Text of the shared library warning emitted at runtime
LEGACY_LOCALE_WARNING = (
    "Python runtime initialized with LC_CTYPE=C "
    "(a locale with default ASCII encoding), "
    "which may cause Unicode compatibility problems. "
    "Using C.UTF-8, C.utf8, or UTF-8 (if available) "
    "as alternative Unicode-compatible locales is recommended."
)

# Template of the CLI locale coercion warning emitted at runtime; the
# placeholder receives the name of the locale that was coerced to
CLI_COERCION_WARNING_FMT = (
    "Python detected LC_CTYPE=C: LC_CTYPE coerced to {} "
    "(set another locale or PYTHONCOERCECLOCALE=0 "
    "to disable this locale coercion behavior)."
)


# Module state filled in by setUpModule(); None means "not initialised yet"
AVAILABLE_TARGETS = CLI_COERCION_TARGET = CLI_COERCION_WARNING = None
Victor Stinner | 023564b | 2017-06-13 13:32:31 +0200 | [diff] [blame] | 188 | |
def setUpModule():
    """Work out which coercion target locales are usable on this system.

    Fills in AVAILABLE_TARGETS with the entries of _C_UTF8_LOCALES that a
    subprocess managed to set, in their original order. When at least one
    is available, CLI_COERCION_TARGET becomes the first of them and
    CLI_COERCION_WARNING the matching formatted warning text. The probing
    is done at most once; later calls return immediately.
    """
    global AVAILABLE_TARGETS
    global CLI_COERCION_TARGET
    global CLI_COERCION_WARNING

    if AVAILABLE_TARGETS is not None:
        # A previous call already did the initialization
        return

    # Keep only the target locales that can actually be set here
    AVAILABLE_TARGETS = []
    AVAILABLE_TARGETS.extend(
        filter(_set_locale_in_subprocess, _C_UTF8_LOCALES)
    )

    if AVAILABLE_TARGETS:
        # Coercion is expected to use the first available target locale
        CLI_COERCION_TARGET = AVAILABLE_TARGETS[0]
        CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format(CLI_COERCION_TARGET)

    if not support.verbose:
        return

    # Dump the effective test configuration when running verbosely
    settings = (
        ("AVAILABLE_TARGETS", AVAILABLE_TARGETS),
        ("EXPECTED_C_LOCALE_EQUIVALENTS", EXPECTED_C_LOCALE_EQUIVALENTS),
        ("EXPECTED_C_LOCALE_STREAM_ENCODING", EXPECTED_C_LOCALE_STREAM_ENCODING),
        ("EXPECTED_C_LOCALE_FS_ENCODING", EXPECTED_C_LOCALE_FS_ENCODING),
        ("EXPECT_COERCION_IN_DEFAULT_LOCALE", EXPECT_COERCION_IN_DEFAULT_LOCALE),
        ("_C_UTF8_LOCALES", _C_UTF8_LOCALES),
        ("_check_nl_langinfo_CODESET", _check_nl_langinfo_CODESET),
    )
    for label, value in settings:
        print(f"{label} = {value!r}")
| 217 | |
Victor Stinner | 023564b | 2017-06-13 13:32:31 +0200 | [diff] [blame] | 218 | |
class _LocaleHandlingTestCase(unittest.TestCase):
    # Common base class providing the child process comparison helper

    def _check_child_encoding_details(self,
                                      env_vars,
                                      expected_fs_encoding,
                                      expected_stream_encoding,
                                      expected_warnings,
                                      coercion_expected):
        """Check the C locale handling for the given process environment

        Parameters:
            env_vars: environment variables to run the child process with
            expected_fs_encoding: expected sys.getfilesystemencoding() result
            expected_stream_encoding: expected encoding for standard streams
            expected_warnings: list of stderr lines to expect, or None when
                the child should not write anything to stderr
            coercion_expected: whether LC_CTYPE is expected to be coerced
        """
        actual_details, stderr_lines = EncodingDetails.get_child_details(env_vars)
        wanted_details = EncodingDetails.get_expected_details(
            coercion_expected,
            expected_fs_encoding,
            expected_stream_encoding,
            env_vars
        )
        self.assertEqual(actual_details, wanted_details)
        # None is shorthand for "no output on stderr at all"
        wanted_stderr = [] if expected_warnings is None else expected_warnings
        self.assertEqual(stderr_lines, wanted_stderr)
| 247 | |
| 248 | |
class LocaleConfigurationTests(_LocaleHandlingTestCase):
    # Test explicit external configuration via the process environment

    @classmethod
    def setUpClass(cls):
        # AVAILABLE_TARGETS is only filled in by setUpModule(), so this
        # check can't be expressed with the @unittest.skipUnless decorator
        if AVAILABLE_TARGETS:
            return
        raise unittest.SkipTest("No C-with-UTF-8 locale available")

    def test_external_target_locale_configuration(self):
        # Explicitly setting a target locale should give the same behaviour
        # as is seen when implicitly coercing to that target locale
        self.maxDiff = None

        utf8 = "utf-8"
        # Start every run from an environment with all locale settings blank
        blank_env = dict.fromkeys(
            ("LANG", "LC_CTYPE", "LC_ALL", "PYTHONCOERCECLOCALE"), ""
        )

        for env_var in ("LANG", "LC_CTYPE"):
            for locale_to_set in AVAILABLE_TARGETS:
                # XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as
                #                 expected, so skip that combination for now
                # See https://bugs.python.org/issue30672 for discussion
                if (env_var, locale_to_set) == ("LANG", "UTF-8"):
                    continue

                with self.subTest(env_var=env_var,
                                  configured_locale=locale_to_set):
                    var_dict = {**blank_env, env_var: locale_to_set}
                    self._check_child_encoding_details(
                        var_dict,
                        utf8,
                        utf8,
                        expected_warnings=None,
                        coercion_expected=False,
                    )
Nick Coghlan | 6ea4186 | 2017-06-11 13:16:15 +1000 | [diff] [blame] | 291 | |
| 292 | |
| 293 | |
@support.cpython_only
@unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"),
                     "C locale coercion disabled at build time")
class LocaleCoercionTests(_LocaleHandlingTestCase):
    # Test implicit reconfiguration of the environment during CLI startup

    def _check_c_locale_coercion(self,
                                 fs_encoding, stream_encoding,
                                 coerce_c_locale,
                                 expected_warnings=None,
                                 coercion_expected=True,
                                 **extra_vars):
        """Check the C locale handling for various configurations

        Parameters:
            fs_encoding: expected sys.getfilesystemencoding() result
            stream_encoding: expected encoding for standard streams
            coerce_c_locale: setting to use for PYTHONCOERCECLOCALE
              None: leave the variable at the empty default used here
              str: the value set in the child's environment
            expected_warnings: expected warning lines on stderr
            coercion_expected: whether LC_CTYPE is expected to be coerced
              to CLI_COERCION_TARGET in the child
            extra_vars: additional environment variables to set in subprocess

        The child is checked once with no locale configured, and then once
        for each combination of LANG/LC_CTYPE and every name listed in
        EXPECTED_C_LOCALE_EQUIVALENTS.
        """
        self.maxDiff = None

        if not AVAILABLE_TARGETS:
            # Locale coercion is disabled when there aren't any target locales
            fs_encoding = EXPECTED_C_LOCALE_FS_ENCODING
            stream_encoding = EXPECTED_C_LOCALE_STREAM_ENCODING
            coercion_expected = False
            if expected_warnings:
                expected_warnings = [LEGACY_LOCALE_WARNING]

        base_var_dict = {
            "LANG": "",
            "LC_CTYPE": "",
            "LC_ALL": "",
            "PYTHONCOERCECLOCALE": "",
        }
        base_var_dict.update(extra_vars)
        if coerce_c_locale is not None:
            base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale

        # Check behaviour for the default locale
        with self.subTest(default_locale=True,
                          PYTHONCOERCECLOCALE=coerce_c_locale):
            if EXPECT_COERCION_IN_DEFAULT_LOCALE:
                _expected_warnings = expected_warnings
                _coercion_expected = coercion_expected
            else:
                _expected_warnings = None
                _coercion_expected = False
            # On Android CLI_COERCION_WARNING is not printed when all the
            # locale environment variables are undefined or empty. When
            # this code path is run with environ['LC_ALL'] == 'C', then
            # LEGACY_LOCALE_WARNING is printed.
            if (support.is_android and
                    _expected_warnings == [CLI_COERCION_WARNING]):
                _expected_warnings = None
            self._check_child_encoding_details(base_var_dict,
                                               fs_encoding,
                                               stream_encoding,
                                               _expected_warnings,
                                               _coercion_expected)

        # Check behaviour for explicitly configured locales
        for locale_to_set in EXPECTED_C_LOCALE_EQUIVALENTS:
            for env_var in ("LANG", "LC_CTYPE"):
                with self.subTest(env_var=env_var,
                                  nominal_locale=locale_to_set,
                                  PYTHONCOERCECLOCALE=coerce_c_locale):
                    var_dict = base_var_dict.copy()
                    var_dict[env_var] = locale_to_set
                    # Check behaviour on successful coercion
                    self._check_child_encoding_details(var_dict,
                                                       fs_encoding,
                                                       stream_encoding,
                                                       expected_warnings,
                                                       coercion_expected)

    def test_PYTHONCOERCECLOCALE_not_set(self):
        # This should coerce to the first available target locale by default
        self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None)

    def test_PYTHONCOERCECLOCALE_not_zero(self):
        # *Any* string other than "0" is considered "set" for our purposes
        # and hence should result in the locale coercion being enabled
        for setting in ("", "1", "true", "false"):
            self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=setting)

    def test_PYTHONCOERCECLOCALE_set_to_warn(self):
        # PYTHONCOERCECLOCALE=warn enables runtime warnings for legacy locales
        self._check_c_locale_coercion("utf-8", "utf-8",
                                      coerce_c_locale="warn",
                                      expected_warnings=[CLI_COERCION_WARNING])

    def test_PYTHONCOERCECLOCALE_set_to_zero(self):
        # The setting "0" should result in the locale coercion being disabled
        self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
                                      EXPECTED_C_LOCALE_STREAM_ENCODING,
                                      coerce_c_locale="0",
                                      coercion_expected=False)
        # Setting LC_ALL=C shouldn't make any difference to the behaviour
        self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
                                      EXPECTED_C_LOCALE_STREAM_ENCODING,
                                      coerce_c_locale="0",
                                      LC_ALL="C",
                                      coercion_expected=False)

    def test_LC_ALL_set_to_C(self):
        # Setting LC_ALL should render the locale coercion ineffective
        self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
                                      EXPECTED_C_LOCALE_STREAM_ENCODING,
                                      coerce_c_locale=None,
                                      LC_ALL="C",
                                      coercion_expected=False)
        # And result in a warning about a lack of locale compatibility
        self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
                                      EXPECTED_C_LOCALE_STREAM_ENCODING,
                                      coerce_c_locale="warn",
                                      LC_ALL="C",
                                      expected_warnings=[LEGACY_LOCALE_WARNING],
                                      coercion_expected=False)

    def test_PYTHONCOERCECLOCALE_set_to_one(self):
        # skip the test if the LC_CTYPE locale is C or coerced
        old_loc = locale.setlocale(locale.LC_CTYPE, None)
        self.addCleanup(locale.setlocale, locale.LC_CTYPE, old_loc)
        try:
            loc = locale.setlocale(locale.LC_CTYPE, "")
        except locale.Error as exc:
            # The locale configured in the environment isn't supported on
            # this system: nothing meaningful can be checked, so skip
            # rather than report an error
            self.skipTest(str(exc))
        if loc == "C":
            self.skipTest("test requires LC_CTYPE locale different than C")
        if loc in TARGET_LOCALES:
            self.skipTest("coerced LC_CTYPE locale: %s" % loc)

        # bpo-35336: PYTHONCOERCECLOCALE=1 must not coerce the LC_CTYPE locale
        # if it's not equal to "C"
        code = 'import locale; print(locale.setlocale(locale.LC_CTYPE, None))'
        env = dict(os.environ, PYTHONCOERCECLOCALE='1')
        cmd = subprocess.run([sys.executable, '-c', code],
                             stdout=subprocess.PIPE,
                             env=env,
                             text=True)
        self.assertEqual(cmd.stdout.rstrip(), loc)
| 439 | |
def test_main():
    # Run both test classes through support.run_unittest(), then call
    # support.reap_children() once they have finished
    test_classes = (LocaleConfigurationTests, LocaleCoercionTests)
    support.run_unittest(*test_classes)
    support.reap_children()


if __name__ == "__main__":
    test_main()