[3.7] bpo-34589: Add -X coerce_c_locale option; C locale coercion off by default (GH-9379)
* bpo-34589: Make _PyCoreConfig.coerce_c_locale private (GH-9371)
_PyCoreConfig:
* Rename coerce_c_locale to _coerce_c_locale
* Rename coerce_c_locale_warn to _coerce_c_locale_warn
These fields are now private (name prefixed by "_").
(cherry picked from commit 188ebfa475a6f6aa2d0ea14ca8e1fbe7865b6d27)
* bpo-34589: C locale coercion off by default (GH-9073)
Py_Initialize() and Py_Main() cannot enable the C locale coercion
(PEP 538) anymore: it is always disabled. It can now only be enabled
by the Python program ("python3).
test_embed: get_filesystem_encoding() doesn't have to set PYTHONUTF8
nor PYTHONCOERCECLOCALE, these variables are already set in the
parent.
(cherry picked from commit 7a0791b6992d420dc52536257f2f093851ed7215)
* bpo-34589: Add -X coerce_c_locale command line option (GH-9378)
Add a new -X coerce_c_locale command line option to control C locale
coercion (PEP 538).
(cherry picked from commit dbdee0073cf0b88fe541980ace1f650900f455cc)
diff --git a/Modules/main.c b/Modules/main.c
index 0f7498d..c6ffb15 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -1834,6 +1834,17 @@
return _Py_INIT_OK();
}
+#ifndef MS_WINDOWS
+ /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
+ const char *ctype_loc = setlocale(LC_CTYPE, NULL);
+ if (ctype_loc != NULL
+ && (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0))
+ {
+ config->utf8_mode = 1;
+ return _Py_INIT_OK();
+ }
+#endif
+
return _Py_INIT_OK();
}
@@ -1857,16 +1868,18 @@
const char *env = config_get_env_var("PYTHONCOERCECLOCALE");
if (env) {
if (strcmp(env, "0") == 0) {
- if (config->coerce_c_locale < 0) {
- config->coerce_c_locale = 0;
+ if (config->_coerce_c_locale < 0) {
+ config->_coerce_c_locale = 0;
}
}
else if (strcmp(env, "warn") == 0) {
- config->coerce_c_locale_warn = 1;
+ if (config->_coerce_c_locale_warn < 0) {
+ config->_coerce_c_locale_warn = 1;
+ }
}
else {
- if (config->coerce_c_locale < 0) {
- config->coerce_c_locale = 1;
+ if (config->_coerce_c_locale < 0) {
+ config->_coerce_c_locale = 1;
}
}
}
@@ -2046,7 +2059,7 @@
* See the documentation of the PYTHONCOERCECLOCALE setting for more
* details.
*/
- if (config->coerce_c_locale && !locale_coerced) {
+ if (config->_coerce_c_locale && !locale_coerced) {
locale_coerced = 1;
_Py_CoerceLegacyLocale(config);
encoding_changed = 1;
@@ -2073,7 +2086,7 @@
pymain_read_conf_impl(). Reset Py_IsolatedFlag and Py_NoSiteFlag
modified by _PyCoreConfig_Read(). */
int new_utf8_mode = config->utf8_mode;
- int new_coerce_c_locale = config->coerce_c_locale;
+ int new_coerce_c_locale = config->_coerce_c_locale;
Py_IgnoreEnvironmentFlag = init_ignore_env;
if (_PyCoreConfig_Copy(config, &save_config) < 0) {
pymain->err = _Py_INIT_NO_MEMORY();
@@ -2085,7 +2098,7 @@
cmdline_get_global_config(cmdline);
_PyCoreConfig_GetGlobalConfig(config);
config->utf8_mode = new_utf8_mode;
- config->coerce_c_locale = new_coerce_c_locale;
+ config->_coerce_c_locale = new_coerce_c_locale;
/* The encoding changed: read again the configuration
with the new encoding */
@@ -2103,28 +2116,76 @@
}
-static void
-config_init_locale(_PyCoreConfig *config)
+static _PyInitError
+config_init_coerce_c_locale(_PyCoreConfig *config)
{
- if (config->coerce_c_locale < 0) {
- /* The C locale enables the C locale coercion (PEP 538) */
- if (_Py_LegacyLocaleDetected()) {
- config->coerce_c_locale = 1;
+ const wchar_t *xopt = config_get_xoption(config, L"coerce_c_locale");
+ if (xopt) {
+ wchar_t *sep = wcschr(xopt, L'=');
+ if (sep) {
+ xopt = sep + 1;
+ if (wcscmp(xopt, L"1") == 0) {
+ if (config->_coerce_c_locale < 0) {
+ config->_coerce_c_locale = 1;
+ }
+ }
+ else if (wcscmp(xopt, L"0") == 0) {
+ if (config->_coerce_c_locale < 0) {
+ config->_coerce_c_locale = 0;
+ }
+ }
+ else if (wcscmp(xopt, L"warn") == 0) {
+ if (config->_coerce_c_locale_warn < 0) {
+ config->_coerce_c_locale_warn = 1;
+ }
+ }
+ else {
+ return _Py_INIT_USER_ERR("invalid -X coerce_c_locale option value");
+ }
+ }
+ else {
+ if (config->_coerce_c_locale < 0) {
+ config->_coerce_c_locale = 1;
+ }
+ }
+
+ if (config->_coerce_c_locale_warn < 0) {
+ config->_coerce_c_locale_warn = 0;
}
}
-#ifndef MS_WINDOWS
- if (config->utf8_mode < 0) {
- /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
- const char *ctype_loc = setlocale(LC_CTYPE, NULL);
- if (ctype_loc != NULL
- && (strcmp(ctype_loc, "C") == 0
- || strcmp(ctype_loc, "POSIX") == 0))
- {
- config->utf8_mode = 1;
+ const char *env = config_get_env_var("PYTHONCOERCECLOCALE");
+ if (env) {
+ if (strcmp(env, "0") == 0) {
+ if (config->_coerce_c_locale < 0) {
+ config->_coerce_c_locale = 0;
+ }
+ }
+ else if (strcmp(env, "warn") == 0) {
+ if (config->_coerce_c_locale_warn < 0) {
+ config->_coerce_c_locale_warn = 1;
+ }
+ }
+ else {
+ if (config->_coerce_c_locale < 0) {
+ config->_coerce_c_locale = 1;
+ }
+ }
+
+ if (config->_coerce_c_locale_warn < 0) {
+ config->_coerce_c_locale_warn = 0;
}
}
-#endif
+
+ if (config->_coerce_c_locale < 0) {
+ /* The C locale enables the C locale coercion (PEP 538) */
+ if (_Py_LegacyLocaleDetected()) {
+ config->_coerce_c_locale = 1;
+ return _Py_INIT_OK();
+ }
+ }
+
+ return _Py_INIT_OK();
}
@@ -2284,8 +2345,11 @@
}
}
- if (config->utf8_mode < 0 || config->coerce_c_locale < 0) {
- config_init_locale(config);
+ if (config->_coerce_c_locale < 0 || config->_coerce_c_locale_warn < 0) {
+ err = config_init_coerce_c_locale(config);
+ if (_Py_INIT_FAILED(err)) {
+ return err;
+ }
}
if (!config->_disable_importlib) {
@@ -2317,8 +2381,11 @@
if (config->tracemalloc < 0) {
config->tracemalloc = 0;
}
- if (config->coerce_c_locale < 0) {
- config->coerce_c_locale = 0;
+ if (config->_coerce_c_locale < 0) {
+ config->_coerce_c_locale = 0;
+ }
+ if (config->_coerce_c_locale_warn < 0) {
+ config->_coerce_c_locale_warn = 0;
}
if (config->utf8_mode < 0) {
config->utf8_mode = 0;
@@ -2327,6 +2394,10 @@
config->argc = 0;
}
+ assert(config->_coerce_c_locale >= 0);
+ assert(config->_coerce_c_locale_warn >= 0);
+ assert(config->ignore_environment >= 0);
+
return _Py_INIT_OK();
}
@@ -2410,8 +2481,8 @@
COPY_ATTR(dump_refs);
COPY_ATTR(malloc_stats);
- COPY_ATTR(coerce_c_locale);
- COPY_ATTR(coerce_c_locale_warn);
+ COPY_ATTR(_coerce_c_locale);
+ COPY_ATTR(_coerce_c_locale_warn);
COPY_ATTR(utf8_mode);
COPY_STR_ATTR(module_search_path_env);
@@ -2638,7 +2709,7 @@
static void
-pymain_init(_PyMain *pymain)
+pymain_init(_PyMain *pymain, int use_c_locale_coercion)
{
/* 754 requires that FP exceptions run in "no stop" mode by default,
* and until C vendors implement C99's ways to control FP exceptions,
@@ -2651,6 +2722,11 @@
pymain->config._disable_importlib = 0;
pymain->config.install_signal_handlers = 1;
+ if (use_c_locale_coercion) {
+ /* set to -1 to be able to enable the feature */
+ pymain->config._coerce_c_locale = -1;
+ pymain->config._coerce_c_locale_warn = -1;
+ }
}
@@ -2751,9 +2827,9 @@
static int
-pymain_main(_PyMain *pymain)
+pymain_main(_PyMain *pymain, int use_c_locale_coercion)
{
- pymain_init(pymain);
+ pymain_init(pymain, use_c_locale_coercion);
int res = pymain_cmdline(pymain);
if (res < 0) {
@@ -2802,10 +2878,22 @@
pymain.argc = argc;
pymain.wchar_argv = argv;
- return pymain_main(&pymain);
+ return pymain_main(&pymain, 0);
}
+#ifdef MS_WINDOWS
+int
+_Py_WindowsMain(int argc, wchar_t **argv)
+{
+ _PyMain pymain = _PyMain_INIT;
+ pymain.use_bytes_argv = 0;
+ pymain.argc = argc;
+ pymain.wchar_argv = argv;
+
+ return pymain_main(&pymain, 1);
+}
+#else
int
_Py_UnixMain(int argc, char **argv)
{
@@ -2814,8 +2902,9 @@
pymain.argc = argc;
pymain.bytes_argv = argv;
- return pymain_main(&pymain);
+ return pymain_main(&pymain, 1);
}
+#endif
/* this is gonna seem *real weird*, but if you put some other code between