bpo-36142: Add _PyPreConfig.utf8_mode (GH-12174)

* Move the following fields from _PyCoreConfig to _PyPreConfig:

  * coerce_c_locale
  * coerce_c_locale_warn
  * legacy_windows_fs_encoding
  * utf8_mode

* _PyPreConfig_ReadFromArgv() is now responsible for choosing the
  filesystem encoding
* _PyPreConfig_Write() now sets the LC_CTYPE locale
diff --git a/Python/coreconfig.c b/Python/coreconfig.c
index a6aa89b..e372de4 100644
--- a/Python/coreconfig.c
+++ b/Python/coreconfig.c
@@ -531,10 +531,6 @@
     COPY_ATTR(dump_refs);
     COPY_ATTR(malloc_stats);
 
-    COPY_ATTR(coerce_c_locale);
-    COPY_ATTR(coerce_c_locale_warn);
-    COPY_ATTR(utf8_mode);
-
     COPY_WSTR_ATTR(pycache_prefix);
     COPY_WSTR_ATTR(module_search_path_env);
     COPY_WSTR_ATTR(home);
@@ -571,7 +567,6 @@
     COPY_STR_ATTR(stdio_encoding);
     COPY_STR_ATTR(stdio_errors);
 #ifdef MS_WINDOWS
-    COPY_ATTR(legacy_windows_fs_encoding);
     COPY_ATTR(legacy_windows_stdio);
 #endif
     COPY_ATTR(skip_source_first_line);
@@ -592,19 +587,7 @@
 const char*
 _PyCoreConfig_GetEnv(const _PyCoreConfig *config, const char *name)
 {
-    assert(config->preconfig.use_environment >= 0);
-
-    if (!config->preconfig.use_environment) {
-        return NULL;
-    }
-
-    const char *var = getenv(name);
-    if (var && var[0] != '\0') {
-        return var;
-    }
-    else {
-        return NULL;
-    }
+    return _PyPreConfig_GetEnv(&config->preconfig, name);
 }
 
 
@@ -670,7 +653,6 @@
             config->ATTR = !(VALUE); \
         }
 
-    COPY_FLAG(utf8_mode, Py_UTF8Mode);
     COPY_FLAG(bytes_warning, Py_BytesWarningFlag);
     COPY_FLAG(inspect, Py_InspectFlag);
     COPY_FLAG(interactive, Py_InteractiveFlag);
@@ -679,7 +661,6 @@
     COPY_FLAG(verbose, Py_VerboseFlag);
     COPY_FLAG(quiet, Py_QuietFlag);
 #ifdef MS_WINDOWS
-    COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag);
     COPY_FLAG(legacy_windows_stdio, Py_LegacyWindowsStdioFlag);
 #endif
     COPY_FLAG(_frozen, Py_FrozenFlag);
@@ -709,7 +690,6 @@
             VAR = !config->ATTR; \
         }
 
-    COPY_FLAG(utf8_mode, Py_UTF8Mode);
     COPY_FLAG(bytes_warning, Py_BytesWarningFlag);
     COPY_FLAG(inspect, Py_InspectFlag);
     COPY_FLAG(interactive, Py_InteractiveFlag);
@@ -718,7 +698,6 @@
     COPY_FLAG(verbose, Py_VerboseFlag);
     COPY_FLAG(quiet, Py_QuietFlag);
 #ifdef MS_WINDOWS
-    COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag);
     COPY_FLAG(legacy_windows_stdio, Py_LegacyWindowsStdioFlag);
 #endif
     COPY_FLAG(_frozen, Py_FrozenFlag);
@@ -838,23 +817,7 @@
 static const wchar_t*
 config_get_xoption(const _PyCoreConfig *config, wchar_t *name)
 {
-    int nxoption = config->nxoption;
-    wchar_t **xoptions = config->xoptions;
-    for (int i=0; i < nxoption; i++) {
-        wchar_t *option = xoptions[i];
-        size_t len;
-        wchar_t *sep = wcschr(option, L'=');
-        if (sep != NULL) {
-            len = (sep - option);
-        }
-        else {
-            len = wcslen(option);
-        }
-        if (wcsncmp(option, name, len) == 0 && name[len] == L'\0') {
-            return option;
-        }
-    }
-    return NULL;
+    return _Py_get_xoption(config->nxoption, config->xoptions, name);
 }
 
 
@@ -915,67 +878,6 @@
 }
 
 
-static _PyInitError
-config_init_utf8_mode(_PyCoreConfig *config)
-{
-    const wchar_t *xopt = config_get_xoption(config, L"utf8");
-    if (xopt) {
-        wchar_t *sep = wcschr(xopt, L'=');
-        if (sep) {
-            xopt = sep + 1;
-            if (wcscmp(xopt, L"1") == 0) {
-                config->utf8_mode = 1;
-            }
-            else if (wcscmp(xopt, L"0") == 0) {
-                config->utf8_mode = 0;
-            }
-            else {
-                return _Py_INIT_USER_ERR("invalid -X utf8 option value");
-            }
-        }
-        else {
-            config->utf8_mode = 1;
-        }
-        return _Py_INIT_OK();
-    }
-
-    const char *opt = _PyCoreConfig_GetEnv(config, "PYTHONUTF8");
-    if (opt) {
-        if (strcmp(opt, "1") == 0) {
-            config->utf8_mode = 1;
-        }
-        else if (strcmp(opt, "0") == 0) {
-            config->utf8_mode = 0;
-        }
-        else {
-            return _Py_INIT_USER_ERR("invalid PYTHONUTF8 environment "
-                                     "variable value");
-        }
-        return _Py_INIT_OK();
-    }
-
-    return _Py_INIT_OK();
-}
-
-
-static int
-config_str_to_int(const char *str, int *result)
-{
-    const char *endptr = str;
-    errno = 0;
-    long value = strtol(str, (char **)&endptr, 10);
-    if (*endptr != '\0' || errno == ERANGE) {
-        return -1;
-    }
-    if (value < INT_MIN || value > INT_MAX) {
-        return -1;
-    }
-
-    *result = (int)value;
-    return 0;
-}
-
-
 static int
 config_wstr_to_int(const wchar_t *wstr, int *result)
 {
@@ -994,27 +896,12 @@
 }
 
 
-static void
-get_env_flag(_PyCoreConfig *config, int *flag, const char *name)
-{
-    const char *var = _PyCoreConfig_GetEnv(config, name);
-    if (!var) {
-        return;
-    }
-    int value;
-    if (config_str_to_int(var, &value) < 0 || value < 0) {
-        /* PYTHONDEBUG=text and PYTHONDEBUG=-2 behave as PYTHONDEBUG=1 */
-        value = 1;
-    }
-    if (*flag < value) {
-        *flag = value;
-    }
-}
-
-
 static _PyInitError
 config_read_env_vars(_PyCoreConfig *config)
 {
+#define get_env_flag(CONFIG, ATTR, NAME) \
+        _Py_get_env_flag(&(CONFIG)->preconfig, (ATTR), (NAME))
+
     /* Get environment variables */
     get_env_flag(config, &config->parser_debug, "PYTHONDEBUG");
     get_env_flag(config, &config->verbose, "PYTHONVERBOSE");
@@ -1040,8 +927,6 @@
     }
 
 #ifdef MS_WINDOWS
-    get_env_flag(config, &config->legacy_windows_fs_encoding,
-                 "PYTHONLEGACYWINDOWSFSENCODING");
     get_env_flag(config, &config->legacy_windows_stdio,
                  "PYTHONLEGACYWINDOWSSTDIO");
 #endif
@@ -1057,23 +942,6 @@
         config->malloc_stats = 1;
     }
 
-    const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE");
-    if (env) {
-        if (strcmp(env, "0") == 0) {
-            if (config->coerce_c_locale < 0) {
-                config->coerce_c_locale = 0;
-            }
-        }
-        else if (strcmp(env, "warn") == 0) {
-            config->coerce_c_locale_warn = 1;
-        }
-        else {
-            if (config->coerce_c_locale < 0) {
-                config->coerce_c_locale = 1;
-            }
-        }
-    }
-
     wchar_t *path;
     int res = _PyCoreConfig_GetEnvDup(config, &path,
                                       L"PYTHONPATH", "PYTHONPATH");
@@ -1090,6 +958,8 @@
     }
 
     return _Py_INIT_OK();
+
+#undef get_env_flag
 }
 
 
@@ -1101,7 +971,7 @@
 
     const char *env = _PyCoreConfig_GetEnv(config, "PYTHONTRACEMALLOC");
     if (env) {
-        if (!config_str_to_int(env, &nframe)) {
+        if (!_Py_str_to_int(env, &nframe)) {
             valid = (nframe >= 0);
         }
         else {
@@ -1213,37 +1083,6 @@
 }
 
 
-static void
-config_init_locale(_PyCoreConfig *config)
-{
-    /* Test also if coerce_c_locale equals 1: PYTHONCOERCECLOCALE=1 doesn't
-       imply that the C locale is always coerced. It is only coerced if
-       if the LC_CTYPE locale is "C". */
-    if (config->coerce_c_locale != 0) {
-        /* The C locale enables the C locale coercion (PEP 538) */
-        if (_Py_LegacyLocaleDetected()) {
-            config->coerce_c_locale = 1;
-        }
-        else {
-            config->coerce_c_locale = 0;
-        }
-    }
-
-#ifndef MS_WINDOWS
-    if (config->utf8_mode < 0) {
-        /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
-        const char *ctype_loc = setlocale(LC_CTYPE, NULL);
-        if (ctype_loc != NULL
-           && (strcmp(ctype_loc, "C") == 0
-               || strcmp(ctype_loc, "POSIX") == 0))
-        {
-            config->utf8_mode = 1;
-        }
-    }
-#endif
-}
-
-
 static const char *
 get_stdio_errors(const _PyCoreConfig *config)
 {
@@ -1365,7 +1204,7 @@
     }
 
     /* UTF-8 Mode uses UTF-8/surrogateescape */
-    if (config->utf8_mode) {
+    if (config->preconfig.utf8_mode) {
         if (config->stdio_encoding == NULL) {
             config->stdio_encoding = _PyMem_RawStrdup("utf-8");
             if (config->stdio_encoding == NULL) {
@@ -1403,7 +1242,7 @@
 config_init_fs_encoding(_PyCoreConfig *config)
 {
 #ifdef MS_WINDOWS
-    if (config->legacy_windows_fs_encoding) {
+    if (config->preconfig.legacy_windows_fs_encoding) {
         /* Legacy Windows filesystem encoding: mbcs/replace */
         if (config->filesystem_encoding == NULL) {
             config->filesystem_encoding = _PyMem_RawStrdup("mbcs");
@@ -1438,7 +1277,7 @@
     }
 #else
     if (config->filesystem_encoding == NULL) {
-        if (config->utf8_mode) {
+        if (config->preconfig.utf8_mode) {
             /* UTF-8 Mode use: utf-8/surrogateescape */
             config->filesystem_encoding = _PyMem_RawStrdup("utf-8");
             /* errors defaults to surrogateescape above */
@@ -1539,12 +1378,6 @@
         config->user_site_directory = 0;
     }
 
-#ifdef MS_WINDOWS
-    if (config->legacy_windows_fs_encoding) {
-        config->utf8_mode = 0;
-    }
-#endif
-
     if (config->preconfig.use_environment) {
         err = config_read_env_vars(config);
         if (_Py_INIT_FAILED(err)) {
@@ -1565,13 +1398,6 @@
         return err;
     }
 
-    if (config->utf8_mode < 0) {
-        err = config_init_utf8_mode(config);
-        if (_Py_INIT_FAILED(err)) {
-            return err;
-        }
-    }
-
     if (config->home == NULL) {
         err = config_init_home(config);
         if (_Py_INIT_FAILED(err)) {
@@ -1593,10 +1419,6 @@
         }
     }
 
-    if (config->coerce_c_locale != 0 || config->utf8_mode < 0) {
-        config_init_locale(config);
-    }
-
     if (config->_install_importlib) {
         err = _PyCoreConfig_InitPathConfig(config);
         if (_Py_INIT_FAILED(err)) {
@@ -1623,12 +1445,6 @@
     if (config->tracemalloc < 0) {
         config->tracemalloc = 0;
     }
-    if (config->coerce_c_locale < 0) {
-        config->coerce_c_locale = 0;
-    }
-    if (config->utf8_mode < 0) {
-        config->utf8_mode = 0;
-    }
     if (config->argc < 0) {
         config->argc = 0;
     }
@@ -1645,7 +1461,6 @@
         return err;
     }
 
-    assert(config->coerce_c_locale >= 0);
     assert(config->preconfig.use_environment >= 0);
     assert(config->filesystem_encoding != NULL);
     assert(config->filesystem_errors != NULL);
@@ -1703,9 +1518,6 @@
 void
 _PyCoreConfig_Write(const _PyCoreConfig *config)
 {
-    if (config->coerce_c_locale) {
-        _Py_CoerceLegacyLocale(config->coerce_c_locale_warn);
-    }
     _PyCoreConfig_SetGlobalConfig(config);
     config_init_stdio(config);
 }
@@ -1769,11 +1581,8 @@
     SET_ITEM_INT(show_alloc_count);
     SET_ITEM_INT(dump_refs);
     SET_ITEM_INT(malloc_stats);
-    SET_ITEM_INT(coerce_c_locale);
-    SET_ITEM_INT(coerce_c_locale_warn);
     SET_ITEM_STR(filesystem_encoding);
     SET_ITEM_STR(filesystem_errors);
-    SET_ITEM_INT(utf8_mode);
     SET_ITEM_WSTR(pycache_prefix);
     SET_ITEM_WSTR(program_name);
     SET_ITEM_WSTRLIST(argc, argv);
@@ -1805,7 +1614,6 @@
     SET_ITEM_STR(stdio_encoding);
     SET_ITEM_STR(stdio_errors);
 #ifdef MS_WINDOWS
-    SET_ITEM_INT(legacy_windows_fs_encoding);
     SET_ITEM_INT(legacy_windows_stdio);
 #endif
     SET_ITEM_INT(skip_source_first_line);
@@ -2318,8 +2126,16 @@
 }
 
 
-static _PyInitError
-config_read_from_argv_impl(_PyCoreConfig *config, const _PyArgv *args,
+/* Read the configuration into _PyCoreConfig and initialize the LC_CTYPE
+   locale: enable UTF-8 mode (PEP 540) and/or coerce the C locale (PEP 538).
+
+   Read the configuration from:
+
+   * Command line arguments
+   * Environment variables
+   * Py_xxx global configuration variables */
+_PyInitError
+_PyCoreConfig_ReadFromArgv(_PyCoreConfig *config, const _PyArgv *args,
                            const _PyPreConfig *preconfig)
 {
     _PyInitError err;
@@ -2343,133 +2159,3 @@
     cmdline_clear(&cmdline);
     return err;
 }
-
-
-/* Read the configuration into _PyCoreConfig and initialize the LC_CTYPE
-   locale: enable UTF-8 mode (PEP 540) and/or coerce the C locale (PEP 538).
-
-   Read the configuration from:
-
-   * Command line arguments
-   * Environment variables
-   * Py_xxx global configuration variables */
-_PyInitError
-_PyCoreConfig_ReadFromArgv(_PyCoreConfig *config, const _PyArgv *args,
-                           const _PyPreConfig *preconfig)
-{
-    _PyInitError err;
-    int init_utf8_mode = Py_UTF8Mode;
-#ifdef MS_WINDOWS
-    int init_legacy_encoding = Py_LegacyWindowsFSEncodingFlag;
-#endif
-    _PyCoreConfig save_config = _PyCoreConfig_INIT;
-    int locale_coerced = 0;
-    int loops = 0;
-    char *init_ctype_locale = NULL;
-
-    /* copy LC_CTYPE locale */
-    const char *loc = setlocale(LC_CTYPE, NULL);
-    if (loc == NULL) {
-        err = _Py_INIT_ERR("failed to LC_CTYPE locale");
-        goto done;
-    }
-    init_ctype_locale = _PyMem_RawStrdup(loc);
-    if (init_ctype_locale == NULL) {
-        err = _Py_INIT_NO_MEMORY();
-        goto done;
-    }
-
-    if (_PyCoreConfig_Copy(&save_config, config) < 0) {
-        err = _Py_INIT_NO_MEMORY();
-        goto done;
-    }
-
-    /* Set LC_CTYPE to the user preferred locale */
-    _Py_SetLocaleFromEnv(LC_CTYPE);
-
-    while (1) {
-        int utf8_mode = config->utf8_mode;
-        int encoding_changed = 0;
-
-        /* Watchdog to prevent an infinite loop */
-        loops++;
-        if (loops == 3) {
-            err = _Py_INIT_ERR("Encoding changed twice while "
-                               "reading the configuration");
-            goto done;
-        }
-
-        /* bpo-34207: Py_DecodeLocale() and Py_EncodeLocale() depend
-           on Py_UTF8Mode and Py_LegacyWindowsFSEncodingFlag. */
-        Py_UTF8Mode = config->utf8_mode;
-#ifdef MS_WINDOWS
-        Py_LegacyWindowsFSEncodingFlag = config->legacy_windows_fs_encoding;
-#endif
-
-        err = config_read_from_argv_impl(config, args, preconfig);
-        if (_Py_INIT_FAILED(err)) {
-            goto done;
-        }
-        if (locale_coerced) {
-            config->coerce_c_locale = 1;
-        }
-
-        /* The legacy C locale assumes ASCII as the default text encoding, which
-         * causes problems not only for the CPython runtime, but also other
-         * components like GNU readline.
-         *
-         * Accordingly, when the CLI detects it, it attempts to coerce it to a
-         * more capable UTF-8 based alternative.
-         *
-         * See the documentation of the PYTHONCOERCECLOCALE setting for more
-         * details.
-         */
-        if (config->coerce_c_locale && !locale_coerced) {
-            locale_coerced = 1;
-            _Py_CoerceLegacyLocale(0);
-            encoding_changed = 1;
-        }
-
-        if (utf8_mode == -1) {
-            if (config->utf8_mode == 1) {
-                /* UTF-8 Mode enabled */
-                encoding_changed = 1;
-            }
-        }
-        else {
-            if (config->utf8_mode != utf8_mode) {
-                encoding_changed = 1;
-            }
-        }
-
-        if (!encoding_changed) {
-            break;
-        }
-
-        /* Reset the configuration before reading again the configuration,
-           just keep UTF-8 Mode value. */
-        int new_utf8_mode = config->utf8_mode;
-        int new_coerce_c_locale = config->coerce_c_locale;
-        if (_PyCoreConfig_Copy(config, &save_config) < 0) {
-            err = _Py_INIT_NO_MEMORY();
-            goto done;
-        }
-        config->utf8_mode = new_utf8_mode;
-        config->coerce_c_locale = new_coerce_c_locale;
-
-        /* The encoding changed: read again the configuration
-           with the new encoding */
-    }
-    err = _Py_INIT_OK();
-
-done:
-    if (init_ctype_locale != NULL) {
-        setlocale(LC_CTYPE, init_ctype_locale);
-    }
-    _PyCoreConfig_Clear(&save_config);
-    Py_UTF8Mode = init_utf8_mode ;
-#ifdef MS_WINDOWS
-    Py_LegacyWindowsFSEncodingFlag = init_legacy_encoding;
-#endif
-    return err;
-}
diff --git a/Python/preconfig.c b/Python/preconfig.c
index af70f38..3befecf 100644
--- a/Python/preconfig.c
+++ b/Python/preconfig.c
@@ -1,6 +1,8 @@
 #include "Python.h"
 #include "pycore_coreconfig.h"
 #include "pycore_getopt.h"
+#include "pycore_pystate.h"   /* _PyRuntime_Initialize() */
+#include <locale.h>       /* setlocale() */
 
 
 #define DECODE_LOCALE_ERR(NAME, LEN) \
@@ -99,6 +101,8 @@
     const _PyArgv *args;
     int argc;
     wchar_t **argv;
+    int nxoption;           /* Number of -X options */
+    wchar_t **xoptions;     /* -X options */
 } _PyPreCmdline;
 
 
@@ -109,6 +113,10 @@
         _Py_wstrlist_clear(cmdline->args->argc, cmdline->argv);
     }
     cmdline->argv = NULL;
+
+    _Py_wstrlist_clear(cmdline->nxoption, cmdline->xoptions);
+    cmdline->nxoption = 0;
+    cmdline->xoptions = NULL;
 }
 
 
@@ -129,6 +137,12 @@
 
     COPY_ATTR(isolated);
     COPY_ATTR(use_environment);
+    COPY_ATTR(coerce_c_locale);
+    COPY_ATTR(coerce_c_locale_warn);
+#ifdef MS_WINDOWS
+    COPY_ATTR(legacy_windows_fs_encoding);
+#endif
+    COPY_ATTR(utf8_mode);
 
 #undef COPY_ATTR
     return 0;
@@ -149,6 +163,10 @@
 
     COPY_FLAG(isolated, Py_IsolatedFlag);
     COPY_NOT_FLAG(use_environment, Py_IgnoreEnvironmentFlag);
+#ifdef MS_WINDOWS
+    COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag);
+#endif
+    COPY_FLAG(utf8_mode, Py_UTF8Mode);
 
 #undef COPY_FLAG
 #undef COPY_NOT_FLAG
@@ -169,14 +187,161 @@
 
     COPY_FLAG(isolated, Py_IsolatedFlag);
     COPY_NOT_FLAG(use_environment, Py_IgnoreEnvironmentFlag);
+#ifdef MS_WINDOWS
+    COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag);
+#endif
+    COPY_FLAG(utf8_mode, Py_UTF8Mode);
 
 #undef COPY_FLAG
 #undef COPY_NOT_FLAG
 }
 
 
-_PyInitError
-_PyPreConfig_Read(_PyPreConfig *config)
+const char*
+_PyPreConfig_GetEnv(const _PyPreConfig *config, const char *name)
+{
+    assert(config->use_environment >= 0);
+
+    if (!config->use_environment) {
+        return NULL;
+    }
+
+    const char *var = getenv(name);
+    if (var && var[0] != '\0') {
+        return var;
+    }
+    else {
+        return NULL;
+    }
+}
+
+
+int
+_Py_str_to_int(const char *str, int *result)
+{
+    const char *endptr = str;
+    errno = 0;
+    long value = strtol(str, (char **)&endptr, 10);
+    if (*endptr != '\0' || errno == ERANGE) {
+        return -1;
+    }
+    if (value < INT_MIN || value > INT_MAX) {
+        return -1;
+    }
+
+    *result = (int)value;
+    return 0;
+}
+
+
+void
+_Py_get_env_flag(_PyPreConfig *config, int *flag, const char *name)
+{
+    const char *var = _PyPreConfig_GetEnv(config, name);
+    if (!var) {
+        return;
+    }
+    int value;
+    if (_Py_str_to_int(var, &value) < 0 || value < 0) {
+        /* PYTHONDEBUG=text and PYTHONDEBUG=-2 behave as PYTHONDEBUG=1 */
+        value = 1;
+    }
+    if (*flag < value) {
+        *flag = value;
+    }
+}
+
+
+const wchar_t*
+_Py_get_xoption(int nxoption, wchar_t * const *xoptions, const wchar_t *name)
+{
+    for (int i=0; i < nxoption; i++) {
+        const wchar_t *option = xoptions[i];
+        size_t len;
+        wchar_t *sep = wcschr(option, L'=');
+        if (sep != NULL) {
+            len = (sep - option);
+        }
+        else {
+            len = wcslen(option);
+        }
+        if (wcsncmp(option, name, len) == 0 && name[len] == L'\0') {
+            return option;
+        }
+    }
+    return NULL;
+}
+
+
+static _PyInitError
+preconfig_init_utf8_mode(_PyPreConfig *config, const _PyPreCmdline *cmdline)
+{
+    const wchar_t *xopt;
+    if (cmdline) {
+        xopt = _Py_get_xoption(cmdline->nxoption, cmdline->xoptions, L"utf8");
+    }
+    else {
+        xopt = NULL;
+    }
+    if (xopt) {
+        wchar_t *sep = wcschr(xopt, L'=');
+        if (sep) {
+            xopt = sep + 1;
+            if (wcscmp(xopt, L"1") == 0) {
+                config->utf8_mode = 1;
+            }
+            else if (wcscmp(xopt, L"0") == 0) {
+                config->utf8_mode = 0;
+            }
+            else {
+                return _Py_INIT_USER_ERR("invalid -X utf8 option value");
+            }
+        }
+        else {
+            config->utf8_mode = 1;
+        }
+        return _Py_INIT_OK();
+    }
+
+    const char *opt = _PyPreConfig_GetEnv(config, "PYTHONUTF8");
+    if (opt) {
+        if (strcmp(opt, "1") == 0) {
+            config->utf8_mode = 1;
+        }
+        else if (strcmp(opt, "0") == 0) {
+            config->utf8_mode = 0;
+        }
+        else {
+            return _Py_INIT_USER_ERR("invalid PYTHONUTF8 environment "
+                                     "variable value");
+        }
+        return _Py_INIT_OK();
+    }
+
+    return _Py_INIT_OK();
+}
+
+
+static void
+preconfig_init_locale(_PyPreConfig *config)
+{
+    /* Test also if coerce_c_locale equals 1: PYTHONCOERCECLOCALE=1 doesn't
+       imply that the C locale is always coerced. It is only coerced if
+       the LC_CTYPE locale is "C". */
+    if (config->coerce_c_locale != 0) {
+        /* The C locale enables the C locale coercion (PEP 538) */
+        if (_Py_LegacyLocaleDetected()) {
+            config->coerce_c_locale = 1;
+        }
+        else {
+            config->coerce_c_locale = 0;
+        }
+    }
+}
+
+
+static _PyInitError
+preconfig_read(_PyPreConfig *config, const _PyPreCmdline *cmdline)
 {
     _PyPreConfig_GetGlobalConfig(config);
 
@@ -189,6 +354,69 @@
         config->use_environment = 0;
     }
 
+    if (config->use_environment) {
+#ifdef MS_WINDOWS
+        _Py_get_env_flag(config, &config->legacy_windows_fs_encoding,
+                "PYTHONLEGACYWINDOWSFSENCODING");
+#endif
+
+        const char *env = _PyPreConfig_GetEnv(config, "PYTHONCOERCECLOCALE");
+        if (env) {
+            if (strcmp(env, "0") == 0) {
+                if (config->coerce_c_locale < 0) {
+                    config->coerce_c_locale = 0;
+                }
+            }
+            else if (strcmp(env, "warn") == 0) {
+                config->coerce_c_locale_warn = 1;
+            }
+            else {
+                if (config->coerce_c_locale < 0) {
+                    config->coerce_c_locale = 1;
+                }
+            }
+        }
+    }
+
+#ifdef MS_WINDOWS
+    if (config->legacy_windows_fs_encoding) {
+        config->utf8_mode = 0;
+    }
+#endif
+
+    if (config->utf8_mode < 0) {
+        _PyInitError err = preconfig_init_utf8_mode(config, cmdline);
+        if (_Py_INIT_FAILED(err)) {
+            return err;
+        }
+    }
+
+    if (config->coerce_c_locale != 0) {
+        preconfig_init_locale(config);
+    }
+
+#ifndef MS_WINDOWS
+    if (config->utf8_mode < 0) {
+        /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
+        const char *ctype_loc = setlocale(LC_CTYPE, NULL);
+        if (ctype_loc != NULL
+           && (strcmp(ctype_loc, "C") == 0
+               || strcmp(ctype_loc, "POSIX") == 0))
+        {
+            config->utf8_mode = 1;
+        }
+    }
+#endif
+
+    if (config->coerce_c_locale < 0) {
+        config->coerce_c_locale = 0;
+    }
+    if (config->utf8_mode < 0) {
+        config->utf8_mode = 0;
+    }
+
+    assert(config->coerce_c_locale >= 0);
+    assert(config->utf8_mode >= 0);
     assert(config->isolated >= 0);
     assert(config->use_environment >= 0);
 
@@ -196,6 +424,13 @@
 }
 
 
+_PyInitError
+_PyPreConfig_Read(_PyPreConfig *config)
+{
+    return preconfig_read(config, NULL);
+}
+
+
 int
 _PyPreConfig_AsDict(const _PyPreConfig *config, PyObject *dict)
 {
@@ -216,6 +451,12 @@
 
     SET_ITEM_INT(isolated);
     SET_ITEM_INT(use_environment);
+    SET_ITEM_INT(coerce_c_locale);
+    SET_ITEM_INT(coerce_c_locale_warn);
+    SET_ITEM_INT(utf8_mode);
+#ifdef MS_WINDOWS
+    SET_ITEM_INT(legacy_windows_fs_encoding);
+#endif
     return 0;
 
 fail:
@@ -251,6 +492,18 @@
             config->isolated++;
             break;
 
+        case 'X':
+        {
+            _PyInitError err;
+            err = _Py_wstrlist_append(&cmdline->nxoption,
+                                      &cmdline->xoptions,
+                                      _PyOS_optarg);
+            if (_Py_INIT_FAILED(err)) {
+                return err;
+            }
+            break;
+        }
+
         default:
             /* ignore other argument:
                handled by _PyCoreConfig_ReadFromArgv() */
@@ -262,8 +515,8 @@
 }
 
 
-_PyInitError
-_PyPreConfig_ReadFromArgv(_PyPreConfig *config, const _PyArgv *args)
+static _PyInitError
+preconfig_from_argv(_PyPreConfig *config, const _PyArgv *args)
 {
     _PyInitError err;
 
@@ -281,7 +534,7 @@
         goto done;
     }
 
-    err = _PyPreConfig_Read(config);
+    err = preconfig_read(config, &cmdline);
     if (_Py_INIT_FAILED(err)) {
         goto done;
     }
@@ -293,7 +546,144 @@
 }
 
 
+/* Read the preconfiguration. */
+_PyInitError
+_PyPreConfig_ReadFromArgv(_PyPreConfig *config, const _PyArgv *args)
+{
+    _PyInitError err;
+
+    err = _PyRuntime_Initialize();
+    if (_Py_INIT_FAILED(err)) {
+        return err;
+    }
+
+    char *init_ctype_locale = NULL;
+    int init_utf8_mode = Py_UTF8Mode;
+#ifdef MS_WINDOWS
+    int init_legacy_encoding = Py_LegacyWindowsFSEncodingFlag;
+#endif
+    _PyPreConfig save_config = _PyPreConfig_INIT;
+    int locale_coerced = 0;
+    int loops = 0;
+
+    /* copy LC_CTYPE locale */
+    const char *loc = setlocale(LC_CTYPE, NULL);
+    if (loc == NULL) {
+        err = _Py_INIT_ERR("failed to LC_CTYPE locale");
+        goto done;
+    }
+    init_ctype_locale = _PyMem_RawStrdup(loc);
+    if (init_ctype_locale == NULL) {
+        err = _Py_INIT_NO_MEMORY();
+        goto done;
+    }
+
+    if (_PyPreConfig_Copy(&save_config, config) < 0) {
+        err = _Py_INIT_NO_MEMORY();
+        goto done;
+    }
+
+    /* Set LC_CTYPE to the user preferred locale */
+    _Py_SetLocaleFromEnv(LC_CTYPE);
+
+    while (1) {
+        int utf8_mode = config->utf8_mode;
+
+        /* Watchdog to prevent an infinite loop */
+        loops++;
+        if (loops == 3) {
+            err = _Py_INIT_ERR("Encoding changed twice while "
+                               "reading the configuration");
+            goto done;
+        }
+
+        /* bpo-34207: Py_DecodeLocale() and Py_EncodeLocale() depend
+           on Py_UTF8Mode and Py_LegacyWindowsFSEncodingFlag. */
+        Py_UTF8Mode = config->utf8_mode;
+#ifdef MS_WINDOWS
+        Py_LegacyWindowsFSEncodingFlag = config->legacy_windows_fs_encoding;
+#endif
+
+        err = preconfig_from_argv(config, args);
+        if (_Py_INIT_FAILED(err)) {
+            goto done;
+        }
+
+        if (locale_coerced) {
+            config->coerce_c_locale = 1;
+        }
+
+        /* The legacy C locale assumes ASCII as the default text encoding, which
+         * causes problems not only for the CPython runtime, but also other
+         * components like GNU readline.
+         *
+         * Accordingly, when the CLI detects it, it attempts to coerce it to a
+         * more capable UTF-8 based alternative.
+         *
+         * See the documentation of the PYTHONCOERCECLOCALE setting for more
+         * details.
+         */
+        int encoding_changed = 0;
+        if (config->coerce_c_locale && !locale_coerced) {
+            locale_coerced = 1;
+            _Py_CoerceLegacyLocale(0);
+            encoding_changed = 1;
+        }
+
+        if (utf8_mode == -1) {
+            if (config->utf8_mode == 1) {
+                /* UTF-8 Mode enabled */
+                encoding_changed = 1;
+            }
+        }
+        else {
+            if (config->utf8_mode != utf8_mode) {
+                encoding_changed = 1;
+            }
+        }
+
+        if (!encoding_changed) {
+            break;
+        }
+
+        /* Reset the configuration before reading it again, keeping only
+           the new utf8_mode and coerce_c_locale values. */
+        int new_utf8_mode = config->utf8_mode;
+        int new_coerce_c_locale = config->coerce_c_locale;
+        if (_PyPreConfig_Copy(config, &save_config) < 0) {
+            err = _Py_INIT_NO_MEMORY();
+            goto done;
+        }
+        config->utf8_mode = new_utf8_mode;
+        config->coerce_c_locale = new_coerce_c_locale;
+
+        /* The encoding changed: read again the configuration
+           with the new encoding */
+    }
+    err = _Py_INIT_OK();
+
+done:
+    if (init_ctype_locale != NULL) {
+        setlocale(LC_CTYPE, init_ctype_locale);
+    }
+    _PyPreConfig_Clear(&save_config);
+    Py_UTF8Mode = init_utf8_mode ;
+#ifdef MS_WINDOWS
+    Py_LegacyWindowsFSEncodingFlag = init_legacy_encoding;
+#endif
+    return err;
+}
+
+
 void
 _PyPreConfig_Write(const _PyPreConfig *config)
 {
+    _PyPreConfig_SetGlobalConfig(config);
+
+    if (config->coerce_c_locale) {
+        _Py_CoerceLegacyLocale(config->coerce_c_locale_warn);
+    }
+
+    /* Set LC_CTYPE to the user preferred locale */
+    _Py_SetLocaleFromEnv(LC_CTYPE);
 }
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 7cf4a6d..dec8904 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -287,7 +287,7 @@
 static void
 _emit_stderr_warning_for_legacy_locale(const _PyCoreConfig *core_config)
 {
-    if (core_config->coerce_c_locale_warn && _Py_LegacyLocaleDetected()) {
+    if (core_config->preconfig.coerce_c_locale_warn && _Py_LegacyLocaleDetected()) {
         PySys_FormatStderr("%s", _C_LOCALE_WARNING);
     }
 }
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 4b12280..50ba1a7 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -2181,7 +2181,7 @@
     SetFlag(config->use_hash_seed == 0 || config->hash_seed != 0);
     SetFlag(config->preconfig.isolated);
     PyStructSequence_SET_ITEM(seq, pos++, PyBool_FromLong(config->dev_mode));
-    SetFlag(config->utf8_mode);
+    SetFlag(config->preconfig.utf8_mode);
 #undef SetFlag
 
     if (PyErr_Occurred()) {