bpo-36775: _PyCoreConfig only uses wchar_t* (GH-13062)

_PyCoreConfig: Change the type of the filesystem_encoding,
filesystem_errors, stdio_encoding and stdio_errors fields from char*
to wchar_t*.

Changes:

* PyInterpreterState: replace fscodec_initialized (int) with the
  fs_codec structure.
* Add get_error_handler_wide() and unicode_encode_utf8() helper
  functions.
* Add an error_handler parameter to unicode_encode_locale()
  and unicode_decode_locale().
* Remove _PyCoreConfig_SetString().
* Rename _PyCoreConfig_SetWideString() to _PyCoreConfig_SetString().
* Rename _PyCoreConfig_SetWideStringFromString()
  to _PyCoreConfig_DecodeLocale().
diff --git a/Python/coreconfig.c b/Python/coreconfig.c
index c40c1f8..15643be 100644
--- a/Python/coreconfig.c
+++ b/Python/coreconfig.c
@@ -523,27 +523,7 @@
 
 /* Copy str into *config_str (duplicate the string) */
 _PyInitError
-_PyCoreConfig_SetString(char **config_str, const char *str)
-{
-    char *str2;
-    if (str != NULL) {
-        str2 = _PyMem_RawStrdup(str);
-        if (str2 == NULL) {
-            return _Py_INIT_NO_MEMORY();
-        }
-    }
-    else {
-        str2 = NULL;
-    }
-    PyMem_RawFree(*config_str);
-    *config_str = str2;
-    return _Py_INIT_OK();
-}
-
-
-/* Copy str into *config_str (duplicate the string) */
-_PyInitError
-_PyCoreConfig_SetWideString(wchar_t **config_str, const wchar_t *str)
+_PyCoreConfig_SetString(wchar_t **config_str, const wchar_t *str)
 {
     wchar_t *str2;
     if (str != NULL) {
@@ -563,8 +543,8 @@
 
 /* Decode str using Py_DecodeLocale() and set the result into *config_str */
 static _PyInitError
-_PyCoreConfig_SetWideStringFromStringErr(wchar_t **config_str, const char *str,
-                                         const char *decode_err_msg)
+_PyCoreConfig_DecodeLocaleErr(wchar_t **config_str, const char *str,
+                              const char *decode_err_msg)
 {
     wchar_t *str2;
     if (str != NULL) {
@@ -588,17 +568,15 @@
 }
 
 
-_PyInitError
-_PyCoreConfig_SetWideStringFromString(wchar_t **config_str, const char *str)
-{
-    return _PyCoreConfig_SetWideStringFromStringErr(
-                config_str, str, "cannot decode string");
-}
-
-
 #define CONFIG_DECODE_LOCALE(config_str, str, NAME) \
-    _PyCoreConfig_SetWideStringFromStringErr(config_str, str, \
-                                             "cannot decode " NAME)
+    _PyCoreConfig_DecodeLocaleErr(config_str, str, "cannot decode " NAME)
+
+
+_PyInitError
+_PyCoreConfig_DecodeLocale(wchar_t **config_str, const char *str)
+{
+    return CONFIG_DECODE_LOCALE(config_str, str, "string");
+}
 
 
 _PyInitError
@@ -608,16 +586,9 @@
     _PyCoreConfig_Clear(config);
 
 #define COPY_ATTR(ATTR) config->ATTR = config2->ATTR
-#define COPY_STR_ATTR(ATTR) \
-    do { \
-        err = _PyCoreConfig_SetString(&config->ATTR, config2->ATTR); \
-        if (_Py_INIT_FAILED(err)) { \
-            return err; \
-        } \
-    } while (0)
 #define COPY_WSTR_ATTR(ATTR) \
     do { \
-        err = _PyCoreConfig_SetWideString(&config->ATTR, config2->ATTR); \
+        err = _PyCoreConfig_SetString(&config->ATTR, config2->ATTR); \
         if (_Py_INIT_FAILED(err)) { \
             return err; \
         } \
@@ -676,10 +647,10 @@
     COPY_ATTR(quiet);
     COPY_ATTR(user_site_directory);
     COPY_ATTR(buffered_stdio);
-    COPY_STR_ATTR(filesystem_encoding);
-    COPY_STR_ATTR(filesystem_errors);
-    COPY_STR_ATTR(stdio_encoding);
-    COPY_STR_ATTR(stdio_errors);
+    COPY_WSTR_ATTR(filesystem_encoding);
+    COPY_WSTR_ATTR(filesystem_errors);
+    COPY_WSTR_ATTR(stdio_encoding);
+    COPY_WSTR_ATTR(stdio_errors);
 #ifdef MS_WINDOWS
     COPY_ATTR(legacy_windows_stdio);
 #endif
@@ -692,7 +663,6 @@
     COPY_ATTR(_init_main);
 
 #undef COPY_ATTR
-#undef COPY_STR_ATTR
 #undef COPY_WSTR_ATTR
 #undef COPY_WSTRLIST
     return _Py_INIT_OK();
@@ -721,16 +691,10 @@
                 goto fail; \
             } \
         } while (0)
-#define FROM_STRING(STR) \
-    ((STR != NULL) ? \
-        PyUnicode_FromString(STR) \
-        : (Py_INCREF(Py_None), Py_None))
 #define SET_ITEM_INT(ATTR) \
     SET_ITEM(#ATTR, PyLong_FromLong(config->ATTR))
 #define SET_ITEM_UINT(ATTR) \
     SET_ITEM(#ATTR, PyLong_FromUnsignedLong(config->ATTR))
-#define SET_ITEM_STR(ATTR) \
-    SET_ITEM(#ATTR, FROM_STRING(config->ATTR))
 #define FROM_WSTRING(STR) \
     ((STR != NULL) ? \
         PyUnicode_FromWideChar(STR, -1) \
@@ -753,8 +717,8 @@
     SET_ITEM_INT(show_alloc_count);
     SET_ITEM_INT(dump_refs);
     SET_ITEM_INT(malloc_stats);
-    SET_ITEM_STR(filesystem_encoding);
-    SET_ITEM_STR(filesystem_errors);
+    SET_ITEM_WSTR(filesystem_encoding);
+    SET_ITEM_WSTR(filesystem_errors);
     SET_ITEM_WSTR(pycache_prefix);
     SET_ITEM_WSTR(program_name);
     SET_ITEM_WSTRLIST(argv);
@@ -783,8 +747,8 @@
     SET_ITEM_INT(quiet);
     SET_ITEM_INT(user_site_directory);
     SET_ITEM_INT(buffered_stdio);
-    SET_ITEM_STR(stdio_encoding);
-    SET_ITEM_STR(stdio_errors);
+    SET_ITEM_WSTR(stdio_encoding);
+    SET_ITEM_WSTR(stdio_errors);
 #ifdef MS_WINDOWS
     SET_ITEM_INT(legacy_windows_stdio);
 #endif
@@ -803,12 +767,10 @@
     Py_DECREF(dict);
     return NULL;
 
-#undef FROM_STRING
 #undef FROM_WSTRING
 #undef SET_ITEM
 #undef SET_ITEM_INT
 #undef SET_ITEM_UINT
-#undef SET_ITEM_STR
 #undef SET_ITEM_WSTR
 #undef SET_ITEM_WSTRLIST
 }
@@ -845,7 +807,7 @@
         return _Py_INIT_OK();
     }
 
-    return _PyCoreConfig_SetWideString(dest, var);
+    return _PyCoreConfig_SetString(dest, var);
 #else
     const char *var = getenv(name);
     if (!var || var[0] == '\0') {
@@ -853,7 +815,7 @@
         return _Py_INIT_OK();
     }
 
-    return _PyCoreConfig_SetWideStringFromStringErr(dest, var, decode_err_msg);
+    return _PyCoreConfig_DecodeLocaleErr(dest, var, decode_err_msg);
 #endif
 }
 
@@ -996,8 +958,7 @@
 
     /* Use argv[0] by default, if available */
     if (config->program != NULL) {
-        err = _PyCoreConfig_SetWideString(&config->program_name,
-                                          config->program);
+        err = _PyCoreConfig_SetString(&config->program_name, config->program);
         if (_Py_INIT_FAILED(err)) {
             return err;
         }
@@ -1010,7 +971,7 @@
 #else
     const wchar_t *default_program_name = L"python3";
 #endif
-    err = _PyCoreConfig_SetWideString(&config->program_name, default_program_name);
+    err = _PyCoreConfig_SetString(&config->program_name, default_program_name);
     if (_Py_INIT_FAILED(err)) {
         return err;
     }
@@ -1025,8 +986,8 @@
     /* If Py_SetProgramFullPath() was called, use its value */
     const wchar_t *program_full_path = _Py_path_config.program_full_path;
     if (program_full_path != NULL) {
-        _PyInitError err = _PyCoreConfig_SetWideString(&config->executable,
-                                                       program_full_path);
+        _PyInitError err = _PyCoreConfig_SetString(&config->executable,
+                                                   program_full_path);
         if (_Py_INIT_FAILED(err)) {
             return err;
         }
@@ -1051,7 +1012,7 @@
     /* If Py_SetPythonHome() was called, use its value */
     wchar_t *home = _Py_path_config.home;
     if (home) {
-        _PyInitError err = _PyCoreConfig_SetWideString(&config->home, home);
+        _PyInitError err = _PyCoreConfig_SetString(&config->home, home);
         if (_Py_INIT_FAILED(err)) {
             return err;
         }
@@ -1280,7 +1241,7 @@
 }
 
 
-static const char *
+static const wchar_t *
 config_get_stdio_errors(const _PyCoreConfig *config)
 {
 #ifndef MS_WINDOWS
@@ -1288,43 +1249,44 @@
     if (loc != NULL) {
         /* surrogateescape is the default in the legacy C and POSIX locales */
         if (strcmp(loc, "C") == 0 || strcmp(loc, "POSIX") == 0) {
-            return "surrogateescape";
+            return L"surrogateescape";
         }
 
 #ifdef PY_COERCE_C_LOCALE
         /* surrogateescape is the default in locale coercion target locales */
         if (_Py_IsLocaleCoercionTarget(loc)) {
-            return "surrogateescape";
+            return L"surrogateescape";
         }
 #endif
     }
 
-    return "strict";
+    return L"strict";
 #else
     /* On Windows, always use surrogateescape by default */
-    return "surrogateescape";
+    return L"surrogateescape";
 #endif
 }
 
 
 static _PyInitError
-config_get_locale_encoding(char **locale_encoding)
+config_get_locale_encoding(wchar_t **locale_encoding)
 {
 #ifdef MS_WINDOWS
     char encoding[20];
     PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP());
+    return _PyCoreConfig_DecodeLocale(locale_encoding, encoding);
 #elif defined(_Py_FORCE_UTF8_LOCALE)
-    const char *encoding = "UTF-8";
+    return _PyCoreConfig_SetString(locale_encoding, L"utf-8");
 #else
     const char *encoding = nl_langinfo(CODESET);
     if (!encoding || encoding[0] == '\0') {
         return _Py_INIT_ERR("failed to get the locale encoding: "
                             "nl_langinfo(CODESET) failed");
     }
+    /* nl_langinfo(CODESET) is decoded by Py_DecodeLocale() */
+    return CONFIG_DECODE_LOCALE(locale_encoding, encoding,
+                                "nl_langinfo(CODESET)");
 #endif
-
-    assert(*locale_encoding == NULL);
-    return _PyCoreConfig_SetString(locale_encoding, encoding);
 }
 
 
@@ -1337,16 +1299,18 @@
     /* If Py_SetStandardStreamEncoding() have been called, use these
         parameters. */
     if (config->stdio_encoding == NULL && _Py_StandardStreamEncoding != NULL) {
-        err = _PyCoreConfig_SetString(&config->stdio_encoding,
-                                      _Py_StandardStreamEncoding);
+        err = CONFIG_DECODE_LOCALE(&config->stdio_encoding,
+                                   _Py_StandardStreamEncoding,
+                                   "_Py_StandardStreamEncoding");
         if (_Py_INIT_FAILED(err)) {
             return err;
         }
     }
 
     if (config->stdio_errors == NULL && _Py_StandardStreamErrors != NULL) {
-        err = _PyCoreConfig_SetString(&config->stdio_errors,
-                                      _Py_StandardStreamErrors);
+        err = CONFIG_DECODE_LOCALE(&config->stdio_errors,
+                                   _Py_StandardStreamErrors,
+                                   "_Py_StandardStreamErrors");
         if (_Py_INIT_FAILED(err)) {
             return err;
         }
@@ -1359,11 +1323,9 @@
     /* PYTHONIOENCODING environment variable */
     const char *opt = _PyCoreConfig_GetEnv(config, "PYTHONIOENCODING");
     if (opt) {
-        /* _PyCoreConfig_SetString() requires dest to be initialized to NULL */
-        char *pythonioencoding = NULL;
-        err = _PyCoreConfig_SetString(&pythonioencoding, opt);
-        if (_Py_INIT_FAILED(err)) {
-            return err;
+        char *pythonioencoding = _PyMem_RawStrdup(opt);
+        if (pythonioencoding == NULL) {
+            return _Py_INIT_NO_MEMORY();
         }
 
         char *errors = strchr(pythonioencoding, ':');
@@ -1378,8 +1340,9 @@
         /* Does PYTHONIOENCODING contain an encoding? */
         if (pythonioencoding[0]) {
             if (config->stdio_encoding == NULL) {
-                err = _PyCoreConfig_SetString(&config->stdio_encoding,
-                                              pythonioencoding);
+                err = CONFIG_DECODE_LOCALE(&config->stdio_encoding,
+                                           pythonioencoding,
+                                           "PYTHONIOENCODING environment variable");
                 if (_Py_INIT_FAILED(err)) {
                     PyMem_RawFree(pythonioencoding);
                     return err;
@@ -1396,7 +1359,9 @@
         }
 
         if (config->stdio_errors == NULL && errors != NULL) {
-            err = _PyCoreConfig_SetString(&config->stdio_errors, errors);
+            err = CONFIG_DECODE_LOCALE(&config->stdio_errors,
+                                       errors,
+                                       "PYTHONIOENCODING environment variable");
             if (_Py_INIT_FAILED(err)) {
                 PyMem_RawFree(pythonioencoding);
                 return err;
@@ -1409,15 +1374,14 @@
     /* UTF-8 Mode uses UTF-8/surrogateescape */
     if (preconfig->utf8_mode) {
         if (config->stdio_encoding == NULL) {
-            err = _PyCoreConfig_SetString(&config->stdio_encoding,
-                                          "utf-8");
+            err = _PyCoreConfig_SetString(&config->stdio_encoding, L"utf-8");
             if (_Py_INIT_FAILED(err)) {
                 return err;
             }
         }
         if (config->stdio_errors == NULL) {
             err = _PyCoreConfig_SetString(&config->stdio_errors,
-                                          "surrogateescape");
+                                          L"surrogateescape");
             if (_Py_INIT_FAILED(err)) {
                 return err;
             }
@@ -1432,7 +1396,7 @@
         }
     }
     if (config->stdio_errors == NULL) {
-        const char *errors = config_get_stdio_errors(config);
+        const wchar_t *errors = config_get_stdio_errors(config);
         assert(errors != NULL);
 
         err = _PyCoreConfig_SetString(&config->stdio_errors, errors);
@@ -1452,33 +1416,32 @@
 
     if (config->filesystem_encoding == NULL) {
 #ifdef _Py_FORCE_UTF8_FS_ENCODING
-        err = _PyCoreConfig_SetString(&config->filesystem_encoding,
-                                      "utf-8");
+        err = _PyCoreConfig_SetString(&config->filesystem_encoding, L"utf-8");
 #else
 
 #ifdef MS_WINDOWS
         if (preconfig->legacy_windows_fs_encoding) {
             /* Legacy Windows filesystem encoding: mbcs/replace */
             err = _PyCoreConfig_SetString(&config->filesystem_encoding,
-                                          "mbcs");
+                                          L"mbcs");
         }
         else
 #endif
         if (preconfig->utf8_mode) {
             err = _PyCoreConfig_SetString(&config->filesystem_encoding,
-                                          "utf-8");
+                                          L"utf-8");
         }
 #ifndef MS_WINDOWS
         else if (_Py_GetForceASCII()) {
             err = _PyCoreConfig_SetString(&config->filesystem_encoding,
-                                          "ascii");
+                                          L"ascii");
         }
 #endif
         else {
 #ifdef MS_WINDOWS
             /* Windows defaults to utf-8/surrogatepass (PEP 529). */
             err = _PyCoreConfig_SetString(&config->filesystem_encoding,
-                                          "utf-8");
+                                          L"utf-8");
 #else
             err = config_get_locale_encoding(&config->filesystem_encoding);
 #endif
@@ -1491,16 +1454,16 @@
     }
 
     if (config->filesystem_errors == NULL) {
-        const char *errors;
+        const wchar_t *errors;
 #ifdef MS_WINDOWS
         if (preconfig->legacy_windows_fs_encoding) {
-            errors = "replace";
+            errors = L"replace";
         }
         else {
-            errors = "surrogatepass";
+            errors = L"surrogatepass";
         }
 #else
-        errors = "surrogateescape";
+        errors = L"surrogateescape";
 #endif
         err = _PyCoreConfig_SetString(&config->filesystem_errors, errors);
         if (_Py_INIT_FAILED(err)) {
@@ -1745,8 +1708,8 @@
                 || wcscmp(_PyOS_optarg, L"never") == 0
                 || wcscmp(_PyOS_optarg, L"default") == 0)
             {
-                err = _PyCoreConfig_SetWideString(&config->check_hash_pycs_mode,
-                                                  _PyOS_optarg);
+                err = _PyCoreConfig_SetString(&config->check_hash_pycs_mode,
+                                              _PyOS_optarg);
                 if (_Py_INIT_FAILED(err)) {
                     return err;
                 }
@@ -2119,7 +2082,7 @@
     }
 
     if (config->check_hash_pycs_mode == NULL) {
-        err = _PyCoreConfig_SetWideString(&config->check_hash_pycs_mode, L"default");
+        err = _PyCoreConfig_SetString(&config->check_hash_pycs_mode, L"default");
         if (_Py_INIT_FAILED(err)) {
             goto done;
         }
diff --git a/Python/preconfig.c b/Python/preconfig.c
index 108cbc6..48b9e83 100644
--- a/Python/preconfig.c
+++ b/Python/preconfig.c
@@ -14,7 +14,10 @@
 /* --- File system encoding/errors -------------------------------- */
 
 /* The filesystem encoding is chosen by config_init_fs_encoding(),
-   see also initfsencoding(). */
+   see also initfsencoding().
+
+   Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
+   are encoded to UTF-8. */
 const char *Py_FileSystemDefaultEncoding = NULL;
 int Py_HasFileSystemDefaultEncoding = 0;
 const char *Py_FileSystemDefaultEncodeErrors = NULL;
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 01ef027..2a633cf 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -1668,7 +1668,7 @@
 static PyObject*
 create_stdio(const _PyCoreConfig *config, PyObject* io,
     int fd, int write_mode, const char* name,
-    const char* encoding, const char* errors)
+    const wchar_t* encoding, const wchar_t* errors)
 {
     PyObject *buf = NULL, *stream = NULL, *text = NULL, *raw = NULL, *res;
     const char* mode;
@@ -1718,7 +1718,7 @@
 #ifdef MS_WINDOWS
     /* Windows console IO is always UTF-8 encoded */
     if (PyWindowsConsoleIO_Check(raw))
-        encoding = "utf-8";
+        encoding = L"utf-8";
 #endif
 
     text = PyUnicode_FromString(name);
@@ -1754,10 +1754,25 @@
     newline = "\n";
 #endif
 
-    stream = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "OsssOO",
-                                    buf, encoding, errors,
+    PyObject *encoding_str = PyUnicode_FromWideChar(encoding, -1);
+    if (encoding_str == NULL) {
+        Py_CLEAR(buf);
+        goto error;
+    }
+
+    PyObject *errors_str = PyUnicode_FromWideChar(errors, -1);
+    if (errors_str == NULL) {
+        Py_CLEAR(buf);
+        Py_CLEAR(encoding_str);
+        goto error;
+    }
+
+    stream = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "OOOsOO",
+                                    buf, encoding_str, errors_str,
                                     newline, line_buffering, write_through);
     Py_CLEAR(buf);
+    Py_CLEAR(encoding_str);
+    Py_CLEAR(errors_str);
     if (stream == NULL)
         goto error;
 
@@ -1874,7 +1889,7 @@
     fd = fileno(stderr);
     std = create_stdio(config, iomod, fd, 1, "<stderr>",
                        config->stdio_encoding,
-                       "backslashreplace");
+                       L"backslashreplace");
     if (std == NULL)
         goto error;
 
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 0f7af2c..fbdeb9b 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -424,7 +424,7 @@
 {
     PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
     const _PyCoreConfig *config = &interp->core_config;
-    return PyUnicode_FromString(config->filesystem_encoding);
+    return PyUnicode_FromWideChar(config->filesystem_encoding, -1);
 }
 
 /*[clinic input]
@@ -439,7 +439,7 @@
 {
     PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
     const _PyCoreConfig *config = &interp->core_config;
-    return PyUnicode_FromString(config->filesystem_errors);
+    return PyUnicode_FromWideChar(config->filesystem_errors, -1);
 }
 
 /*[clinic input]
@@ -1211,30 +1211,9 @@
 sys__enablelegacywindowsfsencoding_impl(PyObject *module)
 /*[clinic end generated code: output=f5c3855b45e24fe9 input=2bfa931a20704492]*/
 {
-    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
-    _PyCoreConfig *config = &interp->core_config;
-
-    /* Set the filesystem encoding to mbcs/replace (PEP 529) */
-    char *encoding = _PyMem_RawStrdup("mbcs");
-    char *errors = _PyMem_RawStrdup("replace");
-    if (encoding == NULL || errors == NULL) {
-        PyMem_Free(encoding);
-        PyMem_Free(errors);
-        PyErr_NoMemory();
+    if (_PyUnicode_EnableLegacyWindowsFSEncoding() < 0) {
         return NULL;
     }
-
-    PyMem_RawFree(config->filesystem_encoding);
-    config->filesystem_encoding = encoding;
-    PyMem_RawFree(config->filesystem_errors);
-    config->filesystem_errors = errors;
-
-    if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
-                                  config->filesystem_errors) < 0) {
-        PyErr_NoMemory();
-        return NULL;
-    }
-
     Py_RETURN_NONE;
 }