bpo-42236: Enhance _locale._get_locale_encoding() (GH-23083)

* Rename _Py_GetLocaleEncoding() to _Py_GetLocaleEncodingObject()
* Add _Py_GetLocaleEncoding() which returns a wchar_t* string to
  share code between _Py_GetLocaleEncodingObject()
  and config_get_locale_encoding().
* _Py_GetLocaleEncodingObject() now decodes nl_langinfo(CODESET)
  from the current locale encoding with surrogateescape,
  rather than using UTF-8.
diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h
index ff7bc48..1ab554f 100644
--- a/Include/internal/pycore_fileutils.h
+++ b/Include/internal/pycore_fileutils.h
@@ -50,7 +50,8 @@ PyAPI_FUNC(int) _Py_GetLocaleconvNumeric(
 
 PyAPI_FUNC(void) _Py_closerange(int first, int last);
 
-PyAPI_FUNC(PyObject*) _Py_GetLocaleEncoding(void);
+PyAPI_FUNC(wchar_t*) _Py_GetLocaleEncoding(const char **errmsg);
+PyAPI_FUNC(PyObject*) _Py_GetLocaleEncodingObject(void);
 
 #ifdef __cplusplus
 }
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index 2078bb3..f08d14e 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -1155,7 +1155,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
         }
     }
     if (encoding == NULL && self->encoding == NULL) {
-        self->encoding = _Py_GetLocaleEncoding();
+        self->encoding = _Py_GetLocaleEncodingObject();
         if (self->encoding == NULL) {
             goto error;
         }
diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c
index 359deb7..7b3597e 100644
--- a/Modules/_localemodule.c
+++ b/Modules/_localemodule.c
@@ -783,7 +783,7 @@ static PyObject *
 _locale__get_locale_encoding_impl(PyObject *module)
 /*[clinic end generated code: output=e8e2f6f6f184591a input=513d9961d2f45c76]*/
 {
-    return _Py_GetLocaleEncoding();
+    return _Py_GetLocaleEncodingObject();
 }
 
 
diff --git a/Python/fileutils.c b/Python/fileutils.c
index ba26904..72cdee2 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -821,23 +821,41 @@ _Py_EncodeLocaleEx(const wchar_t *text, char **str,
 }
 
 
-// Get the current locale encoding: locale.getpreferredencoding(False).
+// Get the current locale encoding name:
+//
+// - Return "UTF-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
+// - Return "UTF-8" if the UTF-8 Mode is enabled
+// - On Windows, return the ANSI code page (ex: "cp1250")
+// - Return "UTF-8" if nl_langinfo(CODESET) returns an empty string
+//   and if the _Py_FORCE_UTF8_FS_ENCODING macro is defined (ex: on macOS).
+// - Otherwise, return nl_langinfo(CODESET).
+//
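+// On success, the caller must free the returned string with PyMem_RawFree().
+// Return NULL and set errmsg to an error message if nl_langinfo(CODESET)
+// returns NULL or an empty string (except where UTF-8 is used instead).
+// Return NULL and set errmsg to NULL on memory allocation failure.
+//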
 // See also config_get_locale_encoding()
-PyObject *
-_Py_GetLocaleEncoding(void)
+wchar_t*
+_Py_GetLocaleEncoding(const char **errmsg)
 {
+    *errmsg = NULL;
 #ifdef _Py_FORCE_UTF8_LOCALE
     // On Android langinfo.h and CODESET are missing,
     // and UTF-8 is always used in mbstowcs() and wcstombs().
-    return PyUnicode_FromString("UTF-8");
+    return _PyMem_RawWcsdup(L"UTF-8");
 #else
     const PyPreConfig *preconfig = &_PyRuntime.preconfig;
     if (preconfig->utf8_mode) {
-        return PyUnicode_FromString("UTF-8");
+        return _PyMem_RawWcsdup(L"UTF-8");
     }
 
-#if defined(MS_WINDOWS)
-    return PyUnicode_FromFormat("cp%u", GetACP());
+#ifdef MS_WINDOWS
+    wchar_t encoding[23];
+    unsigned int ansi_codepage = GetACP();
+    swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
+    encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
+    return _PyMem_RawWcsdup(encoding);
 #else
     const char *encoding = nl_langinfo(CODESET);
     if (!encoding || encoding[0] == '\0') {
@@ -845,19 +863,45 @@ _Py_GetLocaleEncoding(void)
         // nl_langinfo() can return an empty string when the LC_CTYPE locale is
         // not supported. Default to UTF-8 in that case, because UTF-8 is the
         // default charset on macOS.
-        encoding = "UTF-8";
+        return _PyMem_RawWcsdup(L"UTF-8");
 #else
-        PyErr_SetString(PyExc_ValueError,
-                        "failed to get the locale encoding: "
-                        "nl_langinfo(CODESET) returns an empty string");
+        *errmsg = "failed to get the locale encoding: "
+                  "nl_langinfo(CODESET) returns an empty string";
         return NULL;
 #endif
     }
-    // Decode from UTF-8
-    return PyUnicode_FromString(encoding);
-#endif  // !CODESET
 
-#endif
+    wchar_t *wstr;
+    int res = decode_current_locale(encoding, &wstr, NULL,
+                                    errmsg, _Py_ERROR_SURROGATEESCAPE);
+    if (res < 0) {
+        return NULL;
+    }
+    return wstr;
+#endif  // !MS_WINDOWS
+
+#endif  // !_Py_FORCE_UTF8_LOCALE
+}
+
+
+PyObject *
+_Py_GetLocaleEncodingObject(void)
+{
+    const char *errmsg;
+    wchar_t *encoding = _Py_GetLocaleEncoding(&errmsg);
+    if (encoding == NULL) {
+        if (errmsg != NULL) {
+            PyErr_SetString(PyExc_ValueError, errmsg);
+        }
+        else {
+            PyErr_NoMemory();
+        }
+        return NULL;
+    }
+
+    PyObject *str = PyUnicode_FromWideChar(encoding, -1);
+    PyMem_RawFree(encoding);
+    return str;
 }
 
 
diff --git a/Python/initconfig.c b/Python/initconfig.c
index e129278..56f4297 100644
--- a/Python/initconfig.c
+++ b/Python/initconfig.c
@@ -11,11 +11,7 @@
 
 #include "osdefs.h"               // DELIM
 #include <locale.h>               // setlocale()
-#ifdef HAVE_LANGINFO_H
-#  include <langinfo.h>           // nl_langinfo(CODESET)
-#endif
 #if defined(MS_WINDOWS) || defined(__CYGWIN__)
-#  include <windows.h>            // GetACP()
 #  ifdef HAVE_IO_H
 #    include <io.h>
 #  endif
@@ -1497,41 +1493,24 @@ config_get_stdio_errors(const PyPreConfig *preconfig)
 }
 
 
-// See also _Py_GetLocaleEncoding() and config_get_fs_encoding()
+// See also config_get_fs_encoding()
 static PyStatus
 config_get_locale_encoding(PyConfig *config, const PyPreConfig *preconfig,
                            wchar_t **locale_encoding)
 {
-#ifdef _Py_FORCE_UTF8_LOCALE
-    return PyConfig_SetString(config, locale_encoding, L"utf-8");
-#else
-    if (preconfig->utf8_mode) {
-        return PyConfig_SetString(config, locale_encoding, L"utf-8");
+    const char *errmsg;
+    wchar_t *encoding = _Py_GetLocaleEncoding(&errmsg);
+    if (encoding == NULL) {
+        if (errmsg != NULL) {
+            return _PyStatus_ERR(errmsg);
+        }
+        else {
+            return _PyStatus_NO_MEMORY();
+        }
     }
-
-#ifdef MS_WINDOWS
-    char encoding[20];
-    PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP());
-    return PyConfig_SetBytesString(config, locale_encoding, encoding);
-#else
-    const char *encoding = nl_langinfo(CODESET);
-    if (!encoding || encoding[0] == '\0') {
-#ifdef _Py_FORCE_UTF8_FS_ENCODING
-        // nl_langinfo() can return an empty string when the LC_CTYPE locale is
-        // not supported. Default to UTF-8 in that case, because UTF-8 is the
-        // default charset on macOS.
-        encoding = "UTF-8";
-#else
-        return _PyStatus_ERR("failed to get the locale encoding: "
-                             "nl_langinfo(CODESET) returns an empty string");
-#endif
-    }
-    /* nl_langinfo(CODESET) is decoded by Py_DecodeLocale() */
-    return CONFIG_SET_BYTES_STR(config,
-                                locale_encoding, encoding,
-                                "nl_langinfo(CODESET)");
-#endif  // !MS_WINDOWS
-#endif  // !_Py_FORCE_UTF8_LOCALE
+    PyStatus status = PyConfig_SetString(config, locale_encoding, encoding);
+    PyMem_RawFree(encoding);
+    return status;
 }