bpo-36775: Add _Py_FORCE_UTF8_FS_ENCODING macro (GH-13056)
Add _Py_FORCE_UTF8_LOCALE and _Py_FORCE_UTF8_FS_ENCODING macros to
factor out the repeated "#if defined(__ANDROID__) || defined(__VXWORKS__)"
and "#if defined(__APPLE__)" checks.
Also clean up config_init_fs_encoding().
diff --git a/Python/coreconfig.c b/Python/coreconfig.c
index 1cb4b52..c40c1f8 100644
--- a/Python/coreconfig.c
+++ b/Python/coreconfig.c
@@ -1313,7 +1313,7 @@
#ifdef MS_WINDOWS
char encoding[20];
PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP());
-#elif defined(__ANDROID__) || defined(__VXWORKS__)
+#elif defined(_Py_FORCE_UTF8_LOCALE)
const char *encoding = "UTF-8";
#else
const char *encoding = nl_langinfo(CODESET);
@@ -1450,66 +1450,40 @@
{
_PyInitError err;
-#ifdef MS_WINDOWS
- if (preconfig->legacy_windows_fs_encoding) {
- /* Legacy Windows filesystem encoding: mbcs/replace */
- if (config->filesystem_encoding == NULL) {
- err = _PyCoreConfig_SetString(&config->filesystem_encoding,
- "mbcs");
- if (_Py_INIT_FAILED(err)) {
- return err;
- }
- }
- if (config->filesystem_errors == NULL) {
- err = _PyCoreConfig_SetString(&config->filesystem_errors,
- "replace");
- if (_Py_INIT_FAILED(err)) {
- return err;
- }
- }
- }
-
- /* Windows defaults to utf-8/surrogatepass (PEP 529).
-
- Note: UTF-8 Mode takes the same code path and the Legacy Windows FS
- encoding has the priortiy over UTF-8 Mode. */
if (config->filesystem_encoding == NULL) {
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"utf-8");
- if (_Py_INIT_FAILED(err)) {
- return err;
- }
- }
-
- if (config->filesystem_errors == NULL) {
- err = _PyCoreConfig_SetString(&config->filesystem_errors,
- "surrogatepass");
- if (_Py_INIT_FAILED(err)) {
- return err;
- }
- }
#else
- if (config->filesystem_encoding == NULL) {
+
+#ifdef MS_WINDOWS
+ if (preconfig->legacy_windows_fs_encoding) {
+ /* Legacy Windows filesystem encoding: mbcs/replace */
+ err = _PyCoreConfig_SetString(&config->filesystem_encoding,
+ "mbcs");
+ }
+ else
+#endif
if (preconfig->utf8_mode) {
- /* UTF-8 Mode use: utf-8/surrogateescape */
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"utf-8");
- /* errors defaults to surrogateescape above */
}
+#ifndef MS_WINDOWS
else if (_Py_GetForceASCII()) {
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"ascii");
}
+#endif
else {
- /* macOS and Android use UTF-8,
- other platforms use the locale encoding. */
-#if defined(__APPLE__) || defined(__ANDROID__)
+#ifdef MS_WINDOWS
+ /* Windows defaults to utf-8/surrogatepass (PEP 529). */
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"utf-8");
#else
err = config_get_locale_encoding(&config->filesystem_encoding);
#endif
}
+#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
if (_Py_INIT_FAILED(err)) {
return err;
@@ -1517,14 +1491,22 @@
}
if (config->filesystem_errors == NULL) {
- /* by default, use the "surrogateescape" error handler */
- err = _PyCoreConfig_SetString(&config->filesystem_errors,
- "surrogateescape");
+ const char *errors;
+#ifdef MS_WINDOWS
+ if (preconfig->legacy_windows_fs_encoding) {
+ errors = "replace";
+ }
+ else {
+ errors = "surrogatepass";
+ }
+#else
+ errors = "surrogateescape";
+#endif
+ err = _PyCoreConfig_SetString(&config->filesystem_errors, errors);
if (_Py_INIT_FAILED(err)) {
return err;
}
}
-#endif
return _Py_INIT_OK();
}
diff --git a/Python/fileutils.c b/Python/fileutils.c
index b933874..dfad48e 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -85,7 +85,7 @@
Py_RETURN_NONE;
}
-#if !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS)
+#if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
#define USE_FORCE_ASCII
@@ -309,7 +309,7 @@
{
/* nothing to do */
}
-#endif /* !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS) */
+#endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
@@ -536,7 +536,7 @@
int current_locale, _Py_error_handler errors)
{
if (current_locale) {
-#if defined(__ANDROID__) || defined(__VXWORKS__)
+#ifdef _Py_FORCE_UTF8_LOCALE
return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
errors);
#else
@@ -544,7 +544,7 @@
#endif
}
-#if defined(__APPLE__) || defined(__ANDROID__) || defined(__VXWORKS__)
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
errors);
#else
@@ -569,7 +569,7 @@
#endif
return decode_current_locale(arg, wstr, wlen, reason, errors);
-#endif /* __APPLE__ or __ANDROID__ or __VXWORKS__ */
+#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
}
@@ -727,7 +727,7 @@
int raw_malloc, int current_locale, _Py_error_handler errors)
{
if (current_locale) {
-#ifdef __ANDROID__
+#ifdef _Py_FORCE_UTF8_LOCALE
return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
raw_malloc, errors);
#else
@@ -736,7 +736,7 @@
#endif
}
-#if defined(__APPLE__) || defined(__ANDROID__)
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
raw_malloc, errors);
#else
@@ -762,7 +762,7 @@
return encode_current_locale(text, str, error_pos, reason,
raw_malloc, errors);
-#endif /* __APPLE__ or __ANDROID__ */
+#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
}
static char*
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index afa683b..40eeebd 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -366,8 +366,7 @@
const char *new_locale = setlocale(LC_CTYPE,
target->locale_name);
if (new_locale != NULL) {
-#if !defined(__APPLE__) && !defined(__ANDROID__) && \
-defined(HAVE_LANGINFO_H) && defined(CODESET)
+#if !defined(_Py_FORCE_UTF8_LOCALE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
/* Also ensure that nl_langinfo works in this locale */
char *codeset = nl_langinfo(CODESET);
if (!codeset || *codeset == '\0') {