bpo-29240: readline now ignores the UTF-8 Mode (#5145)
Add new functions that ignore the UTF-8 Mode:
* _Py_DecodeCurrentLocale()
* _Py_EncodeCurrentLocale()
* _PyUnicode_DecodeCurrentLocaleAndSize()
* _PyUnicode_EncodeCurrentLocale()
Modify the readline module to use these functions.
Re-enable test_readline.test_nonascii().
diff --git a/Include/fileutils.h b/Include/fileutils.h
index d027e18..2527d84 100644
--- a/Include/fileutils.h
+++ b/Include/fileutils.h
@@ -24,6 +24,14 @@
const char *s,
Py_ssize_t size,
size_t *p_wlen);
+
+PyAPI_FUNC(wchar_t *) _Py_DecodeCurrentLocale(
+ const char *arg,
+ size_t *size);
+
+PyAPI_FUNC(char*) _Py_EncodeCurrentLocale(
+ const wchar_t *text,
+ size_t *error_pos);
#endif
#ifndef Py_LIMITED_API
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 0274de6..576e7ad 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -1810,6 +1810,16 @@
PyObject *unicode,
const char *errors
);
+
+PyAPI_FUNC(PyObject*) _PyUnicode_DecodeCurrentLocaleAndSize(
+ const char *str,
+ Py_ssize_t len,
+ const char *errors);
+
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCurrentLocale(
+ PyObject *unicode,
+ const char *errors
+ );
#endif
/* --- File system encoding ---------------------------------------------- */
diff --git a/Lib/test/test_readline.py b/Lib/test/test_readline.py
index 28ea38b7..b4c25de 100644
--- a/Lib/test/test_readline.py
+++ b/Lib/test/test_readline.py
@@ -152,8 +152,6 @@
output = run_pty(self.auto_history_script.format(False))
self.assertIn(b"History length: 0\r\n", output)
- @unittest.skipIf(True,
- "FIXME: test broken by bpo-29240")
def test_nonascii(self):
try:
readline.add_history("\xEB\xEF")
diff --git a/Modules/readline.c b/Modules/readline.c
index 811fca8..8db4cfd 100644
--- a/Modules/readline.c
+++ b/Modules/readline.c
@@ -132,13 +132,14 @@
static PyObject *
encode(PyObject *b)
{
- return PyUnicode_EncodeLocale(b, "surrogateescape");
+ return _PyUnicode_EncodeCurrentLocale(b, "surrogateescape");
}
static PyObject *
decode(const char *s)
{
- return PyUnicode_DecodeLocale(s, "surrogateescape");
+ return _PyUnicode_DecodeCurrentLocaleAndSize(s, strlen(s),
+ "surrogateescape");
}
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 92a6ad6..1a230e0 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3395,8 +3395,8 @@
}
}
-PyObject *
-PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
+static PyObject *
+unicode_encode_locale(PyObject *unicode, const char *errors, int current_locale)
{
Py_ssize_t wlen, wlen2;
wchar_t *wstr;
@@ -3423,7 +3423,12 @@
/* "surrogateescape" error handler */
char *str;
- str = Py_EncodeLocale(wstr, &error_pos);
+ if (current_locale) {
+ str = _Py_EncodeCurrentLocale(wstr, &error_pos);
+ }
+ else {
+ str = Py_EncodeLocale(wstr, &error_pos);
+ }
if (str == NULL) {
if (error_pos == (size_t)-1) {
PyErr_NoMemory();
@@ -3437,7 +3442,12 @@
PyMem_Free(wstr);
bytes = PyBytes_FromString(str);
- PyMem_Free(str);
+ if (current_locale) {
+ PyMem_RawFree(str);
+ }
+ else {
+ PyMem_Free(str);
+ }
}
else {
/* strict mode */
@@ -3503,6 +3513,18 @@
}
PyObject *
+PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
+{
+ return unicode_encode_locale(unicode, errors, 0);
+}
+
+PyObject *
+_PyUnicode_EncodeCurrentLocale(PyObject *unicode, const char *errors)
+{
+ return unicode_encode_locale(unicode, errors, 1);
+}
+
+PyObject *
PyUnicode_EncodeFSDefault(PyObject *unicode)
{
#if defined(__APPLE__)
@@ -3524,7 +3546,8 @@
Py_FileSystemDefaultEncodeErrors);
}
else {
- return PyUnicode_EncodeLocale(unicode, Py_FileSystemDefaultEncodeErrors);
+ return unicode_encode_locale(unicode,
+ Py_FileSystemDefaultEncodeErrors, 0);
}
#endif
}
@@ -3695,9 +3718,9 @@
return 0;
}
-PyObject*
-PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
- const char *errors)
+static PyObject*
+unicode_decode_locale(const char *str, Py_ssize_t len, const char *errors,
+ int current_locale)
{
wchar_t smallbuf[256];
size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf);
@@ -3719,7 +3742,12 @@
if (surrogateescape) {
/* "surrogateescape" error handler */
- wstr = Py_DecodeLocale(str, &wlen);
+ if (current_locale) {
+ wstr = _Py_DecodeCurrentLocale(str, &wlen);
+ }
+ else {
+ wstr = Py_DecodeLocale(str, &wlen);
+ }
if (wstr == NULL) {
if (wlen == (size_t)-1)
PyErr_NoMemory();
@@ -3795,10 +3823,24 @@
}
PyObject*
+PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
+ const char *errors)
+{
+ return unicode_decode_locale(str, len, errors, 0);
+}
+
+PyObject*
+_PyUnicode_DecodeCurrentLocaleAndSize(const char *str, Py_ssize_t len,
+ const char *errors)
+{
+ return unicode_decode_locale(str, len, errors, 1);
+}
+
+PyObject*
PyUnicode_DecodeLocale(const char *str, const char *errors)
{
Py_ssize_t size = (Py_ssize_t)strlen(str);
- return PyUnicode_DecodeLocaleAndSize(str, size, errors);
+ return unicode_decode_locale(str, size, errors, 0);
}
diff --git a/Python/fileutils.c b/Python/fileutils.c
index 645a179..9275494 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -263,7 +263,7 @@
#if !defined(__APPLE__) && !defined(__ANDROID__)
static wchar_t*
-decode_locale(const char* arg, size_t *size)
+decode_current_locale(const char* arg, size_t *size)
{
wchar_t *res;
size_t argsize;
@@ -380,6 +380,38 @@
#endif
+static wchar_t*
+decode_locale(const char* arg, size_t *size, int ignore_utf8_mode)
+{
+#if defined(__APPLE__) || defined(__ANDROID__)
+ return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
+#else
+ if (!ignore_utf8_mode && Py_UTF8Mode == 1) {
+ return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
+ }
+
+#ifndef MS_WINDOWS
+ if (force_ascii == -1)
+ force_ascii = check_force_ascii();
+
+ if (force_ascii) {
+ /* force ASCII encoding to work around mbstowcs() issue */
+ wchar_t *wstr = decode_ascii_surrogateescape(arg, size);
+ if (wstr == NULL) {
+ if (size != NULL) {
+ *size = (size_t)-1;
+ }
+ return NULL;
+ }
+ return wstr;
+ }
+#endif
+
+ return decode_current_locale(arg, size);
+#endif /* __APPLE__ or __ANDROID__ */
+}
+
+
/* Decode a byte string from the locale encoding with the
surrogateescape error handler: undecodable bytes are decoded as characters
in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
@@ -402,32 +434,15 @@
wchar_t*
Py_DecodeLocale(const char* arg, size_t *size)
{
-#if defined(__APPLE__) || defined(__ANDROID__)
- return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
-#else
- if (Py_UTF8Mode == 1) {
- return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
- }
+ return decode_locale(arg, size, 0);
+}
-#ifndef MS_WINDOWS
- if (force_ascii == -1)
- force_ascii = check_force_ascii();
- if (force_ascii) {
- /* force ASCII encoding to workaround mbstowcs() issue */
- wchar_t *wstr = decode_ascii_surrogateescape(arg, size);
- if (wstr == NULL) {
- if (size != NULL) {
- *size = (size_t)-1;
- }
- return NULL;
- }
- return wstr;
- }
-#endif
-
- return decode_locale(arg, size);
-#endif /* __APPLE__ or __ANDROID__ */
+/* Similar to Py_DecodeLocale(), but ignores the UTF-8 Mode */
+wchar_t*
+_Py_DecodeCurrentLocale(const char* arg, size_t *size)
+{
+ return decode_locale(arg, size, 1);
}
@@ -508,12 +523,13 @@
#endif
static char*
-encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
+encode_locale(const wchar_t *text, size_t *error_pos,
+ int raw_malloc, int ignore_utf8_mode)
{
#if defined(__APPLE__) || defined(__ANDROID__)
return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
#else /* __APPLE__ */
- if (Py_UTF8Mode == 1) {
+ if (!ignore_utf8_mode && Py_UTF8Mode == 1) {
return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
}
@@ -544,7 +560,7 @@
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
- return encode_locale(text, error_pos, 0);
+ return encode_locale(text, error_pos, 0, 0);
}
@@ -553,7 +569,15 @@
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
- return encode_locale(text, error_pos, 1);
+ return encode_locale(text, error_pos, 1, 0);
+}
+
+
+/* Similar to _Py_EncodeLocaleRaw(), but ignores the UTF-8 Mode */
+char*
+_Py_EncodeCurrentLocale(const wchar_t *text, size_t *error_pos)
+{
+ return encode_locale(text, error_pos, 1, 1);
}