[3.7] bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986) (GH-8987)
* bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986)
Standard streams like sys.stdout now use the "surrogateescape" error
handler, instead of "strict", on the POSIX locale (when the C locale is not
coerced and the UTF-8 Mode is disabled).
Add tests on sys.stdout.errors with LC_ALL=POSIX.
Fix the error handler of standard streams like sys.stdout:
PYTHONIOENCODING=":" is now ignored instead of setting the error handler to
"strict".
(cherry picked from commit 315877dc361d554bec34b4b62c270479ad36a1be)
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 336ae44..27f7590 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -654,10 +654,10 @@
expected = None
self.check_fsencoding(fs_encoding, expected)
- def c_locale_get_error_handler(self, isolated=False, encoding=None):
+ def c_locale_get_error_handler(self, locale, isolated=False, encoding=None):
# Force the POSIX locale
env = os.environ.copy()
- env["LC_ALL"] = "C"
+ env["LC_ALL"] = locale
env["PYTHONCOERCECLOCALE"] = "0"
code = '\n'.join((
'import sys',
@@ -683,43 +683,49 @@
stdout, stderr = p.communicate()
return stdout
- def test_c_locale_surrogateescape(self):
- out = self.c_locale_get_error_handler(isolated=True)
+ def check_locale_surrogateescape(self, locale):
+ out = self.c_locale_get_error_handler(locale, isolated=True)
self.assertEqual(out,
'stdin: surrogateescape\n'
'stdout: surrogateescape\n'
'stderr: backslashreplace\n')
# replace the default error handler
- out = self.c_locale_get_error_handler(encoding=':ignore')
+ out = self.c_locale_get_error_handler(locale, encoding=':ignore')
self.assertEqual(out,
'stdin: ignore\n'
'stdout: ignore\n'
'stderr: backslashreplace\n')
# force the encoding
- out = self.c_locale_get_error_handler(encoding='iso8859-1')
+ out = self.c_locale_get_error_handler(locale, encoding='iso8859-1')
self.assertEqual(out,
'stdin: strict\n'
'stdout: strict\n'
'stderr: backslashreplace\n')
- out = self.c_locale_get_error_handler(encoding='iso8859-1:')
+ out = self.c_locale_get_error_handler(locale, encoding='iso8859-1:')
self.assertEqual(out,
'stdin: strict\n'
'stdout: strict\n'
'stderr: backslashreplace\n')
# have no any effect
- out = self.c_locale_get_error_handler(encoding=':')
- self.assertEqual(out,
- 'stdin: strict\n'
- 'stdout: strict\n'
- 'stderr: backslashreplace\n')
- out = self.c_locale_get_error_handler(encoding='')
+ out = self.c_locale_get_error_handler(locale, encoding=':')
self.assertEqual(out,
'stdin: surrogateescape\n'
'stdout: surrogateescape\n'
'stderr: backslashreplace\n')
+ out = self.c_locale_get_error_handler(locale, encoding='')
+ self.assertEqual(out,
+ 'stdin: surrogateescape\n'
+ 'stdout: surrogateescape\n'
+ 'stderr: backslashreplace\n')
+
+ def test_c_locale_surrogateescape(self):
+ self.check_locale_surrogateescape('C')
+
+ def test_posix_locale_surrogateescape(self):
+ self.check_locale_surrogateescape('POSIX')
def test_implementation(self):
# This test applies to all implementations equally.
diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py
index 4a16b73..554abfa 100644
--- a/Lib/test/test_utf8_mode.py
+++ b/Lib/test/test_utf8_mode.py
@@ -146,9 +146,9 @@
out = self.get_output('-X', 'utf8', '-c', code,
PYTHONIOENCODING=":namereplace")
self.assertEqual(out.splitlines(),
- ['stdin: UTF-8/namereplace',
- 'stdout: UTF-8/namereplace',
- 'stderr: UTF-8/backslashreplace'])
+ ['stdin: utf-8/namereplace',
+ 'stdout: utf-8/namereplace',
+ 'stderr: utf-8/backslashreplace'])
def test_io(self):
code = textwrap.dedent('''
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst
new file mode 100644
index 0000000..5ca373a
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst
@@ -0,0 +1,3 @@
+Fix the error handler of standard streams like sys.stdout:
+PYTHONIOENCODING=":" is now ignored instead of setting the error handler to
+"strict".
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst
new file mode 100644
index 0000000..893e4f5
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst
@@ -0,0 +1,3 @@
+Standard streams like sys.stdout now use the "surrogateescape" error
+handler, instead of "strict", on the POSIX locale (when the C locale is not
+coerced and the UTF-8 Mode is disabled).
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index fc4ee06..539d62a 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -423,13 +423,13 @@
{
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
if (ctype_loc != NULL) {
- /* "surrogateescape" is the default in the legacy C locale */
- if (strcmp(ctype_loc, "C") == 0) {
+ /* surrogateescape is the default in the legacy C and POSIX locales */
+ if (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0) {
return "surrogateescape";
}
#ifdef PY_COERCE_C_LOCALE
- /* "surrogateescape" is the default in locale coercion target locales */
+ /* surrogateescape is the default in locale coercion target locales */
const _LocaleCoercionTarget *target = NULL;
for (target = _TARGET_LOCALES; target->locale_name; target++) {
if (strcmp(ctype_loc, target->locale_name) == 0) {
@@ -440,7 +440,7 @@
}
- /* Otherwise return NULL to request the typical default error handler */
+ /* Otherwise use "strict", the typical default error handler */
- return NULL;
+ return "strict";
}
#ifdef PY_COERCE_C_LOCALE
@@ -1851,20 +1851,42 @@
if (err) {
*err = '\0';
err++;
- if (*err && !errors) {
- errors = err;
+ if (!err[0]) {
+ err = NULL;
}
}
- if (*pythonioencoding && !encoding) {
- encoding = pythonioencoding;
+
+ /* Does PYTHONIOENCODING contain an encoding? */
+ if (pythonioencoding[0]) {
+ if (!encoding) {
+ encoding = pythonioencoding;
+ }
+
+ /* If the encoding is set but not the error handler,
+ use "strict" error handler by default.
+ PYTHONIOENCODING=latin1 behaves as
+ PYTHONIOENCODING=latin1:strict. */
+ if (!err) {
+ err = "strict";
+ }
+ }
+
+ if (!errors && err != NULL) {
+ errors = err;
}
}
- else if (interp->core_config.utf8_mode) {
- encoding = "utf-8";
- errors = "surrogateescape";
+
+ if (interp->core_config.utf8_mode) {
+ if (!encoding) {
+ encoding = "utf-8";
+ }
+ if (!errors) {
+ errors = "surrogateescape";
+ }
}
- if (!errors && !pythonioencoding) {
+
+ if (!errors) {
/* Choose the default error handler based on the current locale */
errors = get_default_standard_stream_error_handler();
}