bpo-29240: PEP 540: Add a new UTF-8 Mode (#855)
* Add -X utf8 command line option, PYTHONUTF8 environment variable
and a new sys.flags.utf8_mode flag.
* If the LC_CTYPE locale is "C" at startup: enable automatically the
UTF-8 mode.
* Add _winapi.GetACP(). encodings._alias_mbcs() now calls
_winapi.GetACP() to get the ANSI code page
* locale.getpreferredencoding() now returns 'UTF-8' in the UTF-8
mode. As a side effect, open() now uses the UTF-8 encoding by
default in this mode.
* Py_DecodeLocale() and Py_EncodeLocale() now use the UTF-8 encoding
in the UTF-8 Mode.
* Update subprocess._args_from_interpreter_flags() to handle -X utf8
* Skip some tests relying on the current locale if the UTF-8 mode is
enabled.
* Add test_utf8mode.py.
* _Py_DecodeUTF8_surrogateescape() gets a new optional parameter to
return also the length (number of wide characters).
* pymain_get_global_config() and pymain_set_global_config() now
always copy flag values, rather than only copying if the new value
is greater than the old value.
diff --git a/Lib/locale.py b/Lib/locale.py
index f1d157d..18079e7 100644
--- a/Lib/locale.py
+++ b/Lib/locale.py
@@ -617,6 +617,8 @@
# On Win32, this will return the ANSI code page
def getpreferredencoding(do_setlocale = True):
"""Return the charset that the user is likely using."""
+ if sys.flags.utf8_mode:
+ return 'UTF-8'
import _bootlocale
return _bootlocale.getpreferredencoding(False)
else:
@@ -634,6 +636,8 @@
def getpreferredencoding(do_setlocale = True):
"""Return the charset that the user is likely using,
by looking at environment variables."""
+ if sys.flags.utf8_mode:
+ return 'UTF-8'
res = getdefaultlocale()[1]
if res is None:
# LANG not set, default conservatively to ASCII
@@ -643,6 +647,8 @@
def getpreferredencoding(do_setlocale = True):
"""Return the charset that the user is likely using,
according to the system configuration."""
+ if sys.flags.utf8_mode:
+ return 'UTF-8'
import _bootlocale
if do_setlocale:
oldloc = setlocale(LC_CTYPE)