Issue #28596: The preferred encoding is UTF-8 on Android.
diff --git a/Lib/_bootlocale.py b/Lib/_bootlocale.py
index 4bccac1..0c61b0d 100644
--- a/Lib/_bootlocale.py
+++ b/Lib/_bootlocale.py
@@ -14,11 +14,17 @@
try:
_locale.CODESET
except AttributeError:
- def getpreferredencoding(do_setlocale=True):
- # This path for legacy systems needs the more complex
- # getdefaultlocale() function, import the full locale module.
- import locale
- return locale.getpreferredencoding(do_setlocale)
+ if hasattr(sys, 'getandroidapilevel'):
+ # On Android, langinfo.h and CODESET are missing, and UTF-8 is
+ # always used in mbstowcs() and wcstombs().
+ def getpreferredencoding(do_setlocale=True):
+ return 'UTF-8'
+ else:
+ def getpreferredencoding(do_setlocale=True):
+ # This path for legacy systems needs the more complex
+ # getdefaultlocale() function, import the full locale module.
+ import locale
+ return locale.getpreferredencoding(do_setlocale)
else:
def getpreferredencoding(do_setlocale=True):
assert not do_setlocale
diff --git a/Lib/locale.py b/Lib/locale.py
index 4de0090..f8d1d78 100644
--- a/Lib/locale.py
+++ b/Lib/locale.py
@@ -618,15 +618,21 @@
try:
CODESET
except NameError:
- # Fall back to parsing environment variables :-(
- def getpreferredencoding(do_setlocale = True):
- """Return the charset that the user is likely using,
- by looking at environment variables."""
- res = getdefaultlocale()[1]
- if res is None:
- # LANG not set, default conservatively to ASCII
- res = 'ascii'
- return res
+ if hasattr(sys, 'getandroidapilevel'):
+ # On Android, langinfo.h and CODESET are missing, and UTF-8 is
+ # always used in mbstowcs() and wcstombs().
+ def getpreferredencoding(do_setlocale = True):
+ return 'UTF-8'
+ else:
+ # Fall back to parsing environment variables :-(
+ def getpreferredencoding(do_setlocale = True):
+ """Return the charset that the user is likely using,
+ by looking at environment variables."""
+ res = getdefaultlocale()[1]
+ if res is None:
+ # LANG not set, default conservatively to ASCII
+ res = 'ascii'
+ return res
else:
def getpreferredencoding(do_setlocale = True):
"""Return the charset that the user is likely using,
diff --git a/Misc/NEWS b/Misc/NEWS
index 3ef25b6..b2cbad1 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -13,6 +13,9 @@
- Issue #18896: Python function can now have more than 255 parameters.
collections.namedtuple() now supports tuples with more than 255 elements.
+- Issue #28596: The preferred encoding is UTF-8 on Android. Patch written by
+ Chi Hsuan Yen.
+
- Issue #26919: On Android, operating system data is now always encoded/decoded
to/from UTF-8, instead of the locale encoding to avoid inconsistencies with
os.fsencode() and os.fsdecode() which are already using UTF-8.