Issue #11303: Added shortcuts for utf8 and latin1 encodings.
Documented the list of optimized encodings as CPython implementation
detail.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e7bbd80..48ea0a2 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1462,13 +1462,15 @@
char lower[11]; /* Enough for any encoding shortcut */
if (encoding == NULL)
- encoding = PyUnicode_GetDefaultEncoding();
+ return PyUnicode_DecodeUTF8(s, size, errors);
/* Shortcuts for common default encodings */
if (normalize_encoding(encoding, lower, sizeof(lower))) {
- if (strcmp(lower, "utf-8") == 0)
+ if ((strcmp(lower, "utf-8") == 0) ||
+ (strcmp(lower, "utf8") == 0))
return PyUnicode_DecodeUTF8(s, size, errors);
else if ((strcmp(lower, "latin-1") == 0) ||
+ (strcmp(lower, "latin1") == 0) ||
(strcmp(lower, "iso-8859-1") == 0))
return PyUnicode_DecodeLatin1(s, size, errors);
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
@@ -1670,15 +1672,19 @@
}
if (encoding == NULL)
- encoding = PyUnicode_GetDefaultEncoding();
+ return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
+ PyUnicode_GET_SIZE(unicode),
+ errors);
/* Shortcuts for common default encodings */
if (normalize_encoding(encoding, lower, sizeof(lower))) {
- if (strcmp(lower, "utf-8") == 0)
+ if ((strcmp(lower, "utf-8") == 0) ||
+ (strcmp(lower, "utf8") == 0))
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),
errors);
else if ((strcmp(lower, "latin-1") == 0) ||
+ (strcmp(lower, "latin1") == 0) ||
(strcmp(lower, "iso-8859-1") == 0))
return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),