Add PyInterpreterState.fs_codec.utf8 (GH-18367)

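Add a fast path for the UTF-8 encoding in PyUnicode_EncodeFSDefault()
and PyUnicode_DecodeFSDefaultAndSize(): when fs_codec.utf8 is set, call
the C UTF-8 codec directly instead of PyUnicode_AsEncodedString() or
PyUnicode_Decode().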

Add _PyUnicode_FiniEncodings() helper function for _PyUnicode_Fini().
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 5f10437..7c8bc06 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3615,39 +3615,32 @@
 PyUnicode_EncodeFSDefault(PyObject *unicode)
 {
     PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
-#ifdef _Py_FORCE_UTF8_FS_ENCODING
-    if (interp->fs_codec.encoding) {
+    if (interp->fs_codec.utf8) {
         return unicode_encode_utf8(unicode,
                                    interp->fs_codec.error_handler,
                                    interp->fs_codec.errors);
     }
-    else {
-        const wchar_t *filesystem_errors = interp->config.filesystem_errors;
-        _Py_error_handler errors;
-        errors = get_error_handler_wide(filesystem_errors);
-        assert(errors != _Py_ERROR_UNKNOWN);
-        return unicode_encode_utf8(unicode, errors, NULL);
-    }
-#else
-    /* Bootstrap check: if the filesystem codec is implemented in Python, we
-       cannot use it to encode and decode filenames before it is loaded. Load
-       the Python codec requires to encode at least its own filename. Use the C
-       implementation of the locale codec until the codec registry is
-       initialized and the Python codec is loaded.
-       See _PyUnicode_InitEncodings(). */
-    if (interp->fs_codec.encoding) {
+#ifndef _Py_FORCE_UTF8_FS_ENCODING
+    else if (interp->fs_codec.encoding) {
         return PyUnicode_AsEncodedString(unicode,
                                          interp->fs_codec.encoding,
                                          interp->fs_codec.errors);
     }
-    else {
-        const wchar_t *filesystem_errors = interp->config.filesystem_errors;
-        _Py_error_handler errors;
-        errors = get_error_handler_wide(filesystem_errors);
-        assert(errors != _Py_ERROR_UNKNOWN);
-        return unicode_encode_locale(unicode, errors, 0);
-    }
 #endif
+    else {
+        /* Before _PyUnicode_InitEncodings() is called, the Python codec
+           machinery is not ready and so cannot be used: use the C UTF-8
+           encoder (_Py_FORCE_UTF8_FS_ENCODING) or wcstombs() instead. */
+        const wchar_t *filesystem_errors = interp->config.filesystem_errors;
+        assert(filesystem_errors != NULL);
+        _Py_error_handler errors = get_error_handler_wide(filesystem_errors);
+        assert(errors != _Py_ERROR_UNKNOWN);
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+        return unicode_encode_utf8(unicode, errors, NULL);
+#else
+        return unicode_encode_locale(unicode, errors, 0);
+#endif
+    }
 }
 
 PyObject *
@@ -3857,39 +3850,33 @@
 PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
 {
     PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
-#ifdef _Py_FORCE_UTF8_FS_ENCODING
-    if (interp->fs_codec.encoding) {
+    if (interp->fs_codec.utf8) {
         return unicode_decode_utf8(s, size,
                                    interp->fs_codec.error_handler,
                                    interp->fs_codec.errors,
                                    NULL);
     }
-    else {
-        const wchar_t *filesystem_errors = interp->config.filesystem_errors;
-        _Py_error_handler errors;
-        errors = get_error_handler_wide(filesystem_errors);
-        assert(errors != _Py_ERROR_UNKNOWN);
-        return unicode_decode_utf8(s, size, errors, NULL, NULL);
-    }
-#else
-    /* Bootstrap check: if the filesystem codec is implemented in Python, we
-       cannot use it to encode and decode filenames before it is loaded. Load
-       the Python codec requires to encode at least its own filename. Use the C
-       implementation of the locale codec until the codec registry is
-       initialized and the Python codec is loaded.
-       See _PyUnicode_InitEncodings(). */
-    if (interp->fs_codec.encoding) {
+#ifndef _Py_FORCE_UTF8_FS_ENCODING
+    else if (interp->fs_codec.encoding) {
         return PyUnicode_Decode(s, size,
                                 interp->fs_codec.encoding,
                                 interp->fs_codec.errors);
     }
-    else {
-        const wchar_t *filesystem_errors = interp->config.filesystem_errors;
-        _Py_error_handler errors;
-        errors = get_error_handler_wide(filesystem_errors);
-        return unicode_decode_locale(s, size, errors, 0);
-    }
 #endif
+    else {
+        /* Before _PyUnicode_InitEncodings() is called, the Python codec
+           machinery is not ready and so cannot be used: use the C UTF-8
+           decoder (_Py_FORCE_UTF8_FS_ENCODING) or mbstowcs() instead. */
+        const wchar_t *filesystem_errors = interp->config.filesystem_errors;
+        assert(filesystem_errors != NULL);
+        _Py_error_handler errors = get_error_handler_wide(filesystem_errors);
+        assert(errors != _Py_ERROR_UNKNOWN);
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+        return unicode_decode_utf8(s, size, errors, NULL, NULL);
+#else
+        return unicode_decode_locale(s, size, errors, 0);
+#endif
+    }
 }
 
 
@@ -15849,10 +15836,16 @@
 
     PyMem_RawFree(interp->fs_codec.encoding);
     interp->fs_codec.encoding = encoding;
+    /* encoding has been normalized by init_fs_encoding() */
+    interp->fs_codec.utf8 = (strcmp(encoding, "utf-8") == 0);
     PyMem_RawFree(interp->fs_codec.errors);
     interp->fs_codec.errors = errors;
     interp->fs_codec.error_handler = error_handler;
 
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+    assert(interp->fs_codec.utf8 == 1);
+#endif
+
     /* At this point, PyUnicode_EncodeFSDefault() and
        PyUnicode_DecodeFSDefault() can now use the Python codec rather than
        the C implementation of the filesystem encoding. */
@@ -15902,6 +15895,19 @@
 }
 
 
+static void
+_PyUnicode_FiniEncodings(PyThreadState *tstate)
+{
+    PyInterpreterState *interp = tstate->interp;
+    PyMem_RawFree(interp->fs_codec.encoding);
+    interp->fs_codec.encoding = NULL;
+    interp->fs_codec.utf8 = 0;
+    PyMem_RawFree(interp->fs_codec.errors);
+    interp->fs_codec.errors = NULL;
+    interp->fs_codec.error_handler = _Py_ERROR_UNKNOWN;
+}
+
+
 #ifdef MS_WINDOWS
 int
 _PyUnicode_EnableLegacyWindowsFSEncoding(void)
@@ -15954,12 +15960,7 @@
         _PyUnicode_ClearStaticStrings();
     }
 
-    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
-    PyMem_RawFree(interp->fs_codec.encoding);
-    interp->fs_codec.encoding = NULL;
-    PyMem_RawFree(interp->fs_codec.errors);
-    interp->fs_codec.errors = NULL;
-    interp->config.filesystem_errors = (wchar_t *)_Py_ERROR_UNKNOWN;
+    _PyUnicode_FiniEncodings(tstate);
 }