bpo-41842: Add codecs.unregister() function (GH-22360)

Add codecs.unregister() and PyCodec_Unregister() functions
to unregister a codec search function.
diff --git a/Doc/c-api/codec.rst b/Doc/c-api/codec.rst
index 172dcb3..235c77c 100644
--- a/Doc/c-api/codec.rst
+++ b/Doc/c-api/codec.rst
@@ -10,6 +10,14 @@
    As side effect, this tries to load the :mod:`encodings` package, if not yet
    done, to make sure that it is always first in the list of search functions.
 
+.. c:function:: int PyCodec_Unregister(PyObject *search_function)
+
+   Unregister a codec search function and clear the registry's cache.
+   If the search function is not registered, do nothing.
+   Return ``0`` on success. Raise an exception and return ``-1`` on error.
+
+   .. versionadded:: 3.10
+
 .. c:function:: int PyCodec_KnownEncoding(const char *encoding)
 
    Return ``1`` or ``0`` depending on whether there is a registered codec for
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
index f071057..a026513 100644
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -163,11 +163,14 @@
    :class:`CodecInfo` object. In case a search function cannot find
    a given encoding, it should return ``None``.
 
-   .. note::
 
-      Search function registration is not currently reversible,
-      which may cause problems in some cases, such as unit testing or
-      module reloading.
+.. function:: unregister(search_function)
+
+   Unregister a codec search function and clear the registry's cache.
+   If the search function is not registered, do nothing.
+
+   .. versionadded:: 3.10
+
 
 While the builtin :func:`open` and the associated :mod:`io` module are the
 recommended approach for working with encoded text files, this module
diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst
index 1228f26..f74dd1a 100644
--- a/Doc/whatsnew/3.10.rst
+++ b/Doc/whatsnew/3.10.rst
@@ -109,6 +109,12 @@
 Add :func:`base64.b32hexencode` and :func:`base64.b32hexdecode` to support the
 Base32 Encoding with Extended Hex Alphabet.
 
+codecs
+------
+
+Add a :func:`codecs.unregister` function to unregister a codec search function.
+(Contributed by Hai Shi in :issue:`41842`.)
+
 curses
 ------
 
@@ -237,6 +243,10 @@
   :class:`datetime.time` objects.
   (Contributed by Zackery Spytz in :issue:`30155`.)
 
+* Add a :c:func:`PyCodec_Unregister` function to unregister a codec
+  search function.
+  (Contributed by Hai Shi in :issue:`41842`.)
+
 Porting to Python 3.10
 ----------------------
 
diff --git a/Include/codecs.h b/Include/codecs.h
index 3ad0f2b..37ecfb4 100644
--- a/Include/codecs.h
+++ b/Include/codecs.h
@@ -27,6 +27,14 @@
        PyObject *search_function
        );
 
+/* Unregister a codec search function (matched by object identity) and
+   clear the registry's cache. If it is not registered, do nothing.
+   Return 0 on success. Raise an exception and return -1 on error. */
+
+PyAPI_FUNC(int) PyCodec_Unregister(
+       PyObject *search_function
+       );
+
 /* Codec registry lookup API.
 
    Looks up the given encoding and returns a CodecInfo object with
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 3dd5682..ed508f3 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1641,6 +1641,18 @@
         self.assertRaises(TypeError, codecs.register)
         self.assertRaises(TypeError, codecs.register, 42)
 
+    def test_unregister(self):
+        name = "nonexistent_codec_name"
+        search_function = mock.Mock()
+        codecs.register(search_function)
+        self.assertRaises(TypeError, codecs.lookup, name)
+        search_function.assert_called_with(name)
+        search_function.reset_mock()
+
+        codecs.unregister(search_function)
+        self.assertRaises(LookupError, codecs.lookup, name)
+        search_function.assert_not_called()
+
     def test_lookup(self):
         self.assertRaises(TypeError, codecs.lookup)
         self.assertRaises(LookupError, codecs.lookup, "__spam__")
diff --git a/Misc/ACKS b/Misc/ACKS
index 7b74346..85001da 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1575,6 +1575,7 @@
 Charlie Shepherd
 Bruce Sherwood
 Gregory Shevchenko
+Hai Shi
 Alexander Shigin
 Pete Shinners
 Michael Shiplett
diff --git a/Misc/NEWS.d/next/C API/2020-09-27-20-43-16.bpo-41842.bCakAj.rst b/Misc/NEWS.d/next/C API/2020-09-27-20-43-16.bpo-41842.bCakAj.rst
new file mode 100644
index 0000000..116d08f
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2020-09-27-20-43-16.bpo-41842.bCakAj.rst
@@ -0,0 +1,2 @@
+Add :c:func:`PyCodec_Unregister` function to unregister a codec search
+function.
diff --git a/Misc/NEWS.d/next/Library/2020-09-23-22-52-24.bpo-41842.lIuhC9.rst b/Misc/NEWS.d/next/Library/2020-09-23-22-52-24.bpo-41842.lIuhC9.rst
new file mode 100644
index 0000000..306b02d
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-09-23-22-52-24.bpo-41842.lIuhC9.rst
@@ -0,0 +1 @@
+Add :func:`codecs.unregister` function to unregister a codec search function.
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index 9520721..08a3d4a 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -69,6 +69,27 @@
 }
 
 /*[clinic input]
+_codecs.unregister
+    search_function: object
+    /
+
+Unregister a codec search function and clear the registry's cache.
+
+If the search function is not registered, do nothing.
+[clinic start generated code]*/
+
+static PyObject *
+_codecs_unregister(PyObject *module, PyObject *search_function)
+/*[clinic end generated code: output=1f0edee9cf246399 input=dd7c004c652d345e]*/
+{
+    int status = PyCodec_Unregister(search_function);
+    if (status < 0) {
+        return NULL;  /* PyCodec_Unregister() raised an exception */
+    }
+    Py_RETURN_NONE;
+}
+
+/*[clinic input]
 _codecs.lookup
     encoding: str
     /
@@ -992,6 +1013,7 @@
 
 static PyMethodDef _codecs_functions[] = {
     _CODECS_REGISTER_METHODDEF
+    _CODECS_UNREGISTER_METHODDEF
     _CODECS_LOOKUP_METHODDEF
     _CODECS_ENCODE_METHODDEF
     _CODECS_DECODE_METHODDEF
diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h
index 249065c..e2ebb68 100644
--- a/Modules/clinic/_codecsmodule.c.h
+++ b/Modules/clinic/_codecsmodule.c.h
@@ -15,6 +15,17 @@
 #define _CODECS_REGISTER_METHODDEF    \
     {"register", (PyCFunction)_codecs_register, METH_O, _codecs_register__doc__},
 
+PyDoc_STRVAR(_codecs_unregister__doc__,
+"unregister($module, search_function, /)\n"
+"--\n"
+"\n"
+"Unregister a codec search function and clear the registry\'s cache.\n"
+"\n"
+"If the search function is not registered, do nothing.");
+
+#define _CODECS_UNREGISTER_METHODDEF    \
+    {"unregister", (PyCFunction)_codecs_unregister, METH_O, _codecs_unregister__doc__},
+
 PyDoc_STRVAR(_codecs_lookup__doc__,
 "lookup($module, encoding, /)\n"
 "--\n"
@@ -2827,4 +2838,4 @@
 #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF
     #define _CODECS_CODE_PAGE_ENCODE_METHODDEF
 #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */
-/*[clinic end generated code: output=eeead01414be6e42 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=9a97e2ddf3e69072 input=a9049054013a1b77]*/
diff --git a/Python/codecs.c b/Python/codecs.c
index 0f18c27..a8233a7 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -50,6 +50,31 @@
     return -1;
 }
 
+int
+PyCodec_Unregister(PyObject *search_function)
+{
+    /* Remove the first entry identical to search_function; drop the cache. */
+    PyInterpreterState *interp = PyInterpreterState_Get();
+    PyObject *path = interp->codec_search_path;
+    if (path == NULL) {
+        return 0;  /* search path not created yet, or already cleared */
+    }
+    assert(PyList_CheckExact(path));
+    Py_ssize_t size = PyList_GET_SIZE(path);
+    Py_ssize_t pos = 0;
+    while (pos < size && PyList_GET_ITEM(path, pos) != search_function) {
+        pos++;
+    }
+    if (pos == size) {
+        return 0;  /* not registered: nothing to do */
+    }
+    if (interp->codec_search_cache != NULL) {
+        assert(PyDict_CheckExact(interp->codec_search_cache));
+        PyDict_Clear(interp->codec_search_cache);
+    }
+    return PyList_SetSlice(path, pos, pos + 1, NULL);
+}
+
 extern int _Py_normalize_encoding(const char *, char *, size_t);
 
 /* Convert a string to a normalized Python string(decoded from UTF-8): all characters are