bpo-41842: Add codecs.unregister() function (GH-22360)
Add codecs.unregister() and PyCodec_Unregister() functions
to unregister a codec search function.
diff --git a/Doc/c-api/codec.rst b/Doc/c-api/codec.rst
index 172dcb3..235c77c 100644
--- a/Doc/c-api/codec.rst
+++ b/Doc/c-api/codec.rst
@@ -10,6 +10,14 @@
As side effect, this tries to load the :mod:`encodings` package, if not yet
done, to make sure that it is always first in the list of search functions.
+.. c:function:: int PyCodec_Unregister(PyObject *search_function)
+
+ Unregister a codec search function and clear the registry's cache.
+ If the search function is not registered, do nothing.
+ Return 0 on success. Raise an exception and return -1 on error.
+
+ .. versionadded:: 3.10
+
.. c:function:: int PyCodec_KnownEncoding(const char *encoding)
Return ``1`` or ``0`` depending on whether there is a registered codec for
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
index f071057..a026513 100644
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -163,11 +163,14 @@
:class:`CodecInfo` object. In case a search function cannot find
a given encoding, it should return ``None``.
- .. note::
- Search function registration is not currently reversible,
- which may cause problems in some cases, such as unit testing or
- module reloading.
+.. function:: unregister(search_function)
+
+ Unregister a codec search function and clear the registry's cache.
+ If the search function is not registered, do nothing.
+
+ .. versionadded:: 3.10
+
While the builtin :func:`open` and the associated :mod:`io` module are the
recommended approach for working with encoded text files, this module
diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst
index 1228f26..f74dd1a 100644
--- a/Doc/whatsnew/3.10.rst
+++ b/Doc/whatsnew/3.10.rst
@@ -109,6 +109,12 @@
Add :func:`base64.b32hexencode` and :func:`base64.b32hexdecode` to support the
Base32 Encoding with Extended Hex Alphabet.
+codecs
+------
+
+Add a :func:`codecs.unregister` function to unregister a codec search function.
+(Contributed by Hai Shi in :issue:`41842`.)
+
curses
------
@@ -237,6 +243,10 @@
:class:`datetime.time` objects.
(Contributed by Zackery Spytz in :issue:`30155`.)
+* Add a :c:func:`PyCodec_Unregister` function to unregister a codec
+ search function.
+ (Contributed by Hai Shi in :issue:`41842`.)
+
Porting to Python 3.10
----------------------
diff --git a/Include/codecs.h b/Include/codecs.h
index 3ad0f2b..37ecfb4 100644
--- a/Include/codecs.h
+++ b/Include/codecs.h
@@ -27,6 +27,14 @@
PyObject *search_function
);
+/* Unregister a codec search function and clear the registry's cache.
+ If the search function is not registered, do nothing.
+ Return 0 on success. Raise an exception and return -1 on error. */
+
+PyAPI_FUNC(int) PyCodec_Unregister(
+ PyObject *search_function
+ );
+
/* Codec registry lookup API.
Looks up the given encoding and returns a CodecInfo object with
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 3dd5682..ed508f3 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1641,6 +1641,18 @@
self.assertRaises(TypeError, codecs.register)
self.assertRaises(TypeError, codecs.register, 42)
+ def test_unregister(self):  # a registered search function is consulted by lookup(); after unregister() it is not
+ name = "nonexistent_codec_name"
+ search_function = mock.Mock()
+ codecs.register(search_function)
+ self.assertRaises(TypeError, codecs.lookup, name)  # presumably TypeError because the Mock's return value is not a valid codec entry -- TODO confirm
+ search_function.assert_called_with(name)
+ search_function.reset_mock()  # forget the call above so assert_not_called() below only sees later calls
+
+ codecs.unregister(search_function)
+ self.assertRaises(LookupError, codecs.lookup, name)  # expected: nothing resolves this name once the mock is gone
+ search_function.assert_not_called()
+
def test_lookup(self):
self.assertRaises(TypeError, codecs.lookup)
self.assertRaises(LookupError, codecs.lookup, "__spam__")
diff --git a/Misc/ACKS b/Misc/ACKS
index 7b74346..85001da 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1575,6 +1575,7 @@
Charlie Shepherd
Bruce Sherwood
Gregory Shevchenko
+Hai Shi
Alexander Shigin
Pete Shinners
Michael Shiplett
diff --git a/Misc/NEWS.d/next/C API/2020-09-27-20-43-16.bpo-41842.bCakAj.rst b/Misc/NEWS.d/next/C API/2020-09-27-20-43-16.bpo-41842.bCakAj.rst
new file mode 100644
index 0000000..116d08f
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2020-09-27-20-43-16.bpo-41842.bCakAj.rst
@@ -0,0 +1,2 @@
+Add a :c:func:`PyCodec_Unregister` function to unregister a codec search
+function.
diff --git a/Misc/NEWS.d/next/Library/2020-09-23-22-52-24.bpo-41842.lIuhC9.rst b/Misc/NEWS.d/next/Library/2020-09-23-22-52-24.bpo-41842.lIuhC9.rst
new file mode 100644
index 0000000..306b02d
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-09-23-22-52-24.bpo-41842.lIuhC9.rst
@@ -0,0 +1 @@
+Add a :func:`codecs.unregister` function to unregister a codec search function.
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index 9520721..08a3d4a 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -69,6 +69,27 @@
}
/*[clinic input]
+_codecs.unregister
+ search_function: object
+ /
+
+Unregister a codec search function and clear the registry's cache.
+
+If the search function is not registered, do nothing.
+[clinic start generated code]*/
+
+static PyObject *
+_codecs_unregister(PyObject *module, PyObject *search_function)
+/*[clinic end generated code: output=1f0edee9cf246399 input=dd7c004c652d345e]*/
+{ /* Implementation of _codecs.unregister: forwards search_function to PyCodec_Unregister(); module is unused. */
+ if (PyCodec_Unregister(search_function) < 0) {
+ return NULL; /* a negative result is reported to the caller as NULL */
+ }
+
+ Py_RETURN_NONE; /* success, including the case where nothing was registered, returns None */
+}
+
+/*[clinic input]
_codecs.lookup
encoding: str
/
@@ -992,6 +1013,7 @@
static PyMethodDef _codecs_functions[] = {
_CODECS_REGISTER_METHODDEF
+ _CODECS_UNREGISTER_METHODDEF
_CODECS_LOOKUP_METHODDEF
_CODECS_ENCODE_METHODDEF
_CODECS_DECODE_METHODDEF
diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h
index 249065c..e2ebb68 100644
--- a/Modules/clinic/_codecsmodule.c.h
+++ b/Modules/clinic/_codecsmodule.c.h
@@ -15,6 +15,17 @@
#define _CODECS_REGISTER_METHODDEF \
{"register", (PyCFunction)_codecs_register, METH_O, _codecs_register__doc__},
+PyDoc_STRVAR(_codecs_unregister__doc__,
+"unregister($module, search_function, /)\n"
+"--\n"
+"\n"
+"Unregister a codec search function and clear the registry\'s cache.\n"
+"\n"
+"If the search function is not registered, do nothing.");
+
+#define _CODECS_UNREGISTER_METHODDEF \
+ {"unregister", (PyCFunction)_codecs_unregister, METH_O, _codecs_unregister__doc__},
+
PyDoc_STRVAR(_codecs_lookup__doc__,
"lookup($module, encoding, /)\n"
"--\n"
@@ -2827,4 +2838,4 @@
#ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF
#define _CODECS_CODE_PAGE_ENCODE_METHODDEF
#endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */
-/*[clinic end generated code: output=eeead01414be6e42 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=9a97e2ddf3e69072 input=a9049054013a1b77]*/
diff --git a/Python/codecs.c b/Python/codecs.c
index 0f18c27..a8233a7 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -50,6 +50,31 @@
return -1;
}
+int
+PyCodec_Unregister(PyObject *search_function)
+{ /* Remove search_function from the current interpreter's codec search path: 0 if absent, else PyList_SetSlice()'s result. */
+ PyInterpreterState *interp = PyInterpreterState_Get();
+ PyObject *codec_search_path = interp->codec_search_path;
+ /* Do nothing if codec_search_path is not created yet or was cleared. */
+ if (codec_search_path == NULL) {
+ return 0;
+ }
+
+ assert(PyList_CheckExact(codec_search_path));
+ Py_ssize_t n = PyList_GET_SIZE(codec_search_path);
+ for (Py_ssize_t i = 0; i < n; i++) {
+ PyObject *item = PyList_GET_ITEM(codec_search_path, i);
+ if (item == search_function) { /* identity (pointer) comparison, not equality */
+ if (interp->codec_search_cache != NULL) { /* the cache is cleared only when a match was found */
+ assert(PyDict_CheckExact(interp->codec_search_cache));
+ PyDict_Clear(interp->codec_search_cache); /* NOTE(review): if clearing can run code that mutates the search path, index i may be stale below -- confirm */
+ }
+ return PyList_SetSlice(codec_search_path, i, i+1, NULL); /* NULL item list deletes slice [i, i+1); only the first match is removed */
+ }
+ }
+ return 0; /* not registered: nothing to do */
+}
+
extern int _Py_normalize_encoding(const char *, char *, size_t);
/* Convert a string to a normalized Python string(decoded from UTF-8): all characters are