bpo-29782: Consolidate _Py_bit_length() (GH-20739)

In GH-2866, _Py_bit_length() was added to pymath.h for lack of a better
location. GH-20518 added a more appropriate header file for bit utilities. It
also shows how to properly use intrinsics. This allows reconsidering bpo-29782.

* Move the function to the new header.
* Change return type to match __builtin_clzl() and review usage.
* Use intrinsics where available.
* Pick a fallback implementation suitable for inlining.
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 6d5af59..7970e2f 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -102,6 +102,45 @@
 }
 
 
+static int
+check_bit_length(unsigned long x, int expected)
+{
+    // Use volatile to prevent the compiler from optimizing out the whole test
+    volatile unsigned long u = x;
+    int len = _Py_bit_length(u);
+    if (len != expected) {
+        PyErr_Format(PyExc_AssertionError,
+                     "_Py_bit_length(%lu) returns %i, expected %i",
+                     x, len, expected);
+        return -1;  // mismatch: AssertionError has been set
+    }
+    return 0;  // _Py_bit_length(x) matched the expected value
+}
+
+
+static PyObject*
+test_bit_length(PyObject *self, PyObject *Py_UNUSED(args))
+{
+#define CHECK(X, RESULT) \
+    do { \
+        if (check_bit_length(X, RESULT) < 0) { \
+            return NULL; \
+        } \
+    } while (0)
+
+    CHECK(0, 0);  // zero has no significant bits
+    CHECK(1, 1);  // smallest non-zero value
+    CHECK(0x1000, 13);  // exact power of two (only bit 12 set)
+    CHECK(0x1234, 13);  // same top bit as 0x1000, with lower bits set
+    CHECK(0x54321, 19);  // highest set bit is bit 18
+    CHECK(0x7FFFFFFF, 31);  // low 31 bits all set
+    CHECK(0xFFFFFFFF, 32);  // low 32 bits all set
+    Py_RETURN_NONE;  // reached only if every CHECK passed
+
+#undef CHECK
+}
+
+
 #define TO_PTR(ch) ((void*)(uintptr_t)ch)
 #define FROM_PTR(ptr) ((uintptr_t)ptr)
 #define VALUE(key) (1 + ((int)(key) - 'a'))
@@ -197,6 +236,7 @@
     {"get_recursion_depth", get_recursion_depth, METH_NOARGS},
     {"test_bswap", test_bswap, METH_NOARGS},
     {"test_popcount", test_popcount, METH_NOARGS},
+    {"test_bit_length", test_bit_length, METH_NOARGS},
     {"test_hashtable", test_hashtable, METH_NOARGS},
     {NULL, NULL} /* sentinel */
 };
diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c
index cb05ce7..4450ce1 100644
--- a/Modules/mathmodule.c
+++ b/Modules/mathmodule.c
@@ -53,6 +53,7 @@
  */
 
 #include "Python.h"
+#include "pycore_bitutils.h"      // _Py_bit_length()
 #include "pycore_dtoa.h"
 #include "_math.h"