bpo-38644: Pass tstate to Py_EnterRecursiveCall() (GH-16997)

* Add _Py_EnterRecursiveCall() and _Py_LeaveRecursiveCall() which
  require a tstate argument.
* Pass tstate to _Py_MakeRecCheck() and _Py_CheckRecursiveCall().
* Reimplement the Py_EnterRecursiveCall() and Py_LeaveRecursiveCall()
  macros on top of new static inline functions
  (_Py_EnterRecursiveCall_inline() and _Py_LeaveRecursiveCall_inline()).

_PyThreadState_GET() is the most efficient way to get the tstate, so
calling _Py_EnterRecursiveCall() and _Py_LeaveRecursiveCall() with it
should be slightly more efficient than calling Py_EnterRecursiveCall()
and Py_LeaveRecursiveCall(), which use the "slower"
PyThreadState_GET().
diff --git a/Include/cpython/ceval.h b/Include/cpython/ceval.h
index 61bbc4f..1e2c457 100644
--- a/Include/cpython/ceval.h
+++ b/Include/cpython/ceval.h
@@ -11,20 +11,31 @@
 #ifdef USE_STACKCHECK
 /* With USE_STACKCHECK macro defined, trigger stack checks in
    _Py_CheckRecursiveCall() on every 64th call to Py_EnterRecursiveCall. */
-#  define _Py_MakeRecCheck(x)  \
-    (++(x) > _Py_CheckRecursionLimit || \
-     ++(PyThreadState_GET()->stackcheck_counter) > 64)
+static inline int _Py_MakeRecCheck(PyThreadState *tstate)  {
+    return (++tstate->recursion_depth > _Py_CheckRecursionLimit
+            || ++tstate->stackcheck_counter > 64);
+}
 #else
-#  define _Py_MakeRecCheck(x)  (++(x) > _Py_CheckRecursionLimit)
+static inline int _Py_MakeRecCheck(PyThreadState *tstate) {
+    return (++tstate->recursion_depth > _Py_CheckRecursionLimit);
+}
 #endif
 
-PyAPI_FUNC(int) _Py_CheckRecursiveCall(const char *where);
+PyAPI_FUNC(int) _Py_CheckRecursiveCall(
+    PyThreadState *tstate,
+    const char *where);
 
-#define _Py_EnterRecursiveCall_macro(where)  \
-            (_Py_MakeRecCheck(PyThreadState_GET()->recursion_depth) &&  \
-             _Py_CheckRecursiveCall(where))
+static inline int _Py_EnterRecursiveCall(PyThreadState *tstate,
+                                         const char *where) {
+    return (_Py_MakeRecCheck(tstate) && _Py_CheckRecursiveCall(tstate, where));
+}
 
-#define Py_EnterRecursiveCall(where) _Py_EnterRecursiveCall_macro(where)
+static inline int _Py_EnterRecursiveCall_inline(const char *where) {
+    PyThreadState *tstate = PyThreadState_GET();
+    return _Py_EnterRecursiveCall(tstate, where);
+}
+
+#define Py_EnterRecursiveCall(where) _Py_EnterRecursiveCall_inline(where)
 
 
 /* Compute the "lower-water mark" for a recursion limit. When
@@ -38,12 +49,18 @@
 #define _Py_MakeEndRecCheck(x) \
     (--(x) < _Py_RecursionLimitLowerWaterMark(_Py_CheckRecursionLimit))
 
-#define _Py_LeaveRecursiveCall_macro()                         \
-    do{ if(_Py_MakeEndRecCheck(PyThreadState_GET()->recursion_depth))  \
-      PyThreadState_GET()->overflowed = 0;  \
-    } while(0)
+static inline void _Py_LeaveRecursiveCall(PyThreadState *tstate)  {
+    if (_Py_MakeEndRecCheck(tstate->recursion_depth)) {
+        tstate->overflowed = 0;
+    }
+}
 
-#define Py_LeaveRecursiveCall() _Py_LeaveRecursiveCall_macro()
+static inline void _Py_LeaveRecursiveCall_inline(void)  {
+    PyThreadState *tstate = PyThreadState_GET();
+    _Py_LeaveRecursiveCall(tstate);
+}
+
+#define Py_LeaveRecursiveCall() _Py_LeaveRecursiveCall_inline()
 
 #ifdef __cplusplus
 }