Remove CALL_PROFILE special build

Issue #28799:

* Remove the PyEval_GetCallStats() function.
* Deprecate the untested and undocumented sys.callstats() function.
* Remove the CALL_PROFILE special build.

To profile function calls, use the sys.setprofile() function, or the
cProfile or profile module, instead.
diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst
index 2965bc9..7d9eefb 100644
--- a/Doc/c-api/init.rst
+++ b/Doc/c-api/init.rst
@@ -1147,46 +1147,6 @@
    :c:func:`PyEval_SetProfile`, except the tracing function does receive line-number
    events.
 
-.. c:function:: PyObject* PyEval_GetCallStats(PyObject *self)
-
-   Return a tuple of function call counts.  There are constants defined for the
-   positions within the tuple:
-
-   +-------------------------------+-------+
-   | Name                          | Value |
-   +===============================+=======+
-   | :const:`PCALL_ALL`            | 0     |
-   +-------------------------------+-------+
-   | :const:`PCALL_FUNCTION`       | 1     |
-   +-------------------------------+-------+
-   | :const:`PCALL_FAST_FUNCTION`  | 2     |
-   +-------------------------------+-------+
-   | :const:`PCALL_FASTER_FUNCTION`| 3     |
-   +-------------------------------+-------+
-   | :const:`PCALL_METHOD`         | 4     |
-   +-------------------------------+-------+
-   | :const:`PCALL_BOUND_METHOD`   | 5     |
-   +-------------------------------+-------+
-   | :const:`PCALL_CFUNCTION`      | 6     |
-   +-------------------------------+-------+
-   | :const:`PCALL_TYPE`           | 7     |
-   +-------------------------------+-------+
-   | :const:`PCALL_GENERATOR`      | 8     |
-   +-------------------------------+-------+
-   | :const:`PCALL_OTHER`          | 9     |
-   +-------------------------------+-------+
-   | :const:`PCALL_POP`            | 10    |
-   +-------------------------------+-------+
-
-   :const:`PCALL_FAST_FUNCTION` means no argument tuple needs to be created.
-   :const:`PCALL_FASTER_FUNCTION` means that the fast-path frame setup code is used.
-
-   If there is a method call where the call can be optimized by changing
-   the argument tuple and calling the function directly, it gets recorded
-   twice.
-
-   This function is only present if Python is compiled with :const:`CALL_PROFILE`
-   defined.
 
 .. _advanced-debugging:
 
diff --git a/Include/ceval.h b/Include/ceval.h
index e721718..4222969 100644
--- a/Include/ceval.h
+++ b/Include/ceval.h
@@ -120,7 +120,6 @@
 PyAPI_FUNC(const char *) PyEval_GetFuncName(PyObject *);
 PyAPI_FUNC(const char *) PyEval_GetFuncDesc(PyObject *);
 
-PyAPI_FUNC(PyObject *) PyEval_GetCallStats(PyObject *);
 PyAPI_FUNC(PyObject *) PyEval_EvalFrame(struct _frame *);
 PyAPI_FUNC(PyObject *) PyEval_EvalFrameEx(struct _frame *f, int exc);
 #ifndef Py_LIMITED_API
diff --git a/Misc/NEWS b/Misc/NEWS
index 56f2aed..37072fe 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,11 @@
 Core and Builtins
 -----------------
 
+- Issue #28799: Remove the ``PyEval_GetCallStats()`` function and deprecate
+  the untested and undocumented ``sys.callstats()`` function. Remove the
+  ``CALL_PROFILE`` special build: use the :func:`sys.setprofile` function,
+  :mod:`cProfile` or :mod:`profile` to profile function calls.
+
 - Issue #12844: More than 255 arguments can now be passed to a function.
 
 - Issue #28782: Fix a bug in the implementation ``yield from`` when checking
diff --git a/Python/ceval.c b/Python/ceval.c
index ca9914c..569c609 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -83,63 +83,6 @@
 #endif
 #endif
 
-/* Function call profile */
-#ifdef CALL_PROFILE
-#define PCALL_NUM 11
-static int pcall[PCALL_NUM];
-
-#define PCALL_ALL 0
-#define PCALL_FUNCTION 1
-#define PCALL_FAST_FUNCTION 2
-#define PCALL_FASTER_FUNCTION 3
-#define PCALL_METHOD 4
-#define PCALL_BOUND_METHOD 5
-#define PCALL_CFUNCTION 6
-#define PCALL_TYPE 7
-#define PCALL_GENERATOR 8
-#define PCALL_OTHER 9
-#define PCALL_POP 10
-
-/* Notes about the statistics
-
-   PCALL_FAST stats
-
-   FAST_FUNCTION means no argument tuple needs to be created.
-   FASTER_FUNCTION means that the fast-path frame setup code is used.
-
-   If there is a method call where the call can be optimized by changing
-   the argument tuple and calling the function directly, it gets recorded
-   twice.
-
-   As a result, the relationship among the statistics appears to be
-   PCALL_ALL == PCALL_FUNCTION + PCALL_METHOD - PCALL_BOUND_METHOD +
-                PCALL_CFUNCTION + PCALL_TYPE + PCALL_GENERATOR + PCALL_OTHER
-   PCALL_FUNCTION > PCALL_FAST_FUNCTION > PCALL_FASTER_FUNCTION
-   PCALL_METHOD > PCALL_BOUND_METHOD
-*/
-
-#define PCALL(POS) pcall[POS]++
-
-PyObject *
-PyEval_GetCallStats(PyObject *self)
-{
-    return Py_BuildValue("iiiiiiiiiii",
-                         pcall[0], pcall[1], pcall[2], pcall[3],
-                         pcall[4], pcall[5], pcall[6], pcall[7],
-                         pcall[8], pcall[9], pcall[10]);
-}
-#else
-#define PCALL(O)
-
-PyObject *
-PyEval_GetCallStats(PyObject *self)
-{
-    Py_INCREF(Py_None);
-    return Py_None;
-}
-#endif
-
-
 #ifdef WITH_THREAD
 #define GIL_REQUEST _Py_atomic_load_relaxed(&gil_drop_request)
 #else
@@ -3278,7 +3221,6 @@
         PREDICTED(CALL_FUNCTION);
         TARGET(CALL_FUNCTION) {
             PyObject **sp, *res;
-            PCALL(PCALL_ALL);
             sp = stack_pointer;
             res = call_function(&sp, oparg, NULL);
             stack_pointer = sp;
@@ -3294,7 +3236,6 @@
 
             names = POP();
             assert(PyTuple_CheckExact(names) && PyTuple_GET_SIZE(names) <= oparg);
-            PCALL(PCALL_ALL);
             sp = stack_pointer;
             res = call_function(&sp, oparg, names);
             stack_pointer = sp;
@@ -3309,7 +3250,6 @@
 
         TARGET(CALL_FUNCTION_EX) {
             PyObject *func, *callargs, *kwargs = NULL, *result;
-            PCALL(PCALL_ALL);
             if (oparg & 0x01) {
                 kwargs = POP();
                 if (!PyDict_CheckExact(kwargs)) {
@@ -4099,8 +4039,6 @@
          * when the generator is resumed. */
         Py_CLEAR(f->f_back);
 
-        PCALL(PCALL_GENERATOR);
-
         /* Create a new generator that owns the ready to run frame
          * and return that as the value. */
         if (is_coro) {
@@ -4793,8 +4731,6 @@
     if (PyCFunction_Check(func)) {
         PyThreadState *tstate = PyThreadState_GET();
 
-        PCALL(PCALL_CFUNCTION);
-
         stack = (*pp_stack) - nargs - nkwargs;
         C_TRACE(x, _PyCFunction_FastCallKeywords(func, stack, nargs, kwnames));
     }
@@ -4802,8 +4738,6 @@
         if (PyMethod_Check(func) && PyMethod_GET_SELF(func) != NULL) {
             /* optimize access to bound methods */
             PyObject *self = PyMethod_GET_SELF(func);
-            PCALL(PCALL_METHOD);
-            PCALL(PCALL_BOUND_METHOD);
             Py_INCREF(self);
             func = PyMethod_GET_FUNCTION(func);
             Py_INCREF(func);
@@ -4835,7 +4769,6 @@
     while ((*pp_stack) > pfunc) {
         w = EXT_POP(*pp_stack);
         Py_DECREF(w);
-        PCALL(PCALL_POP);
     }
 
     return x;
@@ -4860,7 +4793,6 @@
     Py_ssize_t i;
     PyObject *result;
 
-    PCALL(PCALL_FASTER_FUNCTION);
     assert(globals != NULL);
     /* XXX Perhaps we should create a specialized
        PyFrame_New() that doesn't take locals, but does
@@ -4906,9 +4838,6 @@
     /* kwnames must only contains str strings, no subclass, and all keys must
        be unique */
 
-    PCALL(PCALL_FUNCTION);
-    PCALL(PCALL_FAST_FUNCTION);
-
     if (co->co_kwonlyargcount == 0 && nkwargs == 0 &&
         co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE))
     {
@@ -4971,9 +4900,6 @@
     assert(nargs == 0 || args != NULL);
     assert(kwargs == NULL || PyDict_Check(kwargs));
 
-    PCALL(PCALL_FUNCTION);
-    PCALL(PCALL_FAST_FUNCTION);
-
     if (co->co_kwonlyargcount == 0 &&
         (kwargs == NULL || PyDict_Size(kwargs) == 0) &&
         co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE))
@@ -5041,23 +4967,6 @@
 static PyObject *
 do_call_core(PyObject *func, PyObject *callargs, PyObject *kwdict)
 {
-#ifdef CALL_PROFILE
-    /* At this point, we have to look at the type of func to
-       update the call stats properly.  Do it here so as to avoid
-       exposing the call stats machinery outside ceval.c
-    */
-    if (PyFunction_Check(func))
-        PCALL(PCALL_FUNCTION);
-    else if (PyMethod_Check(func))
-        PCALL(PCALL_METHOD);
-    else if (PyType_Check(func))
-        PCALL(PCALL_TYPE);
-    else if (PyCFunction_Check(func))
-        PCALL(PCALL_CFUNCTION);
-    else
-        PCALL(PCALL_OTHER);
-#endif
-
     if (PyCFunction_Check(func)) {
         PyObject *result;
         PyThreadState *tstate = PyThreadState_GET();
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index db5a48f..7906830 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -1287,6 +1287,19 @@
 10. Number of stack pops performed by call_function()"
 );
 
+/* sys.callstats(): deprecated stub; it warns and then returns None. */
+static PyObject *
+sys_callstats(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    if (PyErr_WarnEx(PyExc_DeprecationWarning,
+                      "sys.callstats() has been deprecated in Python 3.7 "
+                      "and will be removed in the future", 1) < 0) {
+        return NULL;  /* PyErr_WarnEx() reported failure: propagate it */
+    }
+    /* METH_NOARGS handlers are called as (self, NULL); 2nd arg is unused. */
+    Py_RETURN_NONE;
+}
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -1352,7 +1365,7 @@
 
 static PyMethodDef sys_methods[] = {
     /* Might as well keep this in alphabetic order */
-    {"callstats", (PyCFunction)PyEval_GetCallStats, METH_NOARGS,
+    {"callstats", (PyCFunction)sys_callstats, METH_NOARGS,
      callstats_doc},
     {"_clear_type_cache",       sys_clear_type_cache,     METH_NOARGS,
      sys_clear_type_cache__doc__},