Issue #29049: Call _PyObject_GC_TRACK() lazily when calling a Python function.

Calling a function is up to 5% faster.
diff --git a/Include/frameobject.h b/Include/frameobject.h
index 00c5093..616c611 100644
--- a/Include/frameobject.h
+++ b/Include/frameobject.h
@@ -60,7 +60,11 @@
 #define PyFrame_Check(op) (Py_TYPE(op) == &PyFrame_Type)
 
 PyAPI_FUNC(PyFrameObject *) PyFrame_New(PyThreadState *, PyCodeObject *,
-                                       PyObject *, PyObject *);
+                                        PyObject *, PyObject *);
+
+/* Internal use only: like PyFrame_New(), but does not GC-track the frame. */
+PyFrameObject* _PyFrame_New_NoTrack(PyThreadState *, PyCodeObject *,
+                                    PyObject *, PyObject *);
 
 
 /* The rest of the interface is specific for frame objects */
diff --git a/Misc/NEWS b/Misc/NEWS
index 31ff5e3..76a8411 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
 Core and Builtins
 -----------------
 
+- Issue #29049: Call _PyObject_GC_TRACK() lazily when calling a Python
+  function.  Calling a function is up to 5% faster.
+
 - Issue #28927: bytes.fromhex() and bytearray.fromhex() now ignore all ASCII
   whitespace, not only spaces.  Patch by Robert Xiao.
 
diff --git a/Objects/frameobject.c b/Objects/frameobject.c
index eed5384..8448319 100644
--- a/Objects/frameobject.c
+++ b/Objects/frameobject.c
@@ -415,7 +415,9 @@
     PyObject **p, **valuestack;
     PyCodeObject *co;
 
-    PyObject_GC_UnTrack(f);
+    if (_PyObject_GC_IS_TRACKED(f))
+        _PyObject_GC_UNTRACK(f);
+
     Py_TRASHCAN_SAFE_BEGIN(f)
     /* Kill all local variables */
     valuestack = f->f_valuestack;
@@ -606,8 +608,8 @@
 }
 
 PyFrameObject* _Py_HOT_FUNCTION
-PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals,
-            PyObject *locals)
+_PyFrame_New_NoTrack(PyThreadState *tstate, PyCodeObject *code,
+                     PyObject *globals, PyObject *locals)
 {
     PyFrameObject *back = tstate->frame;
     PyFrameObject *f;
@@ -727,10 +729,20 @@
     f->f_executing = 0;
     f->f_gen = NULL;
 
-    _PyObject_GC_TRACK(f);
     return f;
 }
 
+PyFrameObject*
+PyFrame_New(PyThreadState *tstate, PyCodeObject *code,
+            PyObject *globals, PyObject *locals)
+{
+    PyFrameObject *f = _PyFrame_New_NoTrack(tstate, code, globals, locals);
+    if (f)
+        _PyObject_GC_TRACK(f);
+    return f;
+}
+
+
 /* Block management */
 
 void
diff --git a/Python/ceval.c b/Python/ceval.c
index f7ee041..e48586d 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -3931,7 +3931,7 @@
     /* Create the frame */
     tstate = PyThreadState_GET();
     assert(tstate != NULL);
-    f = PyFrame_New(tstate, co, globals, locals);
+    f = _PyFrame_New_NoTrack(tstate, co, globals, locals);
     if (f == NULL) {
         return NULL;
     }
@@ -4176,9 +4176,15 @@
        so recursion_depth must be boosted for the duration.
     */
     assert(tstate != NULL);
-    ++tstate->recursion_depth;
-    Py_DECREF(f);
-    --tstate->recursion_depth;
+    if (Py_REFCNT(f) > 1) {
+        Py_DECREF(f);
+        _PyObject_GC_TRACK(f);
+    }
+    else {
+        ++tstate->recursion_depth;
+        Py_DECREF(f);
+        --tstate->recursion_depth;
+    }
     return retval;
 }
 
@@ -4904,11 +4910,11 @@
 
     assert(globals != NULL);
     /* XXX Perhaps we should create a specialized
-       PyFrame_New() that doesn't take locals, but does
+       _PyFrame_New_NoTrack() that doesn't take locals, but does
        take builtins without sanity checking them.
        */
     assert(tstate != NULL);
-    f = PyFrame_New(tstate, co, globals, NULL);
+    f = _PyFrame_New_NoTrack(tstate, co, globals, NULL);
     if (f == NULL) {
         return NULL;
     }
@@ -4921,10 +4927,15 @@
     }
     result = PyEval_EvalFrameEx(f,0);
 
-    ++tstate->recursion_depth;
-    Py_DECREF(f);
-    --tstate->recursion_depth;
-
+    if (Py_REFCNT(f) > 1) {
+        Py_DECREF(f);
+        _PyObject_GC_TRACK(f);
+    }
+    else {
+        ++tstate->recursion_depth;
+        Py_DECREF(f);
+        --tstate->recursion_depth;
+    }
     return result;
 }