bpo-33720: Refactor marshalling/unmarshalling floats. (GH-8071)

diff --git a/Include/pyport.h b/Include/pyport.h
index c1f4c7f..f4b547a 100644
--- a/Include/pyport.h
+++ b/Include/pyport.h
@@ -525,8 +525,10 @@
  * Usage:
  *    int _Py_NO_INLINE x(void) { return 3; }
  */
-#if defined(__GNUC__) || defined(__clang__)
-#  define _Py_NO_INLINE __attribute__((noinline))
+#if defined(_MSC_VER)
+#  define _Py_NO_INLINE __declspec(noinline)
+#elif defined(__GNUC__) || defined(__clang__)
+#  define _Py_NO_INLINE __attribute__ ((noinline))
 #else
 #  define _Py_NO_INLINE
 #endif
diff --git a/Objects/call.c b/Objects/call.c
index b53a98c..1937a8b 100644
--- a/Objects/call.c
+++ b/Objects/call.c
@@ -1271,7 +1271,7 @@
 
 /* Issue #29234: Inlining _PyStack_AsTuple() into callers increases their
    stack consumption, Disable inlining to optimize the stack consumption. */
-PyObject* _Py_NO_INLINE
+_Py_NO_INLINE PyObject *
 _PyStack_AsTuple(PyObject *const *stack, Py_ssize_t nargs)
 {
     PyObject *args;
diff --git a/Python/marshal.c b/Python/marshal.c
index 6d06266..21cdd60 100644
--- a/Python/marshal.c
+++ b/Python/marshal.c
@@ -266,6 +266,32 @@
     } while (d != 0);
 }
 
+static void
+w_float_bin(double v, WFILE *p)
+{
+    unsigned char buf[8];
+    if (_PyFloat_Pack8(v, buf, 1) < 0) {
+        p->error = WFERR_UNMARSHALLABLE;
+        return;
+    }
+    w_string((const char *)buf, 8, p);
+}
+
+static void
+w_float_str(double v, WFILE *p)
+{
+    int n;
+    char *buf = PyOS_double_to_string(v, 'g', 17, 0, NULL);
+    if (!buf) {
+        p->error = WFERR_NOMEMORY;
+        return;
+    }
+    n = (int)strlen(buf);
+    w_byte(n, p);
+    w_string(buf, n, p);
+    PyMem_Free(buf);
+}
+
 static int
 w_ref(PyObject *v, char *flag, WFILE *p)
 {
@@ -375,69 +401,24 @@
     }
     else if (PyFloat_CheckExact(v)) {
         if (p->version > 1) {
-            unsigned char buf[8];
-            if (_PyFloat_Pack8(PyFloat_AsDouble(v),
-                               buf, 1) < 0) {
-                p->error = WFERR_UNMARSHALLABLE;
-                return;
-            }
             W_TYPE(TYPE_BINARY_FLOAT, p);
-            w_string((char*)buf, 8, p);
+            w_float_bin(PyFloat_AS_DOUBLE(v), p);
         }
         else {
-            char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
-                                              'g', 17, 0, NULL);
-            if (!buf) {
-                p->error = WFERR_NOMEMORY;
-                return;
-            }
-            n = strlen(buf);
             W_TYPE(TYPE_FLOAT, p);
-            w_byte((int)n, p);
-            w_string(buf, n, p);
-            PyMem_Free(buf);
+            w_float_str(PyFloat_AS_DOUBLE(v), p);
         }
     }
     else if (PyComplex_CheckExact(v)) {
         if (p->version > 1) {
-            unsigned char buf[8];
-            if (_PyFloat_Pack8(PyComplex_RealAsDouble(v),
-                               buf, 1) < 0) {
-                p->error = WFERR_UNMARSHALLABLE;
-                return;
-            }
             W_TYPE(TYPE_BINARY_COMPLEX, p);
-            w_string((char*)buf, 8, p);
-            if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v),
-                               buf, 1) < 0) {
-                p->error = WFERR_UNMARSHALLABLE;
-                return;
-            }
-            w_string((char*)buf, 8, p);
+            w_float_bin(PyComplex_RealAsDouble(v), p);
+            w_float_bin(PyComplex_ImagAsDouble(v), p);
         }
         else {
-            char *buf;
             W_TYPE(TYPE_COMPLEX, p);
-            buf = PyOS_double_to_string(PyComplex_RealAsDouble(v),
-                                        'g', 17, 0, NULL);
-            if (!buf) {
-                p->error = WFERR_NOMEMORY;
-                return;
-            }
-            n = strlen(buf);
-            w_byte((int)n, p);
-            w_string(buf, n, p);
-            PyMem_Free(buf);
-            buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v),
-                                        'g', 17, 0, NULL);
-            if (!buf) {
-                p->error = WFERR_NOMEMORY;
-                return;
-            }
-            n = strlen(buf);
-            w_byte((int)n, p);
-            w_string(buf, n, p);
-            PyMem_Free(buf);
+            w_float_str(PyComplex_RealAsDouble(v), p);
+            w_float_str(PyComplex_ImagAsDouble(v), p);
         }
     }
     else if (PyBytes_CheckExact(v)) {
@@ -880,6 +861,38 @@
     return NULL;
 }
 
+static double
+r_float_bin(RFILE *p)
+{
+    const unsigned char *buf = (const unsigned char *) r_string(8, p);
+    if (buf == NULL)
+        return -1;
+    return _PyFloat_Unpack8(buf, 1);
+}
+
+/* Issue #33720: Disable inlining for reducing the C stack consumption
+   on PGO builds. */
+_Py_NO_INLINE static double
+r_float_str(RFILE *p)
+{
+    int n;
+    char buf[256];
+    const char *ptr;
+    n = r_byte(p);
+    if (n == EOF) {
+        PyErr_SetString(PyExc_EOFError,
+            "EOF read where object expected");
+        return -1;
+    }
+    ptr = r_string(n, p);
+    if (ptr == NULL) {
+        return -1;
+    }
+    memcpy(buf, ptr, n);
+    buf[n] = '\0';
+    return PyOS_string_to_double(buf, NULL, NULL);
+}
+
 /* allocate the reflist index for a new object. Return -1 on failure */
 static Py_ssize_t
 r_ref_reserve(int flag, RFILE *p)
@@ -1016,36 +1029,17 @@
 
     case TYPE_FLOAT:
         {
-            char buf[256];
-            const char *ptr;
-            double dx;
-            n = r_byte(p);
-            if (n == EOF) {
-                PyErr_SetString(PyExc_EOFError,
-                    "EOF read where object expected");
+            double x = r_float_str(p);
+            if (x == -1.0 && PyErr_Occurred())
                 break;
-            }
-            ptr = r_string(n, p);
-            if (ptr == NULL)
-                break;
-            memcpy(buf, ptr, n);
-            buf[n] = '\0';
-            dx = PyOS_string_to_double(buf, NULL, NULL);
-            if (dx == -1.0 && PyErr_Occurred())
-                break;
-            retval = PyFloat_FromDouble(dx);
+            retval = PyFloat_FromDouble(x);
             R_REF(retval);
             break;
         }
 
     case TYPE_BINARY_FLOAT:
         {
-            const unsigned char *buf;
-            double x;
-            buf = (const unsigned char *) r_string(8, p);
-            if (buf == NULL)
-                break;
-            x = _PyFloat_Unpack8(buf, 1);
+            double x = r_float_bin(p);
             if (x == -1.0 && PyErr_Occurred())
                 break;
             retval = PyFloat_FromDouble(x);
@@ -1055,35 +1049,11 @@
 
     case TYPE_COMPLEX:
         {
-            char buf[256];
-            const char *ptr;
             Py_complex c;
-            n = r_byte(p);
-            if (n == EOF) {
-                PyErr_SetString(PyExc_EOFError,
-                    "EOF read where object expected");
-                break;
-            }
-            ptr = r_string(n, p);
-            if (ptr == NULL)
-                break;
-            memcpy(buf, ptr, n);
-            buf[n] = '\0';
-            c.real = PyOS_string_to_double(buf, NULL, NULL);
+            c.real = r_float_str(p);
             if (c.real == -1.0 && PyErr_Occurred())
                 break;
-            n = r_byte(p);
-            if (n == EOF) {
-                PyErr_SetString(PyExc_EOFError,
-                    "EOF read where object expected");
-                break;
-            }
-            ptr = r_string(n, p);
-            if (ptr == NULL)
-                break;
-            memcpy(buf, ptr, n);
-            buf[n] = '\0';
-            c.imag = PyOS_string_to_double(buf, NULL, NULL);
+            c.imag = r_float_str(p);
             if (c.imag == -1.0 && PyErr_Occurred())
                 break;
             retval = PyComplex_FromCComplex(c);
@@ -1093,18 +1063,11 @@
 
     case TYPE_BINARY_COMPLEX:
         {
-            const unsigned char *buf;
             Py_complex c;
-            buf = (const unsigned char *) r_string(8, p);
-            if (buf == NULL)
-                break;
-            c.real = _PyFloat_Unpack8(buf, 1);
+            c.real = r_float_bin(p);
             if (c.real == -1.0 && PyErr_Occurred())
                 break;
-            buf = (const unsigned char *) r_string(8, p);
-            if (buf == NULL)
-                break;
-            c.imag = _PyFloat_Unpack8(buf, 1);
+            c.imag = r_float_bin(p);
             if (c.imag == -1.0 && PyErr_Occurred())
                 break;
             retval = PyComplex_FromCComplex(c);