bpo-31773: _PyTime_GetPerfCounter() uses _PyTime_t (GH-3983)

* Rewrite win_perf_counter() to only use integers internally.
* Add _PyTime_MulDiv(), which computes "ticks * mul / div"
  in two parts (integer part and remainder) to prevent integer overflow.
* Clock frequency is checked at initialization for integer overflow.
* Also enhance pymonotonic() to reduce the precision loss on macOS
  (mach_absolute_time() clock).
diff --git a/Python/import.c b/Python/import.c
index 76aa912..d396b4d 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -1669,10 +1669,10 @@
     else {
         static int ximporttime = 0;
         static int import_level;
-        static double accumulated;
+        static _PyTime_t accumulated;
         _Py_IDENTIFIER(importtime);
 
-        double t1 = 0, accumulated_copy = accumulated;
+        _PyTime_t t1 = 0, accumulated_copy = accumulated;
 
         Py_XDECREF(mod);
 
@@ -1695,7 +1695,7 @@
 
         if (ximporttime) {
             import_level++;
-            t1 = _PyTime_GetPerfCounterDouble();
+            t1 = _PyTime_GetPerfCounter();
             accumulated = 0;
         }
 
@@ -1711,12 +1711,12 @@
                                            mod != NULL);
 
         if (ximporttime) {
-            double cum = _PyTime_GetPerfCounterDouble() - t1;
+            _PyTime_t cum = _PyTime_GetPerfCounter() - t1;
 
             import_level--;
             fprintf(stderr, "import time: %9ld | %10ld | %*s%s\n",
-                    (long)ceil((cum - accumulated) * 1e6),
-                    (long)ceil(cum * 1e6),
+                    (long)_PyTime_AsMicroseconds(cum - accumulated, _PyTime_ROUND_CEILING),
+                    (long)_PyTime_AsMicroseconds(cum, _PyTime_ROUND_CEILING),
                     import_level*2, "", PyUnicode_AsUTF8(abs_name));
 
             accumulated = accumulated_copy + cum;