bpo-41299: Reduce lag in Windows threading timeouts by using a higher precision time source (GH-26568) (GH-26580)

(cherry picked from commit 449e6f0ef395231e3abe467f910b02d7f075c27f)

Co-authored-by: Ryan Hileman <lunixbochs@gmail.com>
diff --git a/Misc/NEWS.d/next/Windows/2021-06-06-16-36-13.bpo-41299.Rg-vb_.rst b/Misc/NEWS.d/next/Windows/2021-06-06-16-36-13.bpo-41299.Rg-vb_.rst
new file mode 100644
index 0000000..71f700f
--- /dev/null
+++ b/Misc/NEWS.d/next/Windows/2021-06-06-16-36-13.bpo-41299.Rg-vb_.rst
@@ -0,0 +1 @@
+Fix 16ms jitter when using timeouts in :mod:`threading`, such as with :meth:`threading.Lock.acquire` or :meth:`threading.Condition.wait`.
diff --git a/Python/thread_nt.h b/Python/thread_nt.h
index 05b982d..0ce5e94 100644
--- a/Python/thread_nt.h
+++ b/Python/thread_nt.h
@@ -76,16 +76,22 @@ EnterNonRecursiveMutex(PNRMUTEX mutex, DWORD milliseconds)
         }
     } else if (milliseconds != 0) {
         /* wait at least until the target */
-        ULONGLONG now, target = GetTickCount64() + milliseconds;
+        _PyTime_t now = _PyTime_GetPerfCounter();
+        if (now <= 0) {
+            Py_FatalError("_PyTime_GetPerfCounter() == 0");
+        }
+        _PyTime_t nanoseconds = _PyTime_FromNanoseconds((_PyTime_t)milliseconds * 1000000);
+        _PyTime_t target = now + nanoseconds;
         while (mutex->locked) {
-            if (PyCOND_TIMEDWAIT(&mutex->cv, &mutex->cs, (long long)milliseconds*1000) < 0) {
+            _PyTime_t microseconds = _PyTime_AsMicroseconds(nanoseconds, _PyTime_ROUND_TIMEOUT);
+            if (PyCOND_TIMEDWAIT(&mutex->cv, &mutex->cs, microseconds) < 0) {
                 result = WAIT_FAILED;
                 break;
             }
-            now = GetTickCount64();
+            now = _PyTime_GetPerfCounter();
             if (target <= now)
                 break;
-            milliseconds = (DWORD)(target-now);
+            nanoseconds = target - now;
         }
     }
     if (!mutex->locked) {