bpo-34060: Report system load when running test suite for Windows (GH-8357)

While Windows exposes the system processor queue length, the raw value
used for load calculations on Unix systems, it does not provide an API
to access the averaged value. Hence to calculate the load we must track
and average it ourselves. We can't use multiprocessing or a thread to
read it in the background while the tests run since using those would
conflict with test_multiprocessing and test_xxsubprocess.

Thus, we use Windows' asynchronous IO API to run the tracker in the
background, sampling at the correct rate. When we wish to access the
load, we check whether there is new data on the stream and, if so,
update our load values.

diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py
index 32ac440..18ef6d0 100644
--- a/Lib/test/libregrtest/main.py
+++ b/Lib/test/libregrtest/main.py
@@ -146,8 +146,8 @@
         line = f"[{line}] {test}"
 
         # add the system load prefix: "load avg: 1.80 "
-        if hasattr(os, 'getloadavg'):
-            load_avg_1min = os.getloadavg()[0]
+        if self.getloadavg:
+            load_avg_1min = self.getloadavg()
             line = f"load avg: {load_avg_1min:.2f} {line}"
 
         # add the timestamp prefix:  "0:01:05 "
@@ -616,6 +616,19 @@
             self.list_cases()
             sys.exit(0)
 
+        self.getloadavg = None
+        # Use os.getloadavg() where available; on Windows, track the load
+        # ourselves, but only in the parent runner (not in a worker).
+        if hasattr(os, 'getloadavg'):
+            def getloadavg_1m():
+                return os.getloadavg()[0]
+            self.getloadavg = getloadavg_1m
+        elif sys.platform == 'win32' and (self.ns.worker_args is None):
+            from test.libregrtest.win_utils import WindowsLoadTracker
+
+            load_tracker = WindowsLoadTracker()
+            self.getloadavg = load_tracker.getloadavg
+
         self.run_tests()
         self.display_result()
 
diff --git a/Lib/test/libregrtest/win_utils.py b/Lib/test/libregrtest/win_utils.py
new file mode 100644
index 0000000..2e64922
--- /dev/null
+++ b/Lib/test/libregrtest/win_utils.py
@@ -0,0 +1,100 @@
+import subprocess
+import sys
+import os
+import _winapi
+import msvcrt
+import uuid
+from test import support
+
+
+# Max size of asynchronous reads
+BUFSIZE = 8192
+# Exponential damping factor (see below)
+LOAD_FACTOR_1 = 0.9200444146293232478931553241
+# Seconds per measurement
+SAMPLING_INTERVAL = 5
+COUNTER_NAME = r'\System\Processor Queue Length'
+
+
+class WindowsLoadTracker():
+    """
+    This class asynchronously interacts with the `typeperf` command to read
+    the system load on Windows. Multiprocessing and threads can't be used
+    here because they interfere with the test suite's cases for those
+    modules.
+    """
+
+    def __init__(self):
+        self.load = 0.0
+        self.start()
+
+    def start(self):
+        # Create a named pipe which allows for asynchronous IO in Windows
+        pipe_name =  r'\\.\pipe\typeperf_output_' + str(uuid.uuid4())
+
+        open_mode =  _winapi.PIPE_ACCESS_INBOUND
+        open_mode |= _winapi.FILE_FLAG_FIRST_PIPE_INSTANCE
+        open_mode |= _winapi.FILE_FLAG_OVERLAPPED
+
+        # This is the read end of the pipe, where we will be grabbing output
+        self.pipe = _winapi.CreateNamedPipe(
+            pipe_name, open_mode, _winapi.PIPE_WAIT,
+            1, BUFSIZE, BUFSIZE, _winapi.NMPWAIT_WAIT_FOREVER, _winapi.NULL
+        )
+        # The write end of the pipe which is passed to the created process
+        pipe_write_end = _winapi.CreateFile(
+            pipe_name, _winapi.GENERIC_WRITE, 0, _winapi.NULL,
+            _winapi.OPEN_EXISTING, 0, _winapi.NULL
+        )
+        # Open up the handle as a python file object so we can pass it to
+        # subprocess
+        command_stdout = msvcrt.open_osfhandle(pipe_write_end, 0)
+
+        # Connect to the read end of the pipe in overlap/async mode
+        overlap = _winapi.ConnectNamedPipe(self.pipe, overlapped=True)
+        overlap.GetOverlappedResult(True)
+
+        # Spawn off the load monitor
+        command = ['typeperf', COUNTER_NAME, '-si', str(SAMPLING_INTERVAL)]
+        self.p = subprocess.Popen(command, stdout=command_stdout, cwd=support.SAVEDCWD)
+
+        # Close our copy of the write end of the pipe
+        os.close(command_stdout)
+
+    def __del__(self):
+        self.p.kill()
+        self.p.wait()
+
+    def read_output(self):
+        import _winapi
+
+        overlapped, _ = _winapi.ReadFile(self.pipe, BUFSIZE, True)
+        bytes_read, res = overlapped.GetOverlappedResult(False)
+        if res != 0:
+            return
+
+        return overlapped.getbuffer().decode()
+
+    def getloadavg(self):
+        typeperf_output = self.read_output()
+        # Nothing to update, just return the current load
+        if not typeperf_output:
+            return self.load
+
+        # Process the backlog of load values
+        for line in typeperf_output.splitlines():
+            # typeperf outputs in a CSV format like this:
+            # "07/19/2018 01:32:26.605","3.000000"
+            toks = line.split(',')
+            # Ignore blank lines and the initial header
+            if line.strip() == '' or (COUNTER_NAME in line) or len(toks) != 2:
+                continue
+
+            load = float(toks[1].replace('"', ''))
+            # We use an exponentially weighted moving average, imitating the
+            # load calculation on Unix systems.
+            # https://en.wikipedia.org/wiki/Load_(computing)#Unix-style_load_calculation
+            new_load = self.load * LOAD_FACTOR_1 + load * (1.0 - LOAD_FACTOR_1)
+            self.load = new_load
+
+        return self.load
diff --git a/Misc/NEWS.d/next/Windows/2018-07-20-13-09-19.bpo-34060.v-z87j.rst b/Misc/NEWS.d/next/Windows/2018-07-20-13-09-19.bpo-34060.v-z87j.rst
new file mode 100644
index 0000000..b77d805
--- /dev/null
+++ b/Misc/NEWS.d/next/Windows/2018-07-20-13-09-19.bpo-34060.v-z87j.rst
@@ -0,0 +1,2 @@
+Report system load when running test suite on Windows. Patch by Ammar Askar.
+Based on prior work by Jeremy Kloth.