bpo-37531: Enhance regrtest multiprocess timeout (GH-15345) (GH-15871)
* Write a message when killing a worker process
* Put a timeout on the second popen.communicate() call
(after killing the process)
* Put a timeout on popen.wait() call
* Catch popen.kill() and popen.wait() exceptions
(cherry picked from commit de2d9eed8bc628533e1628b843cc4c7a5010f6e5)
diff --git a/Lib/test/libregrtest/runtest_mp.py b/Lib/test/libregrtest/runtest_mp.py
index eed6432..c22479b 100644
--- a/Lib/test/libregrtest/runtest_mp.py
+++ b/Lib/test/libregrtest/runtest_mp.py
@@ -126,6 +126,38 @@
info.append(f'pid={popen.pid}')
return '<%s>' % ' '.join(info)
+ def _kill(self):
+ dt = time.monotonic() - self.start_time
+
+ popen = self._popen
+ pid = popen.pid
+ print("Kill worker process %s running for %.1f sec" % (pid, dt),
+ file=sys.stderr, flush=True)
+
+ try:
+ popen.kill()
+ return True
+ except OSError as exc:
+ print("WARNING: Failed to kill worker process %s: %r" % (pid, exc),
+ file=sys.stderr, flush=True)
+ return False
+
+ def _close_wait(self):
+ popen = self._popen
+
+ # stdout and stderr must be closed to ensure that communicate()
+ # does not hang
+ popen.stdout.close()
+ popen.stderr.close()
+
+ try:
+ popen.wait(JOIN_TIMEOUT)
+ except (subprocess.TimeoutExpired, OSError) as exc:
+ print("WARNING: Failed to wait for worker process %s "
+ "completion (timeout=%.1f sec): %r"
+ % (popen.pid, JOIN_TIMEOUT, exc),
+ file=sys.stderr, flush=True)
+
def kill(self):
"""
Kill the current process (if any).
@@ -135,15 +167,13 @@
"""
self._killed = True
- popen = self._popen
- if popen is None:
+ if self._popen is None:
return
- popen.kill()
- # stdout and stderr must be closed to ensure that communicate()
- # does not hang
- popen.stdout.close()
- popen.stderr.close()
- popen.wait()
+
+ if not self._kill():
+ return
+
+ self._close_wait()
def mp_result_error(self, test_name, error_type, stdout='', stderr='',
err_msg=None):
@@ -151,6 +181,23 @@
result = TestResult(test_name, error_type, test_time, None)
return MultiprocessResult(result, stdout, stderr, err_msg)
+ def _timedout(self, test_name):
+ self._kill()
+
+ stdout = sterr = ''
+ popen = self._popen
+ try:
+ stdout, stderr = popen.communicate(timeout=JOIN_TIMEOUT)
+ except (subprocess.TimeoutExpired, OSError) as exc:
+ print("WARNING: Failed to read worker process %s output "
+ "(timeout=%.1f sec): %r"
+ % (popen.pid, exc, timeout),
+ file=sys.stderr, flush=True)
+
+ self._close_wait()
+
+ return self.mp_result_error(test_name, TIMEOUT, stdout, stderr)
+
def _runtest(self, test_name):
try:
self.start_time = time.monotonic()
@@ -158,7 +205,7 @@
self._popen = run_test_in_subprocess(test_name, self.ns)
popen = self._popen
- with popen:
+ try:
try:
if self._killed:
# If kill() has been called before self._popen is set,
@@ -175,12 +222,7 @@
# on reading closed stdout/stderr
raise ExitThread
- popen.kill()
- stdout, stderr = popen.communicate()
- self.kill()
-
- return self.mp_result_error(test_name, TIMEOUT,
- stdout, stderr)
+ return self._timedout(test_name)
except OSError:
if self._killed:
# kill() has been called: communicate() fails
@@ -190,8 +232,10 @@
except:
self.kill()
raise
+ finally:
+ self._close_wait()
- retcode = popen.wait()
+ retcode = popen.returncode
finally:
self.current_test_name = None
self._popen = None
@@ -286,10 +330,11 @@
if not worker.is_alive():
break
dt = time.monotonic() - start_time
- print("Wait for regrtest worker %r for %.1f sec" % (worker, dt))
+ print("Wait for regrtest worker %r for %.1f sec" % (worker, dt),
+ flush=True)
if dt > JOIN_TIMEOUT:
print("Warning -- failed to join a regrtest worker %s"
- % worker)
+ % worker, flush=True)
break
def _get_result(self):
diff --git a/Misc/NEWS.d/next/Tests/2019-08-20-19-24-19.bpo-37531.wRoXfU.rst b/Misc/NEWS.d/next/Tests/2019-08-20-19-24-19.bpo-37531.wRoXfU.rst
new file mode 100644
index 0000000..59500ce
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2019-08-20-19-24-19.bpo-37531.wRoXfU.rst
@@ -0,0 +1,3 @@
+Enhance regrtest multiprocess timeout: write a message when killing a worker
+process, catch popen.kill() and popen.wait() exceptions, put a timeout on the
+second call to popen.communicate().