bpo-35621: Support running subprocesses in asyncio when loop is executed in non-main thread  (GH-14344)

(cherry picked from commit 0d671c04c39b52e44597491b893eb0b6c86b3d45)

Co-authored-by: Andrew Svetlov <andrew.svetlov@gmail.com>
diff --git a/Lib/asyncio/unix_events.py b/Lib/asyncio/unix_events.py
index 28128d2..d7a4af8 100644
--- a/Lib/asyncio/unix_events.py
+++ b/Lib/asyncio/unix_events.py
@@ -2,6 +2,7 @@
 
 import errno
 import io
+import itertools
 import os
 import selectors
 import signal
@@ -12,7 +13,6 @@
 import threading
 import warnings
 
-
 from . import base_events
 from . import base_subprocess
 from . import constants
@@ -29,7 +29,9 @@
 __all__ = (
     'SelectorEventLoop',
     'AbstractChildWatcher', 'SafeChildWatcher',
-    'FastChildWatcher', 'DefaultEventLoopPolicy',
+    'FastChildWatcher',
+    'MultiLoopChildWatcher', 'ThreadedChildWatcher',
+    'DefaultEventLoopPolicy',
 )
 
 
@@ -184,6 +186,13 @@
                                          stdin, stdout, stderr, bufsize,
                                          extra=None, **kwargs):
         with events.get_child_watcher() as watcher:
+            if not watcher.is_active():
+                # Check early.
+                # Raising exception before process creation
+                # prevents subprocess execution if the watcher
+                # is not ready to handle it.
+                raise RuntimeError("asyncio.get_child_watcher() is not activated, "
+                                   "subprocess support is not installed.")
             waiter = self.create_future()
             transp = _UnixSubprocessTransport(self, protocol, args, shell,
                                               stdin, stdout, stderr, bufsize,
@@ -838,6 +847,15 @@
         """
         raise NotImplementedError()
 
+    def is_active(self):
+        """Return ``True`` if the watcher is active and is used by the event loop.
+
+        Return True if the watcher is installed and ready to handle process exit
+        notifications.
+
+        """
+        raise NotImplementedError()
+
     def __enter__(self):
         """Enter the watcher's context and allow starting new processes
 
@@ -849,6 +867,20 @@
         raise NotImplementedError()
 
 
+def _compute_returncode(status):
+    if os.WIFSIGNALED(status):
+        # The child process died because of a signal.
+        return -os.WTERMSIG(status)
+    elif os.WIFEXITED(status):
+        # The child process exited (e.g sys.exit()).
+        return os.WEXITSTATUS(status)
+    else:
+        # The child exited, but we don't understand its status.
+        # This shouldn't happen, but if it does, let's just
+        # return that status; perhaps that helps debug it.
+        return status
+
+
 class BaseChildWatcher(AbstractChildWatcher):
 
     def __init__(self):
@@ -858,6 +890,9 @@
     def close(self):
         self.attach_loop(None)
 
+    def is_active(self):
+        return self._loop is not None and self._loop.is_running()
+
     def _do_waitpid(self, expected_pid):
         raise NotImplementedError()
 
@@ -898,19 +933,6 @@
                 'exception': exc,
             })
 
-    def _compute_returncode(self, status):
-        if os.WIFSIGNALED(status):
-            # The child process died because of a signal.
-            return -os.WTERMSIG(status)
-        elif os.WIFEXITED(status):
-            # The child process exited (e.g sys.exit()).
-            return os.WEXITSTATUS(status)
-        else:
-            # The child exited, but we don't understand its status.
-            # This shouldn't happen, but if it does, let's just
-            # return that status; perhaps that helps debug it.
-            return status
-
 
 class SafeChildWatcher(BaseChildWatcher):
     """'Safe' child watcher implementation.
@@ -934,11 +956,6 @@
         pass
 
     def add_child_handler(self, pid, callback, *args):
-        if self._loop is None:
-            raise RuntimeError(
-                "Cannot add child handler, "
-                "the child watcher does not have a loop attached")
-
         self._callbacks[pid] = (callback, args)
 
         # Prevent a race condition in case the child is already terminated.
@@ -974,7 +991,7 @@
                 # The child process is still alive.
                 return
 
-            returncode = self._compute_returncode(status)
+            returncode = _compute_returncode(status)
             if self._loop.get_debug():
                 logger.debug('process %s exited with returncode %s',
                              expected_pid, returncode)
@@ -1035,11 +1052,6 @@
     def add_child_handler(self, pid, callback, *args):
         assert self._forks, "Must use the context manager"
 
-        if self._loop is None:
-            raise RuntimeError(
-                "Cannot add child handler, "
-                "the child watcher does not have a loop attached")
-
         with self._lock:
             try:
                 returncode = self._zombies.pop(pid)
@@ -1072,7 +1084,7 @@
                     # A child process is still alive.
                     return
 
-                returncode = self._compute_returncode(status)
+                returncode = _compute_returncode(status)
 
             with self._lock:
                 try:
@@ -1101,6 +1113,209 @@
                 callback(pid, returncode, *args)
 
 
+class MultiLoopChildWatcher(AbstractChildWatcher):
+    """A watcher that doesn't require running loop in the main thread.
+
+    This implementation registers a SIGCHLD signal handler on
+    instantiation (which may conflict with other code that
+    install own handler for this signal).
+
+    The solution is safe but it has a significant overhead when
+    handling a big number of processes (*O(n)* each time a
+    SIGCHLD is received).
+    """
+
+    # Implementation note:
+    # The class keeps compatibility with AbstractChildWatcher ABC
+    # To achieve this it has empty attach_loop() method
+    # and doesn't accept explicit loop argument
+    # for add_child_handler()/remove_child_handler()
+    # but retrieves the current loop by get_running_loop()
+
+    def __init__(self):
+        self._callbacks = {}
+        self._saved_sighandler = None
+
+    def is_active(self):
+        return self._saved_sighandler is not None
+
+    def close(self):
+        self._callbacks.clear()
+        if self._saved_sighandler is not None:
+            handler = signal.getsignal(signal.SIGCHLD)
+            if handler != self._sig_chld:
+                logger.warning("SIGCHLD handler was changed by outside code")
+            else:
+                signal.signal(signal.SIGCHLD, self._saved_sighandler)
+            self._saved_sighandler = None
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        pass
+
+    def add_child_handler(self, pid, callback, *args):
+        loop = events.get_running_loop()
+        self._callbacks[pid] = (loop, callback, args)
+
+        # Prevent a race condition in case the child is already terminated.
+        self._do_waitpid(pid)
+
+    def remove_child_handler(self, pid):
+        try:
+            del self._callbacks[pid]
+            return True
+        except KeyError:
+            return False
+
+    def attach_loop(self, loop):
+        # Don't save the loop but initialize itself if called first time
+        # The reason to do it here is that attach_loop() is called from
+        # unix policy only for the main thread.
+        # Main thread is required for subscription on SIGCHLD signal
+        if self._saved_sighandler is None:
+            self._saved_sighandler = signal.signal(signal.SIGCHLD, self._sig_chld)
+            if self._saved_sighandler is None:
+                logger.warning("Previous SIGCHLD handler was set by non-Python code, "
+                               "restore to default handler on watcher close.")
+                self._saved_sighandler = signal.SIG_DFL
+
+            # Set SA_RESTART to limit EINTR occurrences.
+            signal.siginterrupt(signal.SIGCHLD, False)
+
+    def _do_waitpid_all(self):
+        for pid in list(self._callbacks):
+            self._do_waitpid(pid)
+
+    def _do_waitpid(self, expected_pid):
+        assert expected_pid > 0
+
+        try:
+            pid, status = os.waitpid(expected_pid, os.WNOHANG)
+        except ChildProcessError:
+            # The child process is already reaped
+            # (may happen if waitpid() is called elsewhere).
+            pid = expected_pid
+            returncode = 255
+            logger.warning(
+                "Unknown child process pid %d, will report returncode 255",
+                pid)
+            debug_log = False
+        else:
+            if pid == 0:
+                # The child process is still alive.
+                return
+
+            returncode = _compute_returncode(status)
+            debug_log = True
+        try:
+            loop, callback, args = self._callbacks.pop(pid)
+        except KeyError:  # pragma: no cover
+            # May happen if .remove_child_handler() is called
+            # after os.waitpid() returns.
+            logger.warning("Child watcher got an unexpected pid: %r",
+                           pid, exc_info=True)
+        else:
+            if loop.is_closed():
+                logger.warning("Loop %r that handles pid %r is closed", loop, pid)
+            else:
+                if debug_log and loop.get_debug():
+                    logger.debug('process %s exited with returncode %s',
+                                 expected_pid, returncode)
+                loop.call_soon_threadsafe(callback, pid, returncode, *args)
+
+    def _sig_chld(self, signum, frame):
+        try:
+            self._do_waitpid_all()
+        except (SystemExit, KeyboardInterrupt):
+            raise
+        except BaseException:
+            logger.warning('Unknown exception in SIGCHLD handler', exc_info=True)
+
+
+class ThreadedChildWatcher(AbstractChildWatcher):
+    """Threaded child watcher implementation.
+
+    The watcher uses a thread per process
+    for waiting for the process finish.
+
+    It doesn't require subscription on POSIX signal
+    but a thread creation is not free.
+
+    The watcher has O(1) complexity, its perfomance doesn't depend
+    on amount of spawn processes.
+    """
+
+    def __init__(self):
+        self._pid_counter = itertools.count(0)
+        self._threads = {}
+
+    def is_active(self):
+        return True
+
+    def close(self):
+        pass
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        pass
+
+    def __del__(self, _warn=warnings.warn):
+        threads = [thread for thread in list(self._threads.values())
+                   if thread.is_alive()]
+        if threads:
+            _warn(f"{self.__class__} has registered but not finished child processes",
+                  ResourceWarning,
+                  source=self)
+
+    def add_child_handler(self, pid, callback, *args):
+        loop = events.get_running_loop()
+        thread = threading.Thread(target=self._do_waitpid,
+                                  name=f"waitpid-{next(self._pid_counter)}",
+                                  args=(loop, pid, callback, args),
+                                  daemon=True)
+        self._threads[pid] = thread
+        thread.start()
+
+    def remove_child_handler(self, pid):
+        # asyncio never calls remove_child_handler() !!!
+        # The method is no-op but is implemented because
+        # abstract base classe requires it
+        return True
+
+    def attach_loop(self, loop):
+        pass
+
+    def _do_waitpid(self, loop, expected_pid, callback, args):
+        assert expected_pid > 0
+
+        try:
+            pid, status = os.waitpid(expected_pid, 0)
+        except ChildProcessError:
+            # The child process is already reaped
+            # (may happen if waitpid() is called elsewhere).
+            pid = expected_pid
+            returncode = 255
+            logger.warning(
+                "Unknown child process pid %d, will report returncode 255",
+                pid)
+        else:
+            returncode = _compute_returncode(status)
+            if loop.get_debug():
+                logger.debug('process %s exited with returncode %s',
+                             expected_pid, returncode)
+
+        if loop.is_closed():
+            logger.warning("Loop %r that handles pid %r is closed", loop, pid)
+        else:
+            loop.call_soon_threadsafe(callback, pid, returncode, *args)
+
+        self._threads.pop(expected_pid)
+
+
 class _UnixDefaultEventLoopPolicy(events.BaseDefaultEventLoopPolicy):
     """UNIX event loop policy with a watcher for child processes."""
     _loop_factory = _UnixSelectorEventLoop
@@ -1112,7 +1327,7 @@
     def _init_watcher(self):
         with events._lock:
             if self._watcher is None:  # pragma: no branch
-                self._watcher = SafeChildWatcher()
+                self._watcher = ThreadedChildWatcher()
                 if isinstance(threading.current_thread(),
                               threading._MainThread):
                     self._watcher.attach_loop(self._local._loop)
@@ -1134,7 +1349,7 @@
     def get_child_watcher(self):
         """Get the watcher for child processes.
 
-        If not yet set, a SafeChildWatcher object is automatically created.
+        If not yet set, a ThreadedChildWatcher object is automatically created.
         """
         if self._watcher is None:
             self._init_watcher()