[3.8] bpo-36402: Fix threading._shutdown() race condition (GH-13948) (GH-14050)

* bpo-36402: Fix threading._shutdown() race condition (GH-13948)

Fix a race condition at Python shutdown when waiting for threads.
Wait until the Python thread state of all non-daemon threads get
deleted (join all non-daemon threads), rather than just wait until
Python threads complete.

* Add threading._shutdown_locks: set of Thread._tstate_lock locks
  of non-daemon threads used by _shutdown() to wait until all Python
  thread states get deleted. See Thread._set_tstate_lock().
* Add also threading._shutdown_locks_lock to protect access to
  threading._shutdown_locks.
* Add test_finalization_shutdown() test.

(cherry picked from commit 468e5fec8a2f534f1685d59da3ca4fad425c38dd)

* bpo-36402: Fix threading.Thread._stop() (GH-14047)

Remove the _tstate_lock from _shutdown_locks, don't remove None.

(cherry picked from commit 6f75c873752a16a7ad8f35855b1e29f59d048e84)
diff --git a/Lib/threading.py b/Lib/threading.py
index 3d197ee..7c6d404 100644
--- a/Lib/threading.py
+++ b/Lib/threading.py
@@ -739,6 +739,11 @@
 _active = {}    # maps thread id to Thread object
 _limbo = {}
 _dangling = WeakSet()
+# Set of Thread._tstate_lock locks of non-daemon threads used by _shutdown()
+# to wait until all Python thread states get deleted:
+# see Thread._set_tstate_lock().
+_shutdown_locks_lock = _allocate_lock()
+_shutdown_locks = set()
 
 # Main class for threads
 
@@ -903,6 +908,10 @@
         self._tstate_lock = _set_sentinel()
         self._tstate_lock.acquire()
 
+        if not self.daemon:
+            with _shutdown_locks_lock:
+                _shutdown_locks.add(self._tstate_lock)
+
     def _bootstrap_inner(self):
         try:
             self._set_ident()
@@ -954,6 +963,9 @@
             assert not lock.locked()
         self._is_stopped = True
         self._tstate_lock = None
+        if not self.daemon:
+            with _shutdown_locks_lock:
+                _shutdown_locks.discard(lock)
 
     def _delete(self):
         "Remove current thread from the dict of currently running threads."
@@ -1342,6 +1354,9 @@
 _main_thread = _MainThread()
 
 def _shutdown():
+    """
+    Wait until the Python thread state of all non-daemon threads get deleted.
+    """
     # Obscure:  other threads may be waiting to join _main_thread.  That's
     # dubious, but some code does it.  We can't wait for C code to release
     # the main thread's tstate_lock - that won't happen until the interpreter
@@ -1350,6 +1365,8 @@
     if _main_thread._is_stopped:
         # _shutdown() was already called
         return
+
+    # Main thread
     tlock = _main_thread._tstate_lock
     # The main thread isn't finished yet, so its thread state lock can't have
     # been released.
@@ -1357,16 +1374,24 @@
     assert tlock.locked()
     tlock.release()
     _main_thread._stop()
-    t = _pickSomeNonDaemonThread()
-    while t:
-        t.join()
-        t = _pickSomeNonDaemonThread()
 
-def _pickSomeNonDaemonThread():
-    for t in enumerate():
-        if not t.daemon and t.is_alive():
-            return t
-    return None
+    # Join all non-deamon threads
+    while True:
+        with _shutdown_locks_lock:
+            locks = list(_shutdown_locks)
+            _shutdown_locks.clear()
+
+        if not locks:
+            break
+
+        for lock in locks:
+            # mimick Thread.join()
+            lock.acquire()
+            lock.release()
+
+        # new threads can be spawned while we were waiting for the other
+        # threads to complete
+
 
 def main_thread():
     """Return the main thread object.
@@ -1392,12 +1417,18 @@
     # Reset _active_limbo_lock, in case we forked while the lock was held
     # by another (non-forked) thread.  http://bugs.python.org/issue874900
     global _active_limbo_lock, _main_thread
+    global _shutdown_locks_lock, _shutdown_locks
     _active_limbo_lock = _allocate_lock()
 
     # fork() only copied the current thread; clear references to others.
     new_active = {}
     current = current_thread()
     _main_thread = current
+
+    # reset _shutdown() locks: threads re-register their _tstate_lock below
+    _shutdown_locks_lock = _allocate_lock()
+    _shutdown_locks = set()
+
     with _active_limbo_lock:
         # Dangling thread instances must still have their locks reset,
         # because someone may join() them.