Issue #11393: New attempt to fix faulthandler_thread()

Always release the cancel lock (thread.cancel_event).

Also fix another corner case: _PyFaulthandler_Fini() being called after the
thread has set the running variable to zero, but before it has released the
join lock.
diff --git a/Modules/faulthandler.c b/Modules/faulthandler.c
index 760689e..f8b9fb6 100644
--- a/Modules/faulthandler.c
+++ b/Modules/faulthandler.c
@@ -401,7 +401,6 @@
                                          thread.timeout_ms, 0);
         if (st == PY_LOCK_ACQUIRED) {
             /* Cancelled by user */
-            PyThread_release_lock(thread.cancel_event);
             break;
         }
         /* Timeout => dump traceback */
@@ -418,8 +417,9 @@
     } while (ok && thread.repeat);
 
     /* The only way out */
-    thread.running = 0;
+    PyThread_release_lock(thread.cancel_event);
     PyThread_release_lock(thread.join_event);
+    thread.running = 0;
 }
 
 static void
@@ -428,11 +428,11 @@
     if (thread.running) {
         /* Notify cancellation */
         PyThread_release_lock(thread.cancel_event);
-        /* Wait for thread to join */
-        PyThread_acquire_lock(thread.join_event, 1);
-        assert(thread.running == 0);
-        PyThread_release_lock(thread.join_event);
     }
+    /* Wait for thread to join */
+    PyThread_acquire_lock(thread.join_event, 1);
+    assert(thread.running == 0);
+    PyThread_release_lock(thread.join_event);
     Py_CLEAR(thread.file);
 }