bpo-39812: Remove daemon threads in concurrent.futures (GH-19149)

Remove daemon threads from :mod:`concurrent.futures` by adding
an internal `threading._register_atexit()`, which calls registered functions
prior to joining all non-daemon threads. This allows for compatibility
with subinterpreters, which don't support daemon threads.
diff --git a/Lib/concurrent/futures/thread.py b/Lib/concurrent/futures/thread.py
index be79161..2aa4e17 100644
--- a/Lib/concurrent/futures/thread.py
+++ b/Lib/concurrent/futures/thread.py
@@ -13,19 +13,6 @@
 import weakref
 import os
 
-# Workers are created as daemon threads. This is done to allow the interpreter
-# to exit when there are still idle threads in a ThreadPoolExecutor's thread
-# pool (i.e. shutdown() was not called). However, allowing workers to die with
-# the interpreter has two undesirable properties:
-#   - The workers would still be running during interpreter shutdown,
-#     meaning that they would fail in unpredictable ways.
-#   - The workers could be killed while evaluating a work item, which could
-#     be bad if the callable being evaluated has external side-effects e.g.
-#     writing to a file.
-#
-# To work around this problem, an exit handler is installed which tells the
-# workers to exit when their work queues are empty and then waits until the
-# threads finish.
 
 _threads_queues = weakref.WeakKeyDictionary()
 _shutdown = False
@@ -43,7 +30,11 @@
     for t, q in items:
         t.join()
 
-atexit.register(_python_exit)
+# Register for `_python_exit()` to be called just before joining all
+# non-daemon threads. This is used instead of `atexit.register()` for
+# compatibility with subinterpreters, which no longer support daemon threads.
+# See bpo-39812 for context.
+threading._register_atexit(_python_exit)
 
 
 class _WorkItem(object):
@@ -197,7 +188,6 @@
                                        self._work_queue,
                                        self._initializer,
                                        self._initargs))
-            t.daemon = True
             t.start()
             self._threads.add(t)
             _threads_queues[t] = self._work_queue