bpo-35279: reduce default max_workers of ThreadPoolExecutor (GH-13618)

diff --git a/Lib/concurrent/futures/thread.py b/Lib/concurrent/futures/thread.py
index ad6b4c2..2426e94 100644
--- a/Lib/concurrent/futures/thread.py
+++ b/Lib/concurrent/futures/thread.py
@@ -129,9 +129,14 @@
             initargs: A tuple of arguments to pass to the initializer.
         """
         if max_workers is None:
-            # Use this number because ThreadPoolExecutor is often
-            # used to overlap I/O instead of CPU work.
-            max_workers = (os.cpu_count() or 1) * 5
+            # ThreadPoolExecutor is often used to:
+            # * CPU bound task which releases GIL
+            # * I/O bound task (which releases GIL, of course)
+            #
+            # We use cpu_count + 4 for both types of tasks.
+            # But we limit it to 32 to avoid consuming surprisingly large resource
+            # on many core machine.
+            max_workers = min(32, (os.cpu_count() or 1) + 4)
         if max_workers <= 0:
             raise ValueError("max_workers must be greater than 0")
 
diff --git a/Lib/test/test_concurrent_futures.py b/Lib/test/test_concurrent_futures.py
index de6ad8f..b27ae71 100644
--- a/Lib/test/test_concurrent_futures.py
+++ b/Lib/test/test_concurrent_futures.py
@@ -755,8 +755,8 @@
 
     def test_default_workers(self):
         executor = self.executor_type()
-        self.assertEqual(executor._max_workers,
-                         (os.cpu_count() or 1) * 5)
+        expected = min(32, (os.cpu_count() or 1) + 4)
+        self.assertEqual(executor._max_workers, expected)
 
     def test_saturation(self):
         executor = self.executor_type(4)