Merge pull request #13017 from jtattermusch/timeout_retries_are_costly

Limit number of timeout_retries for run_tests.py
diff --git a/tools/run_tests/python_utils/jobset.py b/tools/run_tests/python_utils/jobset.py
index 658b814..85eef44 100755
--- a/tools/run_tests/python_utils/jobset.py
+++ b/tools/run_tests/python_utils/jobset.py
@@ -302,6 +302,7 @@
           self._retries += 1
           self.result.num_failures += 1
           self.result.retries = self._timeout_retries + self._retries
+          # NOTE: job is restarted regardless of jobset's max_time setting
           self.start()
         else:
           self._state = _FAILURE
@@ -344,6 +345,7 @@
         if self._spec.kill_handler:
           self._spec.kill_handler(self)
         self._process.terminate()
+        # NOTE: job is restarted regardless of jobset's max_time setting
         self.start()
       else:
         message('TIMEOUT', '%s [pid=%d, time=%.1fsec]' % (self._spec.shortname, self._process.pid, elapsed), stdout(), do_newline=True)
diff --git a/tools/run_tests/run_interop_tests.py b/tools/run_tests/run_interop_tests.py
index 4dd9827..2f82687 100755
--- a/tools/run_tests/run_interop_tests.py
+++ b/tools/run_tests/run_interop_tests.py
@@ -680,7 +680,7 @@
           shortname='%s:%s:%s:%s' % (suite_name, language, server_host_name,
                                      test_case),
           timeout_seconds=_TEST_TIMEOUT,
-          flake_retries=5 if args.allow_flakes else 0,
+          flake_retries=4 if args.allow_flakes else 0,
           timeout_retries=2 if args.allow_flakes else 0,
           kill_handler=_job_kill_handler)
   if docker_image:
@@ -746,7 +746,7 @@
           shortname='cloud_to_cloud:%s:%s_server:%s' % (language, server_name,
                                                         test_case),
           timeout_seconds=_TEST_TIMEOUT,
-          flake_retries=5 if args.allow_flakes else 0,
+          flake_retries=4 if args.allow_flakes else 0,
           timeout_retries=2 if args.allow_flakes else 0,
           kill_handler=_job_kill_handler)
   if docker_image:
diff --git a/tools/run_tests/run_tests.py b/tools/run_tests/run_tests.py
index 51e448c..011ed38 100755
--- a/tools/run_tests/run_tests.py
+++ b/tools/run_tests/run_tests.py
@@ -159,8 +159,8 @@
                           environ=actual_environ,
                           cpu_cost=cpu_cost,
                           timeout_seconds=(self.timeout_multiplier * timeout_seconds if timeout_seconds else None),
-                          flake_retries=5 if flaky or args.allow_flakes else 0,
-                          timeout_retries=3 if flaky or args.allow_flakes else 0)
+                          flake_retries=4 if flaky or args.allow_flakes else 0,
+                          timeout_retries=1 if flaky or args.allow_flakes else 0)
 
 
 def get_c_tests(travis, test_lang) :
@@ -1495,7 +1495,7 @@
   return environ
 
 build_steps = list(set(
-                   jobset.JobSpec(cmdline, environ=build_step_environ(build_config), flake_retries=5)
+                   jobset.JobSpec(cmdline, environ=build_step_environ(build_config), flake_retries=2)
                    for l in languages
                    for cmdline in l.pre_build_steps()))
 if make_targets: