bpo-30764: regrtest: add --fail-env-changed option (#2402)

* bpo-30764: regrtest: change exit code on failure

* Exit code 2 if failed tests ("bad")
* Exit code 3 if interrupted

* bpo-30764: regrtest: add --fail-env-changed option

If the option is set, mark a test as failed if it alters the
environment, for example if it creates a file without removing it.
diff --git a/Lib/test/libregrtest/cmdline.py b/Lib/test/libregrtest/cmdline.py
index bf64062..2315cd5 100644
--- a/Lib/test/libregrtest/cmdline.py
+++ b/Lib/test/libregrtest/cmdline.py
@@ -255,6 +255,9 @@
                             ' , don\'t execute them')
     group.add_argument('-P', '--pgo', dest='pgo', action='store_true',
                        help='enable Profile Guided Optimization training')
+    group.add_argument('--fail-env-changed', action='store_true',
+                       help='if a test file alters the environment, mark '
+                            'the test as failed')
 
     return parser
 
diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py
index 1a77655..571eb61 100644
--- a/Lib/test/libregrtest/main.py
+++ b/Lib/test/libregrtest/main.py
@@ -478,6 +478,8 @@
             result = "FAILURE"
         elif self.interrupted:
             result = "INTERRUPTED"
+        elif self.environment_changed and self.ns.fail_env_changed:
+            result = "ENV CHANGED"
         else:
             result = "SUCCESS"
         print("Tests result: %s" % result)
@@ -538,7 +540,13 @@
             self.rerun_failed_tests()
 
         self.finalize()
-        sys.exit(len(self.bad) > 0 or self.interrupted)
+        if self.bad:
+            sys.exit(2)
+        if self.interrupted:
+            sys.exit(130)
+        if self.ns.fail_env_changed and self.environment_changed:
+            sys.exit(3)
+        sys.exit(0)
 
 
 def removepy(names):
diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py
index 6f4fa79..0e676ee 100644
--- a/Lib/test/test_regrtest.py
+++ b/Lib/test/test_regrtest.py
@@ -377,19 +377,19 @@
         return list(match.group(1) for match in parser)
 
     def check_executed_tests(self, output, tests, skipped=(), failed=(),
-                             omitted=(), randomize=False, interrupted=False):
+                             env_changed=(), omitted=(),
+                             randomize=False, interrupted=False,
+                             fail_env_changed=False):
         if isinstance(tests, str):
             tests = [tests]
         if isinstance(skipped, str):
             skipped = [skipped]
         if isinstance(failed, str):
             failed = [failed]
+        if isinstance(env_changed, str):
+            env_changed = [env_changed]
         if isinstance(omitted, str):
             omitted = [omitted]
-        ntest = len(tests)
-        nskipped = len(skipped)
-        nfailed = len(failed)
-        nomitted = len(omitted)
 
         executed = self.parse_executed_tests(output)
         if randomize:
@@ -415,11 +415,17 @@
             regex = list_regex('%s test%s failed', failed)
             self.check_line(output, regex)
 
+        if env_changed:
+            regex = list_regex('%s test%s altered the execution environment',
+                               env_changed)
+            self.check_line(output, regex)
+
         if omitted:
             regex = list_regex('%s test%s omitted', omitted)
             self.check_line(output, regex)
 
-        good = ntest - nskipped - nfailed - nomitted
+        good = (len(tests) - len(skipped) - len(failed)
+                - len(omitted) - len(env_changed))
         if good:
             regex = r'%s test%s OK\.$' % (good, plural(good))
             if not skipped and not failed and good > 1:
@@ -429,10 +435,12 @@
         if interrupted:
             self.check_line(output, 'Test suite interrupted by signal SIGINT.')
 
-        if nfailed:
+        if failed:
             result = 'FAILURE'
         elif interrupted:
             result = 'INTERRUPTED'
+        elif fail_env_changed and env_changed:
+            result = 'ENV CHANGED'
         else:
             result = 'SUCCESS'
         self.check_line(output, 'Tests result: %s' % result)
@@ -604,7 +612,7 @@
         test_failing = self.create_test('failing', code=code)
         tests = [test_ok, test_failing]
 
-        output = self.run_tests(*tests, exitcode=1)
+        output = self.run_tests(*tests, exitcode=2)
         self.check_executed_tests(output, tests, failed=test_failing)
 
     def test_resources(self):
@@ -703,7 +711,7 @@
     def test_interrupted(self):
         code = TEST_INTERRUPTED
         test = self.create_test('sigint', code=code)
-        output = self.run_tests(test, exitcode=1)
+        output = self.run_tests(test, exitcode=130)
         self.check_executed_tests(output, test, omitted=test,
                                   interrupted=True)
 
@@ -732,7 +740,7 @@
                 args = ("--slowest", "-j2", test)
             else:
                 args = ("--slowest", test)
-            output = self.run_tests(*args, exitcode=1)
+            output = self.run_tests(*args, exitcode=130)
             self.check_executed_tests(output, test,
                                       omitted=test, interrupted=True)
 
@@ -772,7 +780,7 @@
                         builtins.__dict__['RUN'] = 1
         """)
         test = self.create_test('forever', code=code)
-        output = self.run_tests('--forever', test, exitcode=1)
+        output = self.run_tests('--forever', test, exitcode=2)
         self.check_executed_tests(output, [test]*3, failed=test)
 
     @unittest.skipUnless(Py_DEBUG, 'need a debug build')
@@ -804,7 +812,7 @@
         filename = 'reflog.txt'
         self.addCleanup(support.unlink, filename)
         output = self.run_tests('--huntrleaks', '3:3:', test,
-                                exitcode=1,
+                                exitcode=2,
                                 stderr=subprocess.STDOUT)
         self.check_executed_tests(output, [test], failed=test)
 
@@ -858,7 +866,7 @@
         ok_test = self.create_test(name="ok")
 
         tests = [crash_test, ok_test]
-        output = self.run_tests("-j2", *tests, exitcode=1)
+        output = self.run_tests("-j2", *tests, exitcode=2)
         self.check_executed_tests(output, tests, failed=crash_test,
                                   randomize=True)
 
@@ -907,6 +915,25 @@
         subset = ['test_method1', 'test_method3']
         self.assertEqual(methods, subset)
 
+    def test_env_changed(self):
+        code = textwrap.dedent("""
+            import unittest
+
+            class Tests(unittest.TestCase):
+                def test_env_changed(self):
+                    open("env_changed", "w").close()
+        """)
+        testname = self.create_test(code=code)
+
+        # don't fail by default
+        output = self.run_tests(testname)
+        self.check_executed_tests(output, [testname], env_changed=testname)
+
+        # fail with --fail-env-changed
+        output = self.run_tests("--fail-env-changed", testname, exitcode=3)
+        self.check_executed_tests(output, [testname], env_changed=testname,
+                                  fail_env_changed=True)
+
 
 if __name__ == '__main__':
     unittest.main()