bpo-40480 "fnmatch" exponential execution time (GH-19908)

bpo-40480: Generate a different regexp when a pattern contains multiple `*`
wildcards, to prevent fnmatch() from taking exponential time.
diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index b98e641..d7d915d 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -77,15 +77,19 @@
     There is no way to quote meta-characters.
     """
 
+    STAR = object()
+    res = []
+    add = res.append
     i, n = 0, len(pat)
-    res = ''
     while i < n:
         c = pat[i]
         i = i+1
         if c == '*':
-            res = res + '.*'
+            # compress consecutive `*` into one
+            if (not res) or res[-1] is not STAR:
+                add(STAR)
         elif c == '?':
-            res = res + '.'
+            add('.')
         elif c == '[':
             j = i
             if j < n and pat[j] == '!':
@@ -95,7 +99,7 @@
             while j < n and pat[j] != ']':
                 j = j+1
             if j >= n:
-                res = res + '\\['
+                add('\\[')
             else:
                 stuff = pat[i:j]
                 if '--' not in stuff:
@@ -122,7 +126,49 @@
                     stuff = '^' + stuff[1:]
                 elif stuff[0] in ('^', '['):
                     stuff = '\\' + stuff
-                res = '%s[%s]' % (res, stuff)
+                add(f'[{stuff}]')
         else:
-            res = res + re.escape(c)
-    return r'(?s:%s)\Z' % res
+            add(re.escape(c))
+    assert i == n
+
+    # Deal with STARs.
+    inp = res
+    res = []
+    add = res.append
+    i, n = 0, len(inp)
+    # Fixed pieces at the start?
+    while i < n and inp[i] is not STAR:
+        add(inp[i])
+        i += 1
+    # Now deal with STAR fixed STAR fixed ...
+    # For an interior `STAR fixed` pairing, we want to do a minimal
+    # .*? match followed by `fixed`, with no possibility of backtracking.
+    # We can't spell that directly, but can trick it into working by matching
+    #    .*?fixed
+    # in a lookahead assertion, save the matched part in a group, then
+    # consume that group via a backreference. If the overall match fails,
+    # the lookahead assertion won't try alternatives. So the translation is:
+    #     (?=(?P<name>.*?fixed))(?P=name)
+    # Group names are created as needed: g1, g2, g3, ...
+    groupnum = 0
+    while i < n:
+        assert inp[i] is STAR
+        i += 1
+        if i == n:
+            add(".*")
+            break
+        assert inp[i] is not STAR
+        fixed = []
+        while i < n and inp[i] is not STAR:
+            fixed.append(inp[i])
+            i += 1
+        fixed = "".join(fixed)
+        if i == n:
+            add(".*")
+            add(fixed)
+        else:
+            groupnum += 1
+            add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
+    assert i == n
+    res = "".join(res)
+    return fr'(?s:{res})\Z'
diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 55f9f0d..4c17306 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -45,6 +45,13 @@
         check('\nfoo', 'foo*', False)
         check('\n', '*')
 
+    def test_slow_fnmatch(self):
+        check = self.check_match
+        check('a' * 50, '*a*a*a*a*a*a*a*a*a*a')
+        # The next match fails, and takes exponential time if every `*` is
+        # naively translated to `.*`.  See bpo-40480.
+        check('a' * 50 + 'b', '*a*a*a*a*a*a*a*a*a*a', False)
+
     def test_mix_bytes_str(self):
         self.assertRaises(TypeError, fnmatch, 'test', b'*')
         self.assertRaises(TypeError, fnmatch, b'test', '*')
@@ -107,6 +114,16 @@
         self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z')
         self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z')
         self.assertEqual(translate('[x'), r'(?s:\[x)\Z')
+        # from the docs
+        self.assertEqual(translate('*.txt'), r'(?s:.*\.txt)\Z')
+        # squash consecutive stars
+        self.assertEqual(translate('*********'), r'(?s:.*)\Z')
+        self.assertEqual(translate('A*********'), r'(?s:A.*)\Z')
+        self.assertEqual(translate('*********A'), r'(?s:.*A)\Z')
+        self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
+        # fancy translation to prevent exponential-time match failure
+        self.assertEqual(translate('**a*a****a'),
+             r'(?s:(?=(?P<g1>.*?a))(?P=g1)(?=(?P<g2>.*?a))(?P=g2).*a)\Z')
 
 
 class FilterTestCase(unittest.TestCase):
diff --git a/Misc/NEWS.d/next/Library/2020-05-04-21-21-43.bpo-40480.mjldWa.rst b/Misc/NEWS.d/next/Library/2020-05-04-21-21-43.bpo-40480.mjldWa.rst
new file mode 100644
index 0000000..d046b14
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-05-04-21-21-43.bpo-40480.mjldWa.rst
@@ -0,0 +1 @@
+``fnmatch.fnmatch()`` could take exponential time in the presence of multiple ``*`` pattern characters.  This was repaired by generating more elaborate regular expressions to avoid futile backtracking.
\ No newline at end of file