blob: 7c52c23067d40f3e808b160647ff3537823325ec [file] [log] [blame]
Guido van Rossum7e4b2de1995-01-27 02:41:45 +00001"""Filename matching with shell patterns.
Guido van Rossum05e52191992-01-12 23:29:29 +00002
Guido van Rossum7e4b2de1995-01-27 02:41:45 +00003fnmatch(FILENAME, PATTERN) matches according to the local convention.
4fnmatchcase(FILENAME, PATTERN) always takes case in account.
Guido van Rossum05e52191992-01-12 23:29:29 +00005
Guido van Rossum7e4b2de1995-01-27 02:41:45 +00006The functions operate by translating the pattern into a regular
7expression. They cache the compiled regular expressions for speed.
8
9The function translate(PATTERN) returns a regular expression
10corresponding to PATTERN. (It does not compile it.)
11"""
Brett Cannoncc143202010-07-23 16:22:25 +000012import os
13import posixpath
Guido van Rossum9694fca1997-10-22 21:00:49 +000014import re
Antoine Pitrou6fdb74f2010-08-13 16:26:40 +000015import functools
Guido van Rossum9694fca1997-10-22 21:00:49 +000016
Antoine Pitrou6fdb74f2010-08-13 16:26:40 +000017__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
Brett Cannoncc143202010-07-23 16:22:25 +000018
Tim Petersb1b4c792020-05-11 21:19:20 -050019# Build a thread-safe incrementing counter to help create unique regexp group
20# names across calls.
21from itertools import count
22_nextgroupnum = count().__next__
23del count
24
Guido van Rossum762c39e1991-01-01 18:11:14 +000025def fnmatch(name, pat):
Tim Peters88869f92001-01-14 23:36:06 +000026 """Test whether FILENAME matches PATTERN.
27
28 Patterns are Unix shell style:
29
30 * matches everything
31 ? matches any single character
32 [seq] matches any character in seq
33 [!seq] matches any char not in seq
34
35 An initial period in FILENAME is not special.
36 Both FILENAME and PATTERN are first case-normalized
37 if the operating system requires it.
38 If you don't want this, use fnmatchcase(FILENAME, PATTERN).
39 """
Tim Peters88869f92001-01-14 23:36:06 +000040 name = os.path.normcase(name)
41 pat = os.path.normcase(pat)
42 return fnmatchcase(name, pat)
Guido van Rossum7e4b2de1995-01-27 02:41:45 +000043
Raymond Hettingerd0dbb202013-02-17 01:33:37 -080044@functools.lru_cache(maxsize=256, typed=True)
Raymond Hettingerdb848032011-10-20 09:22:10 -070045def _compile_pattern(pat):
46 if isinstance(pat, bytes):
Antoine Pitrou6fdb74f2010-08-13 16:26:40 +000047 pat_str = str(pat, 'ISO-8859-1')
48 res_str = translate(pat_str)
49 res = bytes(res_str, 'ISO-8859-1')
50 else:
51 res = translate(pat)
52 return re.compile(res).match
Brett Cannoncc143202010-07-23 16:22:25 +000053
Martin v. Löwisb5d4d2a2001-06-06 06:24:38 +000054def filter(names, pat):
Andre Delfinoe8d22642020-12-18 16:10:20 -030055 """Construct a list from those elements of the iterable NAMES that match PAT."""
Guido van Rossumf0af3e32008-10-02 18:55:37 +000056 result = []
57 pat = os.path.normcase(pat)
Raymond Hettingerdb848032011-10-20 09:22:10 -070058 match = _compile_pattern(pat)
Martin v. Löwisb5d4d2a2001-06-06 06:24:38 +000059 if os.path is posixpath:
60 # normcase on posix is NOP. Optimize it away from the loop.
61 for name in names:
62 if match(name):
63 result.append(name)
64 else:
65 for name in names:
66 if match(os.path.normcase(name)):
67 result.append(name)
68 return result
69
Guido van Rossum7e4b2de1995-01-27 02:41:45 +000070def fnmatchcase(name, pat):
Tim Peters88869f92001-01-14 23:36:06 +000071 """Test whether FILENAME matches PATTERN, including case.
72
73 This is a version of fnmatch() which doesn't case-normalize
74 its arguments.
75 """
Raymond Hettingerdb848032011-10-20 09:22:10 -070076 match = _compile_pattern(pat)
Guido van Rossumf0af3e32008-10-02 18:55:37 +000077 return match(name) is not None
Guido van Rossum762c39e1991-01-01 18:11:14 +000078
Brett Cannoncc143202010-07-23 16:22:25 +000079
Guido van Rossum05e52191992-01-12 23:29:29 +000080def translate(pat):
Tim Peters88869f92001-01-14 23:36:06 +000081 """Translate a shell PATTERN to a regular expression.
82
83 There is no way to quote meta-characters.
84 """
85
Tim Petersb9c46a22020-05-05 21:28:24 -050086 STAR = object()
87 res = []
88 add = res.append
Tim Peters88869f92001-01-14 23:36:06 +000089 i, n = 0, len(pat)
Tim Peters88869f92001-01-14 23:36:06 +000090 while i < n:
91 c = pat[i]
92 i = i+1
93 if c == '*':
Tim Petersb9c46a22020-05-05 21:28:24 -050094 # compress consecutive `*` into one
95 if (not res) or res[-1] is not STAR:
96 add(STAR)
Tim Peters88869f92001-01-14 23:36:06 +000097 elif c == '?':
Tim Petersb9c46a22020-05-05 21:28:24 -050098 add('.')
Tim Peters88869f92001-01-14 23:36:06 +000099 elif c == '[':
100 j = i
101 if j < n and pat[j] == '!':
102 j = j+1
103 if j < n and pat[j] == ']':
104 j = j+1
105 while j < n and pat[j] != ']':
106 j = j+1
107 if j >= n:
Tim Petersb9c46a22020-05-05 21:28:24 -0500108 add('\\[')
Tim Peters88869f92001-01-14 23:36:06 +0000109 else:
Serhiy Storchaka23cdbfa2018-02-09 13:30:19 +0200110 stuff = pat[i:j]
111 if '--' not in stuff:
112 stuff = stuff.replace('\\', r'\\')
113 else:
114 chunks = []
115 k = i+2 if pat[i] == '!' else i+1
116 while True:
117 k = pat.find('-', k, j)
118 if k < 0:
119 break
120 chunks.append(pat[i:k])
121 i = k+1
122 k = k+3
123 chunks.append(pat[i:j])
124 # Escape backslashes and hyphens for set difference (--).
125 # Hyphens that create ranges shouldn't be escaped.
126 stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
127 for s in chunks)
128 # Escape set operations (&&, ~~ and ||).
129 stuff = re.sub(r'([&~|])', r'\\\1', stuff)
Tim Peters88869f92001-01-14 23:36:06 +0000130 i = j+1
131 if stuff[0] == '!':
Fred Drake46d9fda2001-03-21 18:05:48 +0000132 stuff = '^' + stuff[1:]
Serhiy Storchaka23cdbfa2018-02-09 13:30:19 +0200133 elif stuff[0] in ('^', '['):
Fred Drake46d9fda2001-03-21 18:05:48 +0000134 stuff = '\\' + stuff
Tim Petersb9c46a22020-05-05 21:28:24 -0500135 add(f'[{stuff}]')
Tim Peters88869f92001-01-14 23:36:06 +0000136 else:
Tim Petersb9c46a22020-05-05 21:28:24 -0500137 add(re.escape(c))
138 assert i == n
139
140 # Deal with STARs.
141 inp = res
142 res = []
143 add = res.append
144 i, n = 0, len(inp)
145 # Fixed pieces at the start?
146 while i < n and inp[i] is not STAR:
147 add(inp[i])
148 i += 1
149 # Now deal with STAR fixed STAR fixed ...
150 # For an interior `STAR fixed` pairing, we want to do a minimal
151 # .*? match followed by `fixed`, with no possibility of backtracking.
152 # We can't spell that directly, but can trick it into working by matching
153 # .*?fixed
154 # in a lookahead assertion, save the matched part in a group, then
155 # consume that group via a backreference. If the overall match fails,
156 # the lookahead assertion won't try alternatives. So the translation is:
Tim Petersb1b4c792020-05-11 21:19:20 -0500157 # (?=(?P<name>.*?fixed))(?P=name)
158 # Group names are created as needed: g0, g1, g2, ...
159 # The numbers are obtained from _nextgroupnum() to ensure they're unique
160 # across calls and across threads. This is because people rely on the
161 # undocumented ability to join multiple translate() results together via
162 # "|" to build large regexps matching "one of many" shell patterns.
Tim Petersb9c46a22020-05-05 21:28:24 -0500163 while i < n:
164 assert inp[i] is STAR
165 i += 1
166 if i == n:
167 add(".*")
168 break
169 assert inp[i] is not STAR
170 fixed = []
171 while i < n and inp[i] is not STAR:
172 fixed.append(inp[i])
173 i += 1
174 fixed = "".join(fixed)
175 if i == n:
176 add(".*")
177 add(fixed)
178 else:
Tim Petersb1b4c792020-05-11 21:19:20 -0500179 groupnum = _nextgroupnum()
Tim Petersb9c46a22020-05-05 21:28:24 -0500180 add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
181 assert i == n
182 res = "".join(res)
183 return fr'(?s:{res})\Z'