blob: d7d915d51314da4f9c690c81fc757542c577bed9 [file] [log] [blame]
Guido van Rossum7e4b2de1995-01-27 02:41:45 +00001"""Filename matching with shell patterns.
Guido van Rossum05e52191992-01-12 23:29:29 +00002
Guido van Rossum7e4b2de1995-01-27 02:41:45 +00003fnmatch(FILENAME, PATTERN) matches according to the local convention.
4fnmatchcase(FILENAME, PATTERN) always takes case in account.
Guido van Rossum05e52191992-01-12 23:29:29 +00005
Guido van Rossum7e4b2de1995-01-27 02:41:45 +00006The functions operate by translating the pattern into a regular
7expression. They cache the compiled regular expressions for speed.
8
9The function translate(PATTERN) returns a regular expression
10corresponding to PATTERN. (It does not compile it.)
11"""
Brett Cannoncc143202010-07-23 16:22:25 +000012import os
13import posixpath
Guido van Rossum9694fca1997-10-22 21:00:49 +000014import re
Antoine Pitrou6fdb74f2010-08-13 16:26:40 +000015import functools
Guido van Rossum9694fca1997-10-22 21:00:49 +000016
Antoine Pitrou6fdb74f2010-08-13 16:26:40 +000017__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
Brett Cannoncc143202010-07-23 16:22:25 +000018
Guido van Rossum762c39e1991-01-01 18:11:14 +000019def fnmatch(name, pat):
Tim Peters88869f92001-01-14 23:36:06 +000020 """Test whether FILENAME matches PATTERN.
21
22 Patterns are Unix shell style:
23
24 * matches everything
25 ? matches any single character
26 [seq] matches any character in seq
27 [!seq] matches any char not in seq
28
29 An initial period in FILENAME is not special.
30 Both FILENAME and PATTERN are first case-normalized
31 if the operating system requires it.
32 If you don't want this, use fnmatchcase(FILENAME, PATTERN).
33 """
Tim Peters88869f92001-01-14 23:36:06 +000034 name = os.path.normcase(name)
35 pat = os.path.normcase(pat)
36 return fnmatchcase(name, pat)
Guido van Rossum7e4b2de1995-01-27 02:41:45 +000037
Raymond Hettingerd0dbb202013-02-17 01:33:37 -080038@functools.lru_cache(maxsize=256, typed=True)
Raymond Hettingerdb848032011-10-20 09:22:10 -070039def _compile_pattern(pat):
40 if isinstance(pat, bytes):
Antoine Pitrou6fdb74f2010-08-13 16:26:40 +000041 pat_str = str(pat, 'ISO-8859-1')
42 res_str = translate(pat_str)
43 res = bytes(res_str, 'ISO-8859-1')
44 else:
45 res = translate(pat)
46 return re.compile(res).match
Brett Cannoncc143202010-07-23 16:22:25 +000047
Martin v. Löwisb5d4d2a2001-06-06 06:24:38 +000048def filter(names, pat):
Brett Cannoncc143202010-07-23 16:22:25 +000049 """Return the subset of the list NAMES that match PAT."""
Guido van Rossumf0af3e32008-10-02 18:55:37 +000050 result = []
51 pat = os.path.normcase(pat)
Raymond Hettingerdb848032011-10-20 09:22:10 -070052 match = _compile_pattern(pat)
Martin v. Löwisb5d4d2a2001-06-06 06:24:38 +000053 if os.path is posixpath:
54 # normcase on posix is NOP. Optimize it away from the loop.
55 for name in names:
56 if match(name):
57 result.append(name)
58 else:
59 for name in names:
60 if match(os.path.normcase(name)):
61 result.append(name)
62 return result
63
Guido van Rossum7e4b2de1995-01-27 02:41:45 +000064def fnmatchcase(name, pat):
Tim Peters88869f92001-01-14 23:36:06 +000065 """Test whether FILENAME matches PATTERN, including case.
66
67 This is a version of fnmatch() which doesn't case-normalize
68 its arguments.
69 """
Raymond Hettingerdb848032011-10-20 09:22:10 -070070 match = _compile_pattern(pat)
Guido van Rossumf0af3e32008-10-02 18:55:37 +000071 return match(name) is not None
Guido van Rossum762c39e1991-01-01 18:11:14 +000072
Brett Cannoncc143202010-07-23 16:22:25 +000073
Guido van Rossum05e52191992-01-12 23:29:29 +000074def translate(pat):
Tim Peters88869f92001-01-14 23:36:06 +000075 """Translate a shell PATTERN to a regular expression.
76
77 There is no way to quote meta-characters.
78 """
79
Tim Petersb9c46a22020-05-05 21:28:24 -050080 STAR = object()
81 res = []
82 add = res.append
Tim Peters88869f92001-01-14 23:36:06 +000083 i, n = 0, len(pat)
Tim Peters88869f92001-01-14 23:36:06 +000084 while i < n:
85 c = pat[i]
86 i = i+1
87 if c == '*':
Tim Petersb9c46a22020-05-05 21:28:24 -050088 # compress consecutive `*` into one
89 if (not res) or res[-1] is not STAR:
90 add(STAR)
Tim Peters88869f92001-01-14 23:36:06 +000091 elif c == '?':
Tim Petersb9c46a22020-05-05 21:28:24 -050092 add('.')
Tim Peters88869f92001-01-14 23:36:06 +000093 elif c == '[':
94 j = i
95 if j < n and pat[j] == '!':
96 j = j+1
97 if j < n and pat[j] == ']':
98 j = j+1
99 while j < n and pat[j] != ']':
100 j = j+1
101 if j >= n:
Tim Petersb9c46a22020-05-05 21:28:24 -0500102 add('\\[')
Tim Peters88869f92001-01-14 23:36:06 +0000103 else:
Serhiy Storchaka23cdbfa2018-02-09 13:30:19 +0200104 stuff = pat[i:j]
105 if '--' not in stuff:
106 stuff = stuff.replace('\\', r'\\')
107 else:
108 chunks = []
109 k = i+2 if pat[i] == '!' else i+1
110 while True:
111 k = pat.find('-', k, j)
112 if k < 0:
113 break
114 chunks.append(pat[i:k])
115 i = k+1
116 k = k+3
117 chunks.append(pat[i:j])
118 # Escape backslashes and hyphens for set difference (--).
119 # Hyphens that create ranges shouldn't be escaped.
120 stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
121 for s in chunks)
122 # Escape set operations (&&, ~~ and ||).
123 stuff = re.sub(r'([&~|])', r'\\\1', stuff)
Tim Peters88869f92001-01-14 23:36:06 +0000124 i = j+1
125 if stuff[0] == '!':
Fred Drake46d9fda2001-03-21 18:05:48 +0000126 stuff = '^' + stuff[1:]
Serhiy Storchaka23cdbfa2018-02-09 13:30:19 +0200127 elif stuff[0] in ('^', '['):
Fred Drake46d9fda2001-03-21 18:05:48 +0000128 stuff = '\\' + stuff
Tim Petersb9c46a22020-05-05 21:28:24 -0500129 add(f'[{stuff}]')
Tim Peters88869f92001-01-14 23:36:06 +0000130 else:
Tim Petersb9c46a22020-05-05 21:28:24 -0500131 add(re.escape(c))
132 assert i == n
133
134 # Deal with STARs.
135 inp = res
136 res = []
137 add = res.append
138 i, n = 0, len(inp)
139 # Fixed pieces at the start?
140 while i < n and inp[i] is not STAR:
141 add(inp[i])
142 i += 1
143 # Now deal with STAR fixed STAR fixed ...
144 # For an interior `STAR fixed` pairing, we want to do a minimal
145 # .*? match followed by `fixed`, with no possibility of backtracking.
146 # We can't spell that directly, but can trick it into working by matching
147 # .*?fixed
148 # in a lookahead assertion, save the matched part in a group, then
149 # consume that group via a backreference. If the overall match fails,
150 # the lookahead assertion won't try alternatives. So the translation is:
151 # (?=(P<name>.*?fixed))(?P=name)
152 # Group names are created as needed: g1, g2, g3, ...
153 groupnum = 0
154 while i < n:
155 assert inp[i] is STAR
156 i += 1
157 if i == n:
158 add(".*")
159 break
160 assert inp[i] is not STAR
161 fixed = []
162 while i < n and inp[i] is not STAR:
163 fixed.append(inp[i])
164 i += 1
165 fixed = "".join(fixed)
166 if i == n:
167 add(".*")
168 add(fixed)
169 else:
170 groupnum += 1
171 add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
172 assert i == n
173 res = "".join(res)
174 return fr'(?s:{res})\Z'