| """Filename matching with shell patterns. |
| |
| fnmatch(FILENAME, PATTERN) matches according to the local convention. |
| fnmatchcase(FILENAME, PATTERN) always takes case in account. |
| |
| The functions operate by translating the pattern into a regular |
| expression. They cache the compiled regular expressions for speed. |
| |
| The function translate(PATTERN) returns a regular expression |
| corresponding to PATTERN. (It does not compile it.) |
| """ |
| import os |
| import posixpath |
| import re |
| import functools |
| |
| __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] |
| |
| # Build a thread-safe incrementing counter to help create unique regexp group |
| # names across calls. |
| from itertools import count |
| _nextgroupnum = count().__next__ |
| del count |
| |
| def fnmatch(name, pat): |
| """Test whether FILENAME matches PATTERN. |
| |
| Patterns are Unix shell style: |
| |
| * matches everything |
| ? matches any single character |
| [seq] matches any character in seq |
| [!seq] matches any char not in seq |
| |
| An initial period in FILENAME is not special. |
| Both FILENAME and PATTERN are first case-normalized |
| if the operating system requires it. |
| If you don't want this, use fnmatchcase(FILENAME, PATTERN). |
| """ |
| name = os.path.normcase(name) |
| pat = os.path.normcase(pat) |
| return fnmatchcase(name, pat) |
| |
| @functools.lru_cache(maxsize=256, typed=True) |
| def _compile_pattern(pat): |
| if isinstance(pat, bytes): |
| pat_str = str(pat, 'ISO-8859-1') |
| res_str = translate(pat_str) |
| res = bytes(res_str, 'ISO-8859-1') |
| else: |
| res = translate(pat) |
| return re.compile(res).match |
| |
| def filter(names, pat): |
| """Return the subset of the list NAMES that match PAT.""" |
| result = [] |
| pat = os.path.normcase(pat) |
| match = _compile_pattern(pat) |
| if os.path is posixpath: |
| # normcase on posix is NOP. Optimize it away from the loop. |
| for name in names: |
| if match(name): |
| result.append(name) |
| else: |
| for name in names: |
| if match(os.path.normcase(name)): |
| result.append(name) |
| return result |
| |
| def fnmatchcase(name, pat): |
| """Test whether FILENAME matches PATTERN, including case. |
| |
| This is a version of fnmatch() which doesn't case-normalize |
| its arguments. |
| """ |
| match = _compile_pattern(pat) |
| return match(name) is not None |
| |
| |
| def translate(pat): |
| """Translate a shell PATTERN to a regular expression. |
| |
| There is no way to quote meta-characters. |
| """ |
| |
| STAR = object() |
| res = [] |
| add = res.append |
| i, n = 0, len(pat) |
| while i < n: |
| c = pat[i] |
| i = i+1 |
| if c == '*': |
| # compress consecutive `*` into one |
| if (not res) or res[-1] is not STAR: |
| add(STAR) |
| elif c == '?': |
| add('.') |
| elif c == '[': |
| j = i |
| if j < n and pat[j] == '!': |
| j = j+1 |
| if j < n and pat[j] == ']': |
| j = j+1 |
| while j < n and pat[j] != ']': |
| j = j+1 |
| if j >= n: |
| add('\\[') |
| else: |
| stuff = pat[i:j] |
| if '--' not in stuff: |
| stuff = stuff.replace('\\', r'\\') |
| else: |
| chunks = [] |
| k = i+2 if pat[i] == '!' else i+1 |
| while True: |
| k = pat.find('-', k, j) |
| if k < 0: |
| break |
| chunks.append(pat[i:k]) |
| i = k+1 |
| k = k+3 |
| chunks.append(pat[i:j]) |
| # Escape backslashes and hyphens for set difference (--). |
| # Hyphens that create ranges shouldn't be escaped. |
| stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-') |
| for s in chunks) |
| # Escape set operations (&&, ~~ and ||). |
| stuff = re.sub(r'([&~|])', r'\\\1', stuff) |
| i = j+1 |
| if stuff[0] == '!': |
| stuff = '^' + stuff[1:] |
| elif stuff[0] in ('^', '['): |
| stuff = '\\' + stuff |
| add(f'[{stuff}]') |
| else: |
| add(re.escape(c)) |
| assert i == n |
| |
| # Deal with STARs. |
| inp = res |
| res = [] |
| add = res.append |
| i, n = 0, len(inp) |
| # Fixed pieces at the start? |
| while i < n and inp[i] is not STAR: |
| add(inp[i]) |
| i += 1 |
| # Now deal with STAR fixed STAR fixed ... |
| # For an interior `STAR fixed` pairing, we want to do a minimal |
| # .*? match followed by `fixed`, with no possibility of backtracking. |
| # We can't spell that directly, but can trick it into working by matching |
| # .*?fixed |
| # in a lookahead assertion, save the matched part in a group, then |
| # consume that group via a backreference. If the overall match fails, |
| # the lookahead assertion won't try alternatives. So the translation is: |
| # (?=(?P<name>.*?fixed))(?P=name) |
| # Group names are created as needed: g0, g1, g2, ... |
| # The numbers are obtained from _nextgroupnum() to ensure they're unique |
| # across calls and across threads. This is because people rely on the |
| # undocumented ability to join multiple translate() results together via |
| # "|" to build large regexps matching "one of many" shell patterns. |
| while i < n: |
| assert inp[i] is STAR |
| i += 1 |
| if i == n: |
| add(".*") |
| break |
| assert inp[i] is not STAR |
| fixed = [] |
| while i < n and inp[i] is not STAR: |
| fixed.append(inp[i]) |
| i += 1 |
| fixed = "".join(fixed) |
| if i == n: |
| add(".*") |
| add(fixed) |
| else: |
| groupnum = _nextgroupnum() |
| add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})") |
| assert i == n |
| res = "".join(res) |
| return fr'(?s:{res})\Z' |