blob: 8dae6941ec7709cce1843fa0a6b4734efda19f03 [file] [log] [blame]
Rui Ueyamaf7a7ab52016-12-20 23:09:09 +00001//===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Rui Ueyamaf7a7ab52016-12-20 23:09:09 +00006//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a glob pattern matcher.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Support/GlobPattern.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/Optional.h"
16#include "llvm/ADT/StringRef.h"
17#include "llvm/Support/Errc.h"
18
19using namespace llvm;
20
21static bool hasWildcard(StringRef S) {
Jordan Rupprecht48993d52019-10-17 18:09:05 +000022 return S.find_first_of("?*[\\") != StringRef::npos;
Rui Ueyamaf7a7ab52016-12-20 23:09:09 +000023}
24
25// Expands character ranges and returns a bitmap.
26// For example, "a-cf-hz" is expanded to "abcfghz".
27static Expected<BitVector> expand(StringRef S, StringRef Original) {
28 BitVector BV(256, false);
29
30 // Expand X-Y.
31 for (;;) {
32 if (S.size() < 3)
33 break;
34
George Rimare36d7a62017-07-31 09:26:50 +000035 uint8_t Start = S[0];
36 uint8_t End = S[2];
37
Rui Ueyamaf7a7ab52016-12-20 23:09:09 +000038 // If it doesn't start with something like X-Y,
39 // consume the first character and proceed.
40 if (S[1] != '-') {
George Rimare36d7a62017-07-31 09:26:50 +000041 BV[Start] = true;
Rui Ueyamaf7a7ab52016-12-20 23:09:09 +000042 S = S.substr(1);
43 continue;
44 }
45
46 // It must be in the form of X-Y.
47 // Validate it and then interpret the range.
George Rimare36d7a62017-07-31 09:26:50 +000048 if (Start > End)
Rui Ueyamaf7a7ab52016-12-20 23:09:09 +000049 return make_error<StringError>("invalid glob pattern: " + Original,
50 errc::invalid_argument);
51
George Rimare36d7a62017-07-31 09:26:50 +000052 for (int C = Start; C <= End; ++C)
53 BV[(uint8_t)C] = true;
Rui Ueyamaf7a7ab52016-12-20 23:09:09 +000054 S = S.substr(3);
55 }
56
57 for (char C : S)
George Rimare36d7a62017-07-31 09:26:50 +000058 BV[(uint8_t)C] = true;
Rui Ueyamaf7a7ab52016-12-20 23:09:09 +000059 return BV;
60}
61
62// This is a scanner for the glob pattern.
Jordan Rupprecht48993d52019-10-17 18:09:05 +000063// A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]"
64// (which is a negative form of "[<chars>]"), "[!<chars>]" (which is
65// equivalent to "[^<chars>]"), or a non-meta character.
Rui Ueyamaf7a7ab52016-12-20 23:09:09 +000066// This function returns the first token in S.
67static Expected<BitVector> scan(StringRef &S, StringRef Original) {
68 switch (S[0]) {
69 case '*':
70 S = S.substr(1);
71 // '*' is represented by an empty bitvector.
72 // All other bitvectors are 256-bit long.
73 return BitVector();
74 case '?':
75 S = S.substr(1);
76 return BitVector(256, true);
77 case '[': {
Jordan Rupprecht48993d52019-10-17 18:09:05 +000078 // ']' is allowed as the first character of a character class. '[]' is
79 // invalid. So, just skip the first character.
80 size_t End = S.find(']', 2);
Rui Ueyamaf7a7ab52016-12-20 23:09:09 +000081 if (End == StringRef::npos)
82 return make_error<StringError>("invalid glob pattern: " + Original,
83 errc::invalid_argument);
84
85 StringRef Chars = S.substr(1, End - 1);
86 S = S.substr(End + 1);
Jordan Rupprecht48993d52019-10-17 18:09:05 +000087 if (Chars.startswith("^") || Chars.startswith("!")) {
Rui Ueyamaf7a7ab52016-12-20 23:09:09 +000088 Expected<BitVector> BV = expand(Chars.substr(1), Original);
89 if (!BV)
90 return BV.takeError();
91 return BV->flip();
92 }
93 return expand(Chars, Original);
94 }
Jordan Rupprecht48993d52019-10-17 18:09:05 +000095 case '\\':
96 // Eat this character and fall through below to treat it like a non-meta
97 // character.
98 S = S.substr(1);
99 LLVM_FALLTHROUGH;
Rui Ueyamaf7a7ab52016-12-20 23:09:09 +0000100 default:
101 BitVector BV(256, false);
George Rimare36d7a62017-07-31 09:26:50 +0000102 BV[(uint8_t)S[0]] = true;
Rui Ueyamaf7a7ab52016-12-20 23:09:09 +0000103 S = S.substr(1);
104 return BV;
105 }
106}
107
108Expected<GlobPattern> GlobPattern::create(StringRef S) {
109 GlobPattern Pat;
110
111 // S doesn't contain any metacharacter,
112 // so the regular string comparison should work.
113 if (!hasWildcard(S)) {
114 Pat.Exact = S;
115 return Pat;
116 }
117
Jordan Rupprecht48993d52019-10-17 18:09:05 +0000118 // S is something like "foo*", and the "* is not escaped. We can use
119 // startswith().
120 if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) {
Rui Ueyamaf7a7ab52016-12-20 23:09:09 +0000121 Pat.Prefix = S.drop_back();
122 return Pat;
123 }
124
125 // S is something like "*foo". We can use endswith().
126 if (S.startswith("*") && !hasWildcard(S.drop_front())) {
127 Pat.Suffix = S.drop_front();
128 return Pat;
129 }
130
131 // Otherwise, we need to do real glob pattern matching.
132 // Parse the pattern now.
133 StringRef Original = S;
134 while (!S.empty()) {
135 Expected<BitVector> BV = scan(S, Original);
136 if (!BV)
137 return BV.takeError();
138 Pat.Tokens.push_back(*BV);
139 }
140 return Pat;
141}
142
143bool GlobPattern::match(StringRef S) const {
144 if (Exact)
145 return S == *Exact;
146 if (Prefix)
147 return S.startswith(*Prefix);
148 if (Suffix)
149 return S.endswith(*Suffix);
150 return matchOne(Tokens, S);
151}
152
153// Runs glob pattern Pats against string S.
154bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
155 for (;;) {
156 if (Pats.empty())
157 return S.empty();
158
159 // If Pats[0] is '*', try to match Pats[1..] against all possible
160 // tail strings of S to see at least one pattern succeeds.
161 if (Pats[0].size() == 0) {
162 Pats = Pats.slice(1);
163 if (Pats.empty())
164 // Fast path. If a pattern is '*', it matches anything.
165 return true;
166 for (size_t I = 0, E = S.size(); I < E; ++I)
167 if (matchOne(Pats, S.substr(I)))
168 return true;
169 return false;
170 }
171
172 // If Pats[0] is not '*', it must consume one character.
George Rimare36d7a62017-07-31 09:26:50 +0000173 if (S.empty() || !Pats[0][(uint8_t)S[0]])
Rui Ueyamaf7a7ab52016-12-20 23:09:09 +0000174 return false;
175 Pats = Pats.slice(1);
176 S = S.substr(1);
177 }
178}