blob: 8ee2febeeea13d022be9d38a2b00c5437cce0e04 [file] [log] [blame]
Rui Ueyamaf7a7ab52016-12-20 23:09:09 +00001//===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a glob pattern matcher.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Support/GlobPattern.h"
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/Optional.h"
17#include "llvm/ADT/StringRef.h"
18#include "llvm/Support/Errc.h"
19
20using namespace llvm;
21
22static bool hasWildcard(StringRef S) {
23 return S.find_first_of("?*[") != StringRef::npos;
24}
25
26// Expands character ranges and returns a bitmap.
27// For example, "a-cf-hz" is expanded to "abcfghz".
28static Expected<BitVector> expand(StringRef S, StringRef Original) {
29 BitVector BV(256, false);
30
31 // Expand X-Y.
32 for (;;) {
33 if (S.size() < 3)
34 break;
35
36 // If it doesn't start with something like X-Y,
37 // consume the first character and proceed.
38 if (S[1] != '-') {
39 BV[S[0]] = true;
40 S = S.substr(1);
41 continue;
42 }
43
44 // It must be in the form of X-Y.
45 // Validate it and then interpret the range.
46 if (S[0] > S[2])
47 return make_error<StringError>("invalid glob pattern: " + Original,
48 errc::invalid_argument);
49
50 for (int C = S[0]; C <= S[2]; ++C)
51 BV[C] = true;
52 S = S.substr(3);
53 }
54
55 for (char C : S)
56 BV[C] = true;
57 return BV;
58}
59
60// This is a scanner for the glob pattern.
61// A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]"
62// (which is a negative form of "[<chars>]"), or a non-meta character.
63// This function returns the first token in S.
64static Expected<BitVector> scan(StringRef &S, StringRef Original) {
65 switch (S[0]) {
66 case '*':
67 S = S.substr(1);
68 // '*' is represented by an empty bitvector.
69 // All other bitvectors are 256-bit long.
70 return BitVector();
71 case '?':
72 S = S.substr(1);
73 return BitVector(256, true);
74 case '[': {
75 size_t End = S.find(']', 1);
76 if (End == StringRef::npos)
77 return make_error<StringError>("invalid glob pattern: " + Original,
78 errc::invalid_argument);
79
80 StringRef Chars = S.substr(1, End - 1);
81 S = S.substr(End + 1);
82 if (Chars.startswith("^")) {
83 Expected<BitVector> BV = expand(Chars.substr(1), Original);
84 if (!BV)
85 return BV.takeError();
86 return BV->flip();
87 }
88 return expand(Chars, Original);
89 }
90 default:
91 BitVector BV(256, false);
92 BV[S[0]] = true;
93 S = S.substr(1);
94 return BV;
95 }
96}
97
98Expected<GlobPattern> GlobPattern::create(StringRef S) {
99 GlobPattern Pat;
100
101 // S doesn't contain any metacharacter,
102 // so the regular string comparison should work.
103 if (!hasWildcard(S)) {
104 Pat.Exact = S;
105 return Pat;
106 }
107
108 // S is something like "foo*". We can use startswith().
109 if (S.endswith("*") && !hasWildcard(S.drop_back())) {
110 Pat.Prefix = S.drop_back();
111 return Pat;
112 }
113
114 // S is something like "*foo". We can use endswith().
115 if (S.startswith("*") && !hasWildcard(S.drop_front())) {
116 Pat.Suffix = S.drop_front();
117 return Pat;
118 }
119
120 // Otherwise, we need to do real glob pattern matching.
121 // Parse the pattern now.
122 StringRef Original = S;
123 while (!S.empty()) {
124 Expected<BitVector> BV = scan(S, Original);
125 if (!BV)
126 return BV.takeError();
127 Pat.Tokens.push_back(*BV);
128 }
129 return Pat;
130}
131
132bool GlobPattern::match(StringRef S) const {
133 if (Exact)
134 return S == *Exact;
135 if (Prefix)
136 return S.startswith(*Prefix);
137 if (Suffix)
138 return S.endswith(*Suffix);
139 return matchOne(Tokens, S);
140}
141
142// Runs glob pattern Pats against string S.
143bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
144 for (;;) {
145 if (Pats.empty())
146 return S.empty();
147
148 // If Pats[0] is '*', try to match Pats[1..] against all possible
149 // tail strings of S to see at least one pattern succeeds.
150 if (Pats[0].size() == 0) {
151 Pats = Pats.slice(1);
152 if (Pats.empty())
153 // Fast path. If a pattern is '*', it matches anything.
154 return true;
155 for (size_t I = 0, E = S.size(); I < E; ++I)
156 if (matchOne(Pats, S.substr(I)))
157 return true;
158 return false;
159 }
160
161 // If Pats[0] is not '*', it must consume one character.
162 if (S.empty() || !Pats[0][S[0]])
163 return false;
164 Pats = Pats.slice(1);
165 S = S.substr(1);
166 }
167}