blob: e8365fc248af03e635242482128d784ae77178d5 [file] [log] [blame]
Chris Lattneree3c74f2009-07-08 18:44:05 +00001//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// FileCheck does a line-by-line check of a file that validates whether it
11// contains the expected content. This is useful for regression tests etc.
12//
13// This program exits with an error status of 2 on error, exit status of 0 if
14// the file matched the expected contents, and exit status of 1 if it did not
15// contain the expected contents.
16//
17//===----------------------------------------------------------------------===//
18
Chandler Carruth91d19d82012-12-04 10:37:14 +000019#include "llvm/ADT/SmallString.h"
20#include "llvm/ADT/StringExtras.h"
21#include "llvm/ADT/StringMap.h"
Matt Arsenault13df4622013-11-10 02:04:09 +000022#include "llvm/ADT/StringSet.h"
Chris Lattneree3c74f2009-07-08 18:44:05 +000023#include "llvm/Support/CommandLine.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include "llvm/Support/PrettyStackTrace.h"
Chris Lattnerf08d2db2009-09-24 21:47:32 +000026#include "llvm/Support/Regex.h"
Chandler Carruth91d19d82012-12-04 10:37:14 +000027#include "llvm/Support/Signals.h"
Chris Lattneree3c74f2009-07-08 18:44:05 +000028#include "llvm/Support/SourceMgr.h"
29#include "llvm/Support/raw_ostream.h"
Chris Lattner8879e062009-09-27 07:56:52 +000030#include <algorithm>
Will Dietz981af002013-10-12 00:55:57 +000031#include <cctype>
Eli Benderskye8b8f1b2012-12-01 21:54:48 +000032#include <map>
33#include <string>
Rafael Espindolaa6e9c3e2014-06-12 17:38:55 +000034#include <system_error>
Eli Benderskye8b8f1b2012-12-01 21:54:48 +000035#include <vector>
Chris Lattneree3c74f2009-07-08 18:44:05 +000036using namespace llvm;
37
38static cl::opt<std::string>
39CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40
41static cl::opt<std::string>
42InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43 cl::init("-"), cl::value_desc("filename"));
44
Matt Arsenault13df4622013-11-10 02:04:09 +000045static cl::list<std::string>
46CheckPrefixes("check-prefix",
47 cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
Chris Lattneree3c74f2009-07-08 18:44:05 +000048
Chris Lattner2c3e5cd2009-07-11 18:58:15 +000049static cl::opt<bool>
50NoCanonicalizeWhiteSpace("strict-whitespace",
51 cl::desc("Do not treat all horizontal whitespace as equivalent"));
52
Alexander Kornienko56ccdbb2014-07-11 12:39:32 +000053static cl::list<std::string> ImplicitCheckNot(
54 "implicit-check-not",
55 cl::desc("Add an implicit negative check with this pattern to every\n"
56 "positive check. This can be used to ensure that no instances of\n"
57 "this pattern occur which are not matched by a positive pattern"),
58 cl::value_desc("pattern"));
59
Justin Bogner1b9f9362014-08-07 18:40:37 +000060static cl::opt<bool> AllowEmptyInput(
61 "allow-empty", cl::init(false),
62 cl::desc("Allow the input file to be empty. This is useful when making\n"
63 "checks that some error message does not occur, for example."));
64
Matt Arsenault13df4622013-11-10 02:04:09 +000065typedef cl::list<std::string>::const_iterator prefix_iterator;
66
Chris Lattner74d50732009-09-24 20:39:13 +000067//===----------------------------------------------------------------------===//
68// Pattern Handling Code.
69//===----------------------------------------------------------------------===//
70
namespace Check {
  /// CheckType - The kind of check a pattern or check string represents.
  /// The numeric values are the ones the compiler would assign implicitly;
  /// they are spelled out here only for readability.
  enum CheckType {
    CheckNone  = 0,
    CheckPlain = 1,
    CheckNext  = 2,
    CheckNot   = 3,
    CheckDAG   = 4,
    CheckLabel = 5,

    /// CheckEOF - When set, this pattern only matches the end of file. This
    /// is used for trailing CHECK-NOTs.
    CheckEOF   = 6
  };
}
85
Chris Lattner3b40b442009-09-24 20:25:55 +000086class Pattern {
Chris Lattner0a4c44b2009-09-25 17:29:36 +000087 SMLoc PatternLoc;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +000088
Matt Arsenault38820972013-09-17 22:30:02 +000089 Check::CheckType CheckTy;
Michael Liao91a1b2c2013-05-14 20:34:12 +000090
Chris Lattnerb16ab0c2009-09-25 17:23:43 +000091 /// FixedStr - If non-empty, this pattern is a fixed string match with the
92 /// specified fixed string.
Chris Lattner221460e2009-09-25 17:09:12 +000093 StringRef FixedStr;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +000094
Chris Lattnerb16ab0c2009-09-25 17:23:43 +000095 /// RegEx - If non-empty, this is a regex pattern.
96 std::string RegExStr;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +000097
Alexander Kornienko92987fb2012-11-14 21:07:37 +000098 /// \brief Contains the number of line this pattern is in.
99 unsigned LineNumber;
100
Chris Lattner8879e062009-09-27 07:56:52 +0000101 /// VariableUses - Entries in this vector map to uses of a variable in the
102 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain
103 /// "foobaz" and we'll get an entry in this vector that tells us to insert the
104 /// value of bar at offset 3.
105 std::vector<std::pair<StringRef, unsigned> > VariableUses;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000106
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000107 /// VariableDefs - Maps definitions of variables to their parenthesized
108 /// capture numbers.
109 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
110 std::map<StringRef, unsigned> VariableDefs;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000111
Chris Lattner3b40b442009-09-24 20:25:55 +0000112public:
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000113
Matt Arsenault38820972013-09-17 22:30:02 +0000114 Pattern(Check::CheckType Ty)
115 : CheckTy(Ty) { }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000116
Michael Liao0b707eb2013-04-25 21:31:34 +0000117 /// getLoc - Return the location in source code.
118 SMLoc getLoc() const { return PatternLoc; }
119
Matt Arsenault13df4622013-11-10 02:04:09 +0000120 /// ParsePattern - Parse the given string into the Pattern. Prefix provides
121 /// which prefix is being matched, SM provides the SourceMgr used for error
122 /// reports, and LineNumber is the line number in the input file from which
123 /// the pattern string was read. Returns true in case of an error, false
124 /// otherwise.
125 bool ParsePattern(StringRef PatternStr,
126 StringRef Prefix,
127 SourceMgr &SM,
128 unsigned LineNumber);
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000129
Chris Lattner3b40b442009-09-24 20:25:55 +0000130 /// Match - Match the pattern string against the input buffer Buffer. This
131 /// returns the position that is matched or npos if there is no match. If
132 /// there is a match, the size of the matched string is returned in MatchLen.
Chris Lattner8879e062009-09-27 07:56:52 +0000133 ///
134 /// The VariableTable StringMap provides the current values of filecheck
135 /// variables and is updated if this match defines new values.
136 size_t Match(StringRef Buffer, size_t &MatchLen,
137 StringMap<StringRef> &VariableTable) const;
Daniel Dunbare0ef65a2009-11-22 22:08:06 +0000138
139 /// PrintFailureInfo - Print additional information about a failure to match
140 /// involving this pattern.
141 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
142 const StringMap<StringRef> &VariableTable) const;
143
Stephen Linf8bd2e52013-07-12 14:51:05 +0000144 bool hasVariable() const { return !(VariableUses.empty() &&
145 VariableDefs.empty()); }
146
Matt Arsenault38820972013-09-17 22:30:02 +0000147 Check::CheckType getCheckTy() const { return CheckTy; }
Michael Liao91a1b2c2013-05-14 20:34:12 +0000148
Chris Lattnerb16ab0c2009-09-25 17:23:43 +0000149private:
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000150 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
151 void AddBackrefToRegEx(unsigned BackrefNum);
Daniel Dunbarfd29d882009-11-22 22:59:26 +0000152
153 /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
154 /// matching this pattern at the start of \arg Buffer; a distance of zero
155 /// should correspond to a perfect match.
156 unsigned ComputeMatchDistance(StringRef Buffer,
157 const StringMap<StringRef> &VariableTable) const;
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000158
159 /// \brief Evaluates expression and stores the result to \p Value.
160 /// \return true on success. false when the expression has invalid syntax.
161 bool EvaluateExpression(StringRef Expr, std::string &Value) const;
Eli Bendersky061d2ba2012-12-02 16:02:41 +0000162
163 /// \brief Finds the closing sequence of a regex variable usage or
164 /// definition. Str has to point in the beginning of the definition
165 /// (right after the opening sequence).
166 /// \return offset of the closing sequence within Str, or npos if it was not
167 /// found.
Adrian Prantl81e5cd92014-01-03 21:49:09 +0000168 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
Chris Lattner3b40b442009-09-24 20:25:55 +0000169};
170
Chris Lattner8879e062009-09-27 07:56:52 +0000171
Matt Arsenault13df4622013-11-10 02:04:09 +0000172bool Pattern::ParsePattern(StringRef PatternStr,
173 StringRef Prefix,
174 SourceMgr &SM,
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000175 unsigned LineNumber) {
176 this->LineNumber = LineNumber;
Chris Lattner0a4c44b2009-09-25 17:29:36 +0000177 PatternLoc = SMLoc::getFromPointer(PatternStr.data());
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000178
Chris Lattner74d50732009-09-24 20:39:13 +0000179 // Ignore trailing whitespace.
180 while (!PatternStr.empty() &&
181 (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
182 PatternStr = PatternStr.substr(0, PatternStr.size()-1);
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000183
Chris Lattner74d50732009-09-24 20:39:13 +0000184 // Check that there is something on the line.
185 if (PatternStr.empty()) {
Chris Lattner03b80a42011-10-16 05:43:57 +0000186 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
187 "found empty check string with prefix '" +
Matt Arsenault13df4622013-11-10 02:04:09 +0000188 Prefix + ":'");
Chris Lattner74d50732009-09-24 20:39:13 +0000189 return true;
190 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000191
Chris Lattner221460e2009-09-25 17:09:12 +0000192 // Check to see if this is a fixed string, or if it has regex pieces.
Ted Kremenekd9466962012-09-08 04:32:13 +0000193 if (PatternStr.size() < 2 ||
Chris Lattner8879e062009-09-27 07:56:52 +0000194 (PatternStr.find("{{") == StringRef::npos &&
195 PatternStr.find("[[") == StringRef::npos)) {
Chris Lattner221460e2009-09-25 17:09:12 +0000196 FixedStr = PatternStr;
197 return false;
198 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000199
Chris Lattner8879e062009-09-27 07:56:52 +0000200 // Paren value #0 is for the fully matched string. Any new parenthesized
Chris Lattner53e06792011-04-09 06:18:02 +0000201 // values add from there.
Chris Lattner8879e062009-09-27 07:56:52 +0000202 unsigned CurParen = 1;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000203
Chris Lattnerb16ab0c2009-09-25 17:23:43 +0000204 // Otherwise, there is at least one regex piece. Build up the regex pattern
205 // by escaping scary characters in fixed strings, building up one big regex.
Chris Lattnerf08d2db2009-09-24 21:47:32 +0000206 while (!PatternStr.empty()) {
Chris Lattner8879e062009-09-27 07:56:52 +0000207 // RegEx matches.
Chris Lattner53e06792011-04-09 06:18:02 +0000208 if (PatternStr.startswith("{{")) {
Eli Bendersky43d50d42012-11-30 14:22:14 +0000209 // This is the start of a regex match. Scan for the }}.
Chris Lattner8879e062009-09-27 07:56:52 +0000210 size_t End = PatternStr.find("}}");
211 if (End == StringRef::npos) {
212 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
Chris Lattner03b80a42011-10-16 05:43:57 +0000213 SourceMgr::DK_Error,
214 "found start of regex string with no end '}}'");
Chris Lattner8879e062009-09-27 07:56:52 +0000215 return true;
216 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000217
Chris Lattnere53c95f2011-04-09 06:37:03 +0000218 // Enclose {{}} patterns in parens just like [[]] even though we're not
219 // capturing the result for any purpose. This is required in case the
220 // expression contains an alternation like: CHECK: abc{{x|z}}def. We
221 // want this to turn into: "abc(x|z)def" not "abcx|zdef".
222 RegExStr += '(';
223 ++CurParen;
224
Chris Lattner8879e062009-09-27 07:56:52 +0000225 if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
226 return true;
Chris Lattnere53c95f2011-04-09 06:37:03 +0000227 RegExStr += ')';
Chris Lattner53e06792011-04-09 06:18:02 +0000228
Chris Lattner8879e062009-09-27 07:56:52 +0000229 PatternStr = PatternStr.substr(End+2);
Chris Lattnerf08d2db2009-09-24 21:47:32 +0000230 continue;
231 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000232
Chris Lattner8879e062009-09-27 07:56:52 +0000233 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
234 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
235 // second form is [[foo]] which is a reference to foo. The variable name
Daniel Dunbar57cb7332009-11-22 22:07:50 +0000236 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
Chris Lattner8879e062009-09-27 07:56:52 +0000237 // it. This is to catch some common errors.
Chris Lattner53e06792011-04-09 06:18:02 +0000238 if (PatternStr.startswith("[[")) {
Eli Bendersky061d2ba2012-12-02 16:02:41 +0000239 // Find the closing bracket pair ending the match. End is going to be an
240 // offset relative to the beginning of the match string.
Adrian Prantl81e5cd92014-01-03 21:49:09 +0000241 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
Eli Bendersky061d2ba2012-12-02 16:02:41 +0000242
Chris Lattner8879e062009-09-27 07:56:52 +0000243 if (End == StringRef::npos) {
244 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
Chris Lattner03b80a42011-10-16 05:43:57 +0000245 SourceMgr::DK_Error,
246 "invalid named regex reference, no ]] found");
Chris Lattner8879e062009-09-27 07:56:52 +0000247 return true;
248 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000249
Eli Bendersky061d2ba2012-12-02 16:02:41 +0000250 StringRef MatchStr = PatternStr.substr(2, End);
251 PatternStr = PatternStr.substr(End+4);
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000252
Chris Lattner8879e062009-09-27 07:56:52 +0000253 // Get the regex name (e.g. "foo").
254 size_t NameEnd = MatchStr.find(':');
255 StringRef Name = MatchStr.substr(0, NameEnd);
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000256
Chris Lattner8879e062009-09-27 07:56:52 +0000257 if (Name.empty()) {
Chris Lattner03b80a42011-10-16 05:43:57 +0000258 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
259 "invalid name in named regex: empty name");
Chris Lattner8879e062009-09-27 07:56:52 +0000260 return true;
261 }
262
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000263 // Verify that the name/expression is well formed. FileCheck currently
264 // supports @LINE, @LINE+number, @LINE-number expressions. The check here
265 // is relaxed, more strict check is performed in \c EvaluateExpression.
266 bool IsExpression = false;
267 for (unsigned i = 0, e = Name.size(); i != e; ++i) {
268 if (i == 0 && Name[i] == '@') {
269 if (NameEnd != StringRef::npos) {
270 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
271 SourceMgr::DK_Error,
272 "invalid name in named regex definition");
273 return true;
274 }
275 IsExpression = true;
276 continue;
277 }
278 if (Name[i] != '_' && !isalnum(Name[i]) &&
279 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
Chris Lattner8879e062009-09-27 07:56:52 +0000280 SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
Chris Lattner03b80a42011-10-16 05:43:57 +0000281 SourceMgr::DK_Error, "invalid name in named regex");
Chris Lattner8879e062009-09-27 07:56:52 +0000282 return true;
283 }
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000284 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000285
Chris Lattner8879e062009-09-27 07:56:52 +0000286 // Name can't start with a digit.
Guy Benyei83c74e92013-02-12 21:21:59 +0000287 if (isdigit(static_cast<unsigned char>(Name[0]))) {
Chris Lattner03b80a42011-10-16 05:43:57 +0000288 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
289 "invalid name in named regex");
Chris Lattner8879e062009-09-27 07:56:52 +0000290 return true;
291 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000292
Chris Lattner8879e062009-09-27 07:56:52 +0000293 // Handle [[foo]].
294 if (NameEnd == StringRef::npos) {
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000295 // Handle variables that were defined earlier on the same line by
296 // emitting a backreference.
297 if (VariableDefs.find(Name) != VariableDefs.end()) {
298 unsigned VarParenNum = VariableDefs[Name];
299 if (VarParenNum < 1 || VarParenNum > 9) {
300 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
301 SourceMgr::DK_Error,
302 "Can't back-reference more than 9 variables");
303 return true;
304 }
305 AddBackrefToRegEx(VarParenNum);
306 } else {
307 VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
308 }
Chris Lattner8879e062009-09-27 07:56:52 +0000309 continue;
310 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000311
Chris Lattner8879e062009-09-27 07:56:52 +0000312 // Handle [[foo:.*]].
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000313 VariableDefs[Name] = CurParen;
Chris Lattner8879e062009-09-27 07:56:52 +0000314 RegExStr += '(';
315 ++CurParen;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000316
Chris Lattner8879e062009-09-27 07:56:52 +0000317 if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
318 return true;
319
320 RegExStr += ')';
Chris Lattnerf08d2db2009-09-24 21:47:32 +0000321 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000322
Chris Lattner8879e062009-09-27 07:56:52 +0000323 // Handle fixed string matches.
324 // Find the end, which is the start of the next regex.
325 size_t FixedMatchEnd = PatternStr.find("{{");
326 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
Hans Wennborg6f4f77b2013-12-12 00:06:41 +0000327 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
Chris Lattner8879e062009-09-27 07:56:52 +0000328 PatternStr = PatternStr.substr(FixedMatchEnd);
Chris Lattnerf08d2db2009-09-24 21:47:32 +0000329 }
Chris Lattnera2f8fc52009-09-24 20:45:07 +0000330
Chris Lattner74d50732009-09-24 20:39:13 +0000331 return false;
332}
333
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000334bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
Chris Lattner8879e062009-09-27 07:56:52 +0000335 SourceMgr &SM) {
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000336 Regex R(RS);
Chris Lattner8879e062009-09-27 07:56:52 +0000337 std::string Error;
338 if (!R.isValid(Error)) {
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000339 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
Chris Lattner03b80a42011-10-16 05:43:57 +0000340 "invalid regex: " + Error);
Chris Lattner8879e062009-09-27 07:56:52 +0000341 return true;
342 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000343
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000344 RegExStr += RS.str();
Chris Lattner8879e062009-09-27 07:56:52 +0000345 CurParen += R.getNumMatches();
346 return false;
347}
Chris Lattnerb16ab0c2009-09-25 17:23:43 +0000348
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000349void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
350 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
351 std::string Backref = std::string("\\") +
352 std::string(1, '0' + BackrefNum);
353 RegExStr += Backref;
354}
355
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000356bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
357 // The only supported expression is @LINE([\+-]\d+)?
358 if (!Expr.startswith("@LINE"))
359 return false;
360 Expr = Expr.substr(StringRef("@LINE").size());
361 int Offset = 0;
362 if (!Expr.empty()) {
363 if (Expr[0] == '+')
364 Expr = Expr.substr(1);
365 else if (Expr[0] != '-')
366 return false;
367 if (Expr.getAsInteger(10, Offset))
368 return false;
369 }
370 Value = llvm::itostr(LineNumber + Offset);
371 return true;
372}
373
Chris Lattnerf08d2db2009-09-24 21:47:32 +0000374/// Match - Match the pattern string against the input buffer Buffer. This
375/// returns the position that is matched or npos if there is no match. If
376/// there is a match, the size of the matched string is returned in MatchLen.
Chris Lattner8879e062009-09-27 07:56:52 +0000377size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
378 StringMap<StringRef> &VariableTable) const {
Jakob Stoklund Oleseneba55822010-10-15 17:47:12 +0000379 // If this is the EOF pattern, match it immediately.
Matt Arsenault38820972013-09-17 22:30:02 +0000380 if (CheckTy == Check::CheckEOF) {
Jakob Stoklund Oleseneba55822010-10-15 17:47:12 +0000381 MatchLen = 0;
382 return Buffer.size();
383 }
384
Chris Lattner221460e2009-09-25 17:09:12 +0000385 // If this is a fixed string pattern, just match it now.
386 if (!FixedStr.empty()) {
387 MatchLen = FixedStr.size();
388 return Buffer.find(FixedStr);
389 }
Chris Lattner8879e062009-09-27 07:56:52 +0000390
Chris Lattnerb16ab0c2009-09-25 17:23:43 +0000391 // Regex match.
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000392
Chris Lattner8879e062009-09-27 07:56:52 +0000393 // If there are variable uses, we need to create a temporary string with the
394 // actual value.
395 StringRef RegExToMatch = RegExStr;
396 std::string TmpStr;
397 if (!VariableUses.empty()) {
398 TmpStr = RegExStr;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000399
Chris Lattner8879e062009-09-27 07:56:52 +0000400 unsigned InsertOffset = 0;
401 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
Chris Lattner8879e062009-09-27 07:56:52 +0000402 std::string Value;
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000403
404 if (VariableUses[i].first[0] == '@') {
405 if (!EvaluateExpression(VariableUses[i].first, Value))
406 return StringRef::npos;
407 } else {
408 StringMap<StringRef>::iterator it =
409 VariableTable.find(VariableUses[i].first);
410 // If the variable is undefined, return an error.
411 if (it == VariableTable.end())
412 return StringRef::npos;
413
Hans Wennborg6f4f77b2013-12-12 00:06:41 +0000414 // Look up the value and escape it so that we can put it into the regex.
415 Value += Regex::escape(it->second);
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000416 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000417
Chris Lattner8879e062009-09-27 07:56:52 +0000418 // Plop it into the regex at the adjusted offset.
419 TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
420 Value.begin(), Value.end());
421 InsertOffset += Value.size();
422 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000423
Chris Lattner8879e062009-09-27 07:56:52 +0000424 // Match the newly constructed regex.
425 RegExToMatch = TmpStr;
426 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000427
428
Chris Lattnerb16ab0c2009-09-25 17:23:43 +0000429 SmallVector<StringRef, 4> MatchInfo;
Chris Lattner8879e062009-09-27 07:56:52 +0000430 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
Chris Lattnerb16ab0c2009-09-25 17:23:43 +0000431 return StringRef::npos;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000432
Chris Lattnerb16ab0c2009-09-25 17:23:43 +0000433 // Successful regex match.
434 assert(!MatchInfo.empty() && "Didn't get any match");
435 StringRef FullMatch = MatchInfo[0];
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000436
Chris Lattner8879e062009-09-27 07:56:52 +0000437 // If this defines any variables, remember their values.
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000438 for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
439 E = VariableDefs.end();
440 I != E; ++I) {
441 assert(I->second < MatchInfo.size() && "Internal paren error");
442 VariableTable[I->first] = MatchInfo[I->second];
Chris Lattner0a4c44b2009-09-25 17:29:36 +0000443 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000444
Chris Lattnerb16ab0c2009-09-25 17:23:43 +0000445 MatchLen = FullMatch.size();
446 return FullMatch.data()-Buffer.data();
Chris Lattnerf08d2db2009-09-24 21:47:32 +0000447}
448
Daniel Dunbarfd29d882009-11-22 22:59:26 +0000449unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
450 const StringMap<StringRef> &VariableTable) const {
451 // Just compute the number of matching characters. For regular expressions, we
452 // just compare against the regex itself and hope for the best.
453 //
454 // FIXME: One easy improvement here is have the regex lib generate a single
455 // example regular expression which matches, and use that as the example
456 // string.
457 StringRef ExampleString(FixedStr);
458 if (ExampleString.empty())
459 ExampleString = RegExStr;
460
Daniel Dunbare9aa36c2010-01-30 00:24:06 +0000461 // Only compare up to the first line in the buffer, or the string size.
462 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
463 BufferPrefix = BufferPrefix.split('\n').first;
464 return BufferPrefix.edit_distance(ExampleString);
Daniel Dunbarfd29d882009-11-22 22:59:26 +0000465}
466
Daniel Dunbare0ef65a2009-11-22 22:08:06 +0000467void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
468 const StringMap<StringRef> &VariableTable) const{
Daniel Dunbare0ef65a2009-11-22 22:08:06 +0000469 // If this was a regular expression using variables, print the current
470 // variable values.
471 if (!VariableUses.empty()) {
472 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
Alp Tokere69170a2014-06-26 22:52:05 +0000473 SmallString<256> Msg;
474 raw_svector_ostream OS(Msg);
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000475 StringRef Var = VariableUses[i].first;
476 if (Var[0] == '@') {
477 std::string Value;
478 if (EvaluateExpression(Var, Value)) {
479 OS << "with expression \"";
480 OS.write_escaped(Var) << "\" equal to \"";
481 OS.write_escaped(Value) << "\"";
482 } else {
483 OS << "uses incorrect expression \"";
484 OS.write_escaped(Var) << "\"";
485 }
Daniel Dunbare0ef65a2009-11-22 22:08:06 +0000486 } else {
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000487 StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
488
489 // Check for undefined variable references.
490 if (it == VariableTable.end()) {
491 OS << "uses undefined variable \"";
492 OS.write_escaped(Var) << "\"";
493 } else {
494 OS << "with variable \"";
495 OS.write_escaped(Var) << "\" equal to \"";
496 OS.write_escaped(it->second) << "\"";
497 }
Daniel Dunbare0ef65a2009-11-22 22:08:06 +0000498 }
499
Chris Lattner03b80a42011-10-16 05:43:57 +0000500 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
501 OS.str());
Daniel Dunbare0ef65a2009-11-22 22:08:06 +0000502 }
503 }
Daniel Dunbarfd29d882009-11-22 22:59:26 +0000504
505 // Attempt to find the closest/best fuzzy match. Usually an error happens
506 // because some string in the output didn't exactly match. In these cases, we
507 // would like to show the user a best guess at what "should have" matched, to
508 // save them having to actually check the input manually.
509 size_t NumLinesForward = 0;
510 size_t Best = StringRef::npos;
511 double BestQuality = 0;
512
513 // Use an arbitrary 4k limit on how far we will search.
Dan Gohman2bf486e2010-01-29 21:57:46 +0000514 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
Daniel Dunbarfd29d882009-11-22 22:59:26 +0000515 if (Buffer[i] == '\n')
516 ++NumLinesForward;
517
Dan Gohmandf22bbf2010-01-29 21:55:16 +0000518 // Patterns have leading whitespace stripped, so skip whitespace when
519 // looking for something which looks like a pattern.
520 if (Buffer[i] == ' ' || Buffer[i] == '\t')
521 continue;
522
Daniel Dunbarfd29d882009-11-22 22:59:26 +0000523 // Compute the "quality" of this match as an arbitrary combination of the
524 // match distance and the number of lines skipped to get to this match.
525 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
526 double Quality = Distance + (NumLinesForward / 100.);
527
528 if (Quality < BestQuality || Best == StringRef::npos) {
529 Best = i;
530 BestQuality = Quality;
531 }
532 }
533
Daniel Dunbarc069cc82010-03-19 18:07:43 +0000534 // Print the "possible intended match here" line if we found something
535 // reasonable and not equal to what we showed in the "scanning from here"
536 // line.
537 if (Best && Best != StringRef::npos && BestQuality < 50) {
538 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
Chris Lattner03b80a42011-10-16 05:43:57 +0000539 SourceMgr::DK_Note, "possible intended match here");
Daniel Dunbarfd29d882009-11-22 22:59:26 +0000540
541 // FIXME: If we wanted to be really friendly we would show why the match
542 // failed, as it can be hard to spot simple one character differences.
543 }
Daniel Dunbare0ef65a2009-11-22 22:08:06 +0000544}
Chris Lattner74d50732009-09-24 20:39:13 +0000545
Adrian Prantl81e5cd92014-01-03 21:49:09 +0000546size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
Eli Bendersky061d2ba2012-12-02 16:02:41 +0000547 // Offset keeps track of the current offset within the input Str
548 size_t Offset = 0;
549 // [...] Nesting depth
550 size_t BracketDepth = 0;
551
552 while (!Str.empty()) {
553 if (Str.startswith("]]") && BracketDepth == 0)
554 return Offset;
555 if (Str[0] == '\\') {
556 // Backslash escapes the next char within regexes, so skip them both.
557 Str = Str.substr(2);
558 Offset += 2;
559 } else {
560 switch (Str[0]) {
561 default:
562 break;
563 case '[':
564 BracketDepth++;
565 break;
566 case ']':
Adrian Prantl81e5cd92014-01-03 21:49:09 +0000567 if (BracketDepth == 0) {
568 SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
569 SourceMgr::DK_Error,
570 "missing closing \"]\" for regex variable");
571 exit(1);
572 }
Eli Bendersky061d2ba2012-12-02 16:02:41 +0000573 BracketDepth--;
574 break;
575 }
576 Str = Str.substr(1);
577 Offset++;
578 }
579 }
580
581 return StringRef::npos;
582}
583
584
Chris Lattner74d50732009-09-24 20:39:13 +0000585//===----------------------------------------------------------------------===//
586// Check Strings.
587//===----------------------------------------------------------------------===//
Chris Lattner3b40b442009-09-24 20:25:55 +0000588
589/// CheckString - This is a check that we found in the input file.
590struct CheckString {
591 /// Pat - The pattern to match.
592 Pattern Pat;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000593
Matt Arsenault13df4622013-11-10 02:04:09 +0000594 /// Prefix - Which prefix name this check matched.
595 StringRef Prefix;
596
Chris Lattner26cccfe2009-08-15 17:41:04 +0000597 /// Loc - The location in the match file that the check string was specified.
598 SMLoc Loc;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000599
Matt Arsenault38820972013-09-17 22:30:02 +0000600 /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
601 /// as opposed to a CHECK: directive.
602 Check::CheckType CheckTy;
Stephen Linf8bd2e52013-07-12 14:51:05 +0000603
Michael Liao91a1b2c2013-05-14 20:34:12 +0000604 /// DagNotStrings - These are all of the strings that are disallowed from
Chris Lattner236d2d52009-09-20 22:35:26 +0000605 /// occurring between this match string and the previous one (or start of
606 /// file).
Michael Liao91a1b2c2013-05-14 20:34:12 +0000607 std::vector<Pattern> DagNotStrings;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000608
Matt Arsenault13df4622013-11-10 02:04:09 +0000609
610 CheckString(const Pattern &P,
611 StringRef S,
612 SMLoc L,
613 Check::CheckType Ty)
614 : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {}
Michael Liaodcc7d482013-05-14 20:29:52 +0000615
Michael Liao91a1b2c2013-05-14 20:34:12 +0000616 /// Check - Match check string and its "not strings" and/or "dag strings".
Stephen Line93a3a02013-10-11 18:38:36 +0000617 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
Stephen Linf8bd2e52013-07-12 14:51:05 +0000618 size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
Michael Liaodcc7d482013-05-14 20:29:52 +0000619
620 /// CheckNext - Verify there is a single line in the given buffer.
621 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
622
623 /// CheckNot - Verify there's no "not strings" in the given buffer.
624 bool CheckNot(const SourceMgr &SM, StringRef Buffer,
Michael Liao91a1b2c2013-05-14 20:34:12 +0000625 const std::vector<const Pattern *> &NotStrings,
Michael Liaodcc7d482013-05-14 20:29:52 +0000626 StringMap<StringRef> &VariableTable) const;
Michael Liao91a1b2c2013-05-14 20:34:12 +0000627
628 /// CheckDag - Match "dag strings" and their mixed "not strings".
629 size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
630 std::vector<const Pattern *> &NotStrings,
631 StringMap<StringRef> &VariableTable) const;
Chris Lattner26cccfe2009-08-15 17:41:04 +0000632};
633
Guy Benyei5ea04c32013-02-06 20:40:38 +0000634/// Canonicalize whitespaces in the input file. Line endings are replaced
635/// with UNIX-style '\n'.
636///
637/// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
638/// characters to a single space.
Rafael Espindolace5dd1a2014-08-01 14:11:14 +0000639static MemoryBuffer *CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
Guy Benyei5ea04c32013-02-06 20:40:38 +0000640 bool PreserveHorizontal) {
Chris Lattner0e45d242010-04-05 22:42:30 +0000641 SmallString<128> NewFile;
Chris Lattnera2f8fc52009-09-24 20:45:07 +0000642 NewFile.reserve(MB->getBufferSize());
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000643
Chris Lattnera2f8fc52009-09-24 20:45:07 +0000644 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
645 Ptr != End; ++Ptr) {
NAKAMURA Takumifd781bf2010-11-14 03:28:22 +0000646 // Eliminate trailing dosish \r.
647 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
648 continue;
649 }
650
Michael Liao61bed2f2013-04-25 18:54:02 +0000651 // If current char is not a horizontal whitespace or if horizontal
Guy Benyei5ea04c32013-02-06 20:40:38 +0000652 // whitespace canonicalization is disabled, dump it to output as is.
653 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
Chris Lattnera2f8fc52009-09-24 20:45:07 +0000654 NewFile.push_back(*Ptr);
655 continue;
656 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000657
Chris Lattnera2f8fc52009-09-24 20:45:07 +0000658 // Otherwise, add one space and advance over neighboring space.
659 NewFile.push_back(' ');
660 while (Ptr+1 != End &&
661 (Ptr[1] == ' ' || Ptr[1] == '\t'))
662 ++Ptr;
663 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000664
Rafael Espindolace5dd1a2014-08-01 14:11:14 +0000665 return MemoryBuffer::getMemBufferCopy(NewFile.str(),
666 MB->getBufferIdentifier());
Chris Lattnera2f8fc52009-09-24 20:45:07 +0000667}
668
/// Return true if \p c can be part of a word as far as check prefixes are
/// concerned: an alphanumeric character, a hyphen or an underscore.
static bool IsPartOfWord(char c) {
  // The <cctype> classifiers are only defined for values representable as
  // unsigned char (or EOF).  Convert first so that bytes >= 0x80 are handled
  // safely where plain char is signed.
  return (isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_');
}
672
Matt Arsenault13df4622013-11-10 02:04:09 +0000673// Get the size of the prefix extension.
674static size_t CheckTypeSize(Check::CheckType Ty) {
675 switch (Ty) {
676 case Check::CheckNone:
677 return 0;
678
679 case Check::CheckPlain:
680 return sizeof(":") - 1;
681
682 case Check::CheckNext:
683 return sizeof("-NEXT:") - 1;
684
685 case Check::CheckNot:
686 return sizeof("-NOT:") - 1;
687
688 case Check::CheckDAG:
689 return sizeof("-DAG:") - 1;
690
691 case Check::CheckLabel:
692 return sizeof("-LABEL:") - 1;
693
694 case Check::CheckEOF:
695 llvm_unreachable("Should not be using EOF size");
696 }
697
698 llvm_unreachable("Bad check type");
699}
700
701static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
Matt Arsenaultc4d2d472013-09-17 22:45:57 +0000702 char NextChar = Buffer[Prefix.size()];
Matt Arsenault38820972013-09-17 22:30:02 +0000703
704 // Verify that the : is present after the prefix.
Matt Arsenault13df4622013-11-10 02:04:09 +0000705 if (NextChar == ':')
Matt Arsenault38820972013-09-17 22:30:02 +0000706 return Check::CheckPlain;
Matt Arsenault38820972013-09-17 22:30:02 +0000707
Matt Arsenault13df4622013-11-10 02:04:09 +0000708 if (NextChar != '-')
Matt Arsenault38820972013-09-17 22:30:02 +0000709 return Check::CheckNone;
Matt Arsenault38820972013-09-17 22:30:02 +0000710
Matt Arsenaultc4d2d472013-09-17 22:45:57 +0000711 StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
Matt Arsenault13df4622013-11-10 02:04:09 +0000712 if (Rest.startswith("NEXT:"))
Matt Arsenault38820972013-09-17 22:30:02 +0000713 return Check::CheckNext;
Matt Arsenault38820972013-09-17 22:30:02 +0000714
Matt Arsenault13df4622013-11-10 02:04:09 +0000715 if (Rest.startswith("NOT:"))
Matt Arsenault38820972013-09-17 22:30:02 +0000716 return Check::CheckNot;
Matt Arsenault38820972013-09-17 22:30:02 +0000717
Matt Arsenault13df4622013-11-10 02:04:09 +0000718 if (Rest.startswith("DAG:"))
Matt Arsenault38820972013-09-17 22:30:02 +0000719 return Check::CheckDAG;
Matt Arsenault38820972013-09-17 22:30:02 +0000720
Matt Arsenault13df4622013-11-10 02:04:09 +0000721 if (Rest.startswith("LABEL:"))
Matt Arsenault38820972013-09-17 22:30:02 +0000722 return Check::CheckLabel;
Matt Arsenault13df4622013-11-10 02:04:09 +0000723
724 return Check::CheckNone;
725}
726
727// From the given position, find the next character after the word.
728static size_t SkipWord(StringRef Str, size_t Loc) {
729 while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
730 ++Loc;
731 return Loc;
732}
733
734// Try to find the first match in buffer for any prefix. If a valid match is
735// found, return that prefix and set its type and location. If there are almost
736// matches (e.g. the actual prefix string is found, but is not an actual check
737// string), but no valid match, return an empty string and set the position to
738// resume searching from. If no partial matches are found, return an empty
739// string and the location will be StringRef::npos. If one prefix is a substring
740// of another, the maximal match should be found. e.g. if "A" and "AA" are
741// prefixes then AA-CHECK: should match the second one.
742static StringRef FindFirstCandidateMatch(StringRef &Buffer,
743 Check::CheckType &CheckTy,
744 size_t &CheckLoc) {
745 StringRef FirstPrefix;
746 size_t FirstLoc = StringRef::npos;
747 size_t SearchLoc = StringRef::npos;
748 Check::CheckType FirstTy = Check::CheckNone;
749
750 CheckTy = Check::CheckNone;
751 CheckLoc = StringRef::npos;
752
753 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
754 I != E; ++I) {
755 StringRef Prefix(*I);
756 size_t PrefixLoc = Buffer.find(Prefix);
757
758 if (PrefixLoc == StringRef::npos)
759 continue;
760
761 // Track where we are searching for invalid prefixes that look almost right.
762 // We need to only advance to the first partial match on the next attempt
763 // since a partial match could be a substring of a later, valid prefix.
764 // Need to skip to the end of the word, otherwise we could end up
765 // matching a prefix in a substring later.
766 if (PrefixLoc < SearchLoc)
767 SearchLoc = SkipWord(Buffer, PrefixLoc);
768
769 // We only want to find the first match to avoid skipping some.
770 if (PrefixLoc > FirstLoc)
771 continue;
Alexey Samsonova7181a12013-11-13 14:12:52 +0000772 // If one matching check-prefix is a prefix of another, choose the
773 // longer one.
774 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
775 continue;
Matt Arsenault13df4622013-11-10 02:04:09 +0000776
777 StringRef Rest = Buffer.drop_front(PrefixLoc);
778 // Make sure we have actually found the prefix, and not a word containing
779 // it. This should also prevent matching the wrong prefix when one is a
780 // substring of another.
781 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
Daniel Sanders43b5f572013-11-20 13:25:05 +0000782 FirstTy = Check::CheckNone;
783 else
784 FirstTy = FindCheckType(Rest, Prefix);
Matt Arsenault13df4622013-11-10 02:04:09 +0000785
Matt Arsenault13df4622013-11-10 02:04:09 +0000786 FirstLoc = PrefixLoc;
Alexey Samsonova7181a12013-11-13 14:12:52 +0000787 FirstPrefix = Prefix;
Matt Arsenault38820972013-09-17 22:30:02 +0000788 }
789
Alexey Samsonova7181a12013-11-13 14:12:52 +0000790 // If the first prefix is invalid, we should continue the search after it.
791 if (FirstTy == Check::CheckNone) {
Matt Arsenault13df4622013-11-10 02:04:09 +0000792 CheckLoc = SearchLoc;
Alexey Samsonova7181a12013-11-13 14:12:52 +0000793 return "";
Matt Arsenault13df4622013-11-10 02:04:09 +0000794 }
795
Alexey Samsonova7181a12013-11-13 14:12:52 +0000796 CheckTy = FirstTy;
797 CheckLoc = FirstLoc;
Matt Arsenault13df4622013-11-10 02:04:09 +0000798 return FirstPrefix;
799}
800
801static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
802 unsigned &LineNumber,
803 Check::CheckType &CheckTy,
804 size_t &CheckLoc) {
805 while (!Buffer.empty()) {
806 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
807 // If we found a real match, we are done.
808 if (!Prefix.empty()) {
809 LineNumber += Buffer.substr(0, CheckLoc).count('\n');
810 return Prefix;
811 }
812
813 // We didn't find any almost matches either, we are also done.
814 if (CheckLoc == StringRef::npos)
815 return StringRef();
816
817 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
818
819 // Advance to the last possible match we found and try again.
820 Buffer = Buffer.drop_front(CheckLoc + 1);
821 }
822
823 return StringRef();
Matt Arsenault38820972013-09-17 22:30:02 +0000824}
Chris Lattneree3c74f2009-07-08 18:44:05 +0000825
Chris Lattneree3c74f2009-07-08 18:44:05 +0000826/// ReadCheckFile - Read the check file, which specifies the sequence of
827/// expected strings. The strings are added to the CheckStrings vector.
Eli Bendersky43d50d42012-11-30 14:22:14 +0000828/// Returns true in case of an error, false otherwise.
Chris Lattneree3c74f2009-07-08 18:44:05 +0000829static bool ReadCheckFile(SourceMgr &SM,
Chris Lattner26cccfe2009-08-15 17:41:04 +0000830 std::vector<CheckString> &CheckStrings) {
Rafael Espindolaadf21f22014-07-06 17:43:13 +0000831 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
832 MemoryBuffer::getFileOrSTDIN(CheckFilename);
833 if (std::error_code EC = FileOrErr.getError()) {
834 errs() << "Could not open check file '" << CheckFilename
835 << "': " << EC.message() << '\n';
Chris Lattneree3c74f2009-07-08 18:44:05 +0000836 return true;
837 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000838
Chris Lattnera2f8fc52009-09-24 20:45:07 +0000839 // If we want to canonicalize whitespace, strip excess whitespace from the
Guy Benyei5ea04c32013-02-06 20:40:38 +0000840 // buffer containing the CHECK lines. Remove DOS style line endings.
Rafael Espindolace5dd1a2014-08-01 14:11:14 +0000841 MemoryBuffer *F = CanonicalizeInputFile(std::move(FileOrErr.get()),
Rafael Espindolaadf21f22014-07-06 17:43:13 +0000842 NoCanonicalizeWhiteSpace);
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000843
Chris Lattneree3c74f2009-07-08 18:44:05 +0000844 SM.AddNewSourceBuffer(F, SMLoc());
845
Chris Lattner10f10ce2009-08-15 18:00:42 +0000846 // Find all instances of CheckPrefix followed by : in the file.
Chris Lattnercaa5fc02009-09-20 22:11:44 +0000847 StringRef Buffer = F->getBuffer();
Alexander Kornienko56ccdbb2014-07-11 12:39:32 +0000848
849 std::vector<Pattern> ImplicitNegativeChecks;
850 for (const auto &PatternString : ImplicitCheckNot) {
851 // Create a buffer with fake command line content in order to display the
852 // command line option responsible for the specific implicit CHECK-NOT.
853 std::string Prefix = std::string("-") + ImplicitCheckNot.ArgStr + "='";
854 std::string Suffix = "'";
855 MemoryBuffer *CmdLine = MemoryBuffer::getMemBufferCopy(
856 Prefix + PatternString + Suffix, "command line");
857 StringRef PatternInBuffer =
858 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
859 SM.AddNewSourceBuffer(CmdLine, SMLoc());
860
861 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
862 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
863 "IMPLICIT-CHECK", SM, 0);
864 }
865
866
867 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000868
Eli Bendersky43d50d42012-11-30 14:22:14 +0000869 // LineNumber keeps track of the line on which CheckPrefix instances are
870 // found.
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000871 unsigned LineNumber = 1;
872
Chris Lattneree3c74f2009-07-08 18:44:05 +0000873 while (1) {
Matt Arsenault13df4622013-11-10 02:04:09 +0000874 Check::CheckType CheckTy;
875 size_t PrefixLoc;
876
877 // See if a prefix occurs in the memory buffer.
878 StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
879 LineNumber,
880 CheckTy,
881 PrefixLoc);
882 if (UsedPrefix.empty())
Chris Lattneree3c74f2009-07-08 18:44:05 +0000883 break;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000884
Matt Arsenault13df4622013-11-10 02:04:09 +0000885 Buffer = Buffer.drop_front(PrefixLoc);
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000886
Matt Arsenault13df4622013-11-10 02:04:09 +0000887 // Location to use for error messages.
888 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000889
Matt Arsenault13df4622013-11-10 02:04:09 +0000890 // PrefixLoc is to the start of the prefix. Skip to the end.
891 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000892
Matt Arsenault38820972013-09-17 22:30:02 +0000893 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
894 // leading and trailing whitespace.
Chris Lattner236d2d52009-09-20 22:35:26 +0000895 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000896
Chris Lattneree3c74f2009-07-08 18:44:05 +0000897 // Scan ahead to the end of line.
Chris Lattnercaa5fc02009-09-20 22:11:44 +0000898 size_t EOL = Buffer.find_first_of("\n\r");
Chris Lattner74d50732009-09-24 20:39:13 +0000899
Dan Gohman838fb092010-01-29 21:53:18 +0000900 // Remember the location of the start of the pattern, for diagnostics.
901 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
902
Chris Lattner74d50732009-09-24 20:39:13 +0000903 // Parse the pattern.
Matt Arsenault38820972013-09-17 22:30:02 +0000904 Pattern P(CheckTy);
Matt Arsenault13df4622013-11-10 02:04:09 +0000905 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
Chris Lattneree3c74f2009-07-08 18:44:05 +0000906 return true;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000907
Stephen Linf8bd2e52013-07-12 14:51:05 +0000908 // Verify that CHECK-LABEL lines do not define or use variables
Matt Arsenault38820972013-09-17 22:30:02 +0000909 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
Matt Arsenault13df4622013-11-10 02:04:09 +0000910 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
Stephen Linf8bd2e52013-07-12 14:51:05 +0000911 SourceMgr::DK_Error,
Matt Arsenault13df4622013-11-10 02:04:09 +0000912 "found '" + UsedPrefix + "-LABEL:'"
913 " with variable definition or use");
Stephen Linf8bd2e52013-07-12 14:51:05 +0000914 return true;
915 }
916
Chris Lattner74d50732009-09-24 20:39:13 +0000917 Buffer = Buffer.substr(EOL);
918
Chris Lattnerda108b42009-08-15 18:32:21 +0000919 // Verify that CHECK-NEXT lines have at least one CHECK line before them.
Matt Arsenault38820972013-09-17 22:30:02 +0000920 if ((CheckTy == Check::CheckNext) && CheckStrings.empty()) {
Matt Arsenault13df4622013-11-10 02:04:09 +0000921 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
Chris Lattner03b80a42011-10-16 05:43:57 +0000922 SourceMgr::DK_Error,
Matt Arsenault13df4622013-11-10 02:04:09 +0000923 "found '" + UsedPrefix + "-NEXT:' without previous '"
924 + UsedPrefix + ": line");
Chris Lattnerda108b42009-08-15 18:32:21 +0000925 return true;
926 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000927
Michael Liao91a1b2c2013-05-14 20:34:12 +0000928 // Handle CHECK-DAG/-NOT.
Matt Arsenault38820972013-09-17 22:30:02 +0000929 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
Michael Liao91a1b2c2013-05-14 20:34:12 +0000930 DagNotMatches.push_back(P);
Chris Lattner74d50732009-09-24 20:39:13 +0000931 continue;
932 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000933
Chris Lattneree3c74f2009-07-08 18:44:05 +0000934 // Okay, add the string we captured to the output vector and move on.
Chris Lattner3b40b442009-09-24 20:25:55 +0000935 CheckStrings.push_back(CheckString(P,
Matt Arsenault13df4622013-11-10 02:04:09 +0000936 UsedPrefix,
Dan Gohman838fb092010-01-29 21:53:18 +0000937 PatternLoc,
Matt Arsenault38820972013-09-17 22:30:02 +0000938 CheckTy));
Michael Liao91a1b2c2013-05-14 20:34:12 +0000939 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
Alexander Kornienko56ccdbb2014-07-11 12:39:32 +0000940 DagNotMatches = ImplicitNegativeChecks;
Chris Lattneree3c74f2009-07-08 18:44:05 +0000941 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000942
Matt Arsenault13df4622013-11-10 02:04:09 +0000943 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
944 // prefix as a filler for the error message.
Michael Liao91a1b2c2013-05-14 20:34:12 +0000945 if (!DagNotMatches.empty()) {
Matt Arsenault38820972013-09-17 22:30:02 +0000946 CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF),
Matt Arsenault13df4622013-11-10 02:04:09 +0000947 CheckPrefixes[0],
Jakob Stoklund Oleseneba55822010-10-15 17:47:12 +0000948 SMLoc::getFromPointer(Buffer.data()),
Matt Arsenault38820972013-09-17 22:30:02 +0000949 Check::CheckEOF));
Michael Liao91a1b2c2013-05-14 20:34:12 +0000950 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
Jakob Stoklund Oleseneba55822010-10-15 17:47:12 +0000951 }
952
Chris Lattneree3c74f2009-07-08 18:44:05 +0000953 if (CheckStrings.empty()) {
Matt Arsenault13df4622013-11-10 02:04:09 +0000954 errs() << "error: no check strings found with prefix"
955 << (CheckPrefixes.size() > 1 ? "es " : " ");
956 for (size_t I = 0, N = CheckPrefixes.size(); I != N; ++I) {
957 StringRef Prefix(CheckPrefixes[I]);
958 errs() << '\'' << Prefix << ":'";
959 if (I != N - 1)
960 errs() << ", ";
961 }
962
963 errs() << '\n';
Chris Lattneree3c74f2009-07-08 18:44:05 +0000964 return true;
965 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000966
Chris Lattneree3c74f2009-07-08 18:44:05 +0000967 return false;
968}
969
Michael Liao91a1b2c2013-05-14 20:34:12 +0000970static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc,
971 const Pattern &Pat, StringRef Buffer,
Daniel Dunbare0ef65a2009-11-22 22:08:06 +0000972 StringMap<StringRef> &VariableTable) {
Chris Lattnerda108b42009-08-15 18:32:21 +0000973 // Otherwise, we have an error, emit an error message.
Michael Liao91a1b2c2013-05-14 20:34:12 +0000974 SM.PrintMessage(Loc, SourceMgr::DK_Error,
Chris Lattner03b80a42011-10-16 05:43:57 +0000975 "expected string not found in input");
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000976
Chris Lattnerda108b42009-08-15 18:32:21 +0000977 // Print the "scanning from here" line. If the current position is at the
978 // end of a line, advance to the start of the next line.
Chris Lattnercaa5fc02009-09-20 22:11:44 +0000979 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000980
Chris Lattner03b80a42011-10-16 05:43:57 +0000981 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
982 "scanning from here");
Daniel Dunbare0ef65a2009-11-22 22:08:06 +0000983
984 // Allow the pattern to print additional information if desired.
Michael Liao91a1b2c2013-05-14 20:34:12 +0000985 Pat.PrintFailureInfo(SM, Buffer, VariableTable);
986}
987
988static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
989 StringRef Buffer,
990 StringMap<StringRef> &VariableTable) {
991 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
Chris Lattnerda108b42009-08-15 18:32:21 +0000992}
993
Chris Lattner37183582009-09-20 22:42:44 +0000994/// CountNumNewlinesBetween - Count the number of newlines in the specified
995/// range.
Richard Smith592fe882014-04-07 17:09:53 +0000996static unsigned CountNumNewlinesBetween(StringRef Range,
997 const char *&FirstNewLine) {
Chris Lattnerda108b42009-08-15 18:32:21 +0000998 unsigned NumNewLines = 0;
Chris Lattner37183582009-09-20 22:42:44 +0000999 while (1) {
Chris Lattnerda108b42009-08-15 18:32:21 +00001000 // Scan for newline.
Chris Lattner37183582009-09-20 22:42:44 +00001001 Range = Range.substr(Range.find_first_of("\n\r"));
1002 if (Range.empty()) return NumNewLines;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001003
Chris Lattnerda108b42009-08-15 18:32:21 +00001004 ++NumNewLines;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001005
Chris Lattnerda108b42009-08-15 18:32:21 +00001006 // Handle \n\r and \r\n as a single newline.
Chris Lattner37183582009-09-20 22:42:44 +00001007 if (Range.size() > 1 &&
1008 (Range[1] == '\n' || Range[1] == '\r') &&
1009 (Range[0] != Range[1]))
1010 Range = Range.substr(1);
1011 Range = Range.substr(1);
Richard Smith592fe882014-04-07 17:09:53 +00001012
1013 if (NumNewLines == 1)
1014 FirstNewLine = Range.begin();
Chris Lattnerda108b42009-08-15 18:32:21 +00001015 }
Chris Lattnerda108b42009-08-15 18:32:21 +00001016}
1017
Michael Liaodcc7d482013-05-14 20:29:52 +00001018size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
Stephen Line93a3a02013-10-11 18:38:36 +00001019 bool IsLabelScanMode, size_t &MatchLen,
Michael Liaodcc7d482013-05-14 20:29:52 +00001020 StringMap<StringRef> &VariableTable) const {
Michael Liao91a1b2c2013-05-14 20:34:12 +00001021 size_t LastPos = 0;
1022 std::vector<const Pattern *> NotStrings;
1023
Stephen Line93a3a02013-10-11 18:38:36 +00001024 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1025 // bounds; we have not processed variable definitions within the bounded block
1026 // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1027 // over the block again (including the last CHECK-LABEL) in normal mode.
1028 if (!IsLabelScanMode) {
1029 // Match "dag strings" (with mixed "not strings" if any).
1030 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1031 if (LastPos == StringRef::npos)
1032 return StringRef::npos;
1033 }
Michael Liao91a1b2c2013-05-14 20:34:12 +00001034
1035 // Match itself from the last position after matching CHECK-DAG.
1036 StringRef MatchBuffer = Buffer.substr(LastPos);
1037 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
Michael Liaodcc7d482013-05-14 20:29:52 +00001038 if (MatchPos == StringRef::npos) {
Michael Liao91a1b2c2013-05-14 20:34:12 +00001039 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
Michael Liaodcc7d482013-05-14 20:29:52 +00001040 return StringRef::npos;
1041 }
Michael Liao91a1b2c2013-05-14 20:34:12 +00001042 MatchPos += LastPos;
Michael Liaodcc7d482013-05-14 20:29:52 +00001043
Stephen Line93a3a02013-10-11 18:38:36 +00001044 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1045 // or CHECK-NOT
1046 if (!IsLabelScanMode) {
Stephen Linf8bd2e52013-07-12 14:51:05 +00001047 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
Michael Liaodcc7d482013-05-14 20:29:52 +00001048
Stephen Linf8bd2e52013-07-12 14:51:05 +00001049 // If this check is a "CHECK-NEXT", verify that the previous match was on
1050 // the previous line (i.e. that there is one newline between them).
1051 if (CheckNext(SM, SkippedRegion))
1052 return StringRef::npos;
Michael Liaodcc7d482013-05-14 20:29:52 +00001053
Stephen Linf8bd2e52013-07-12 14:51:05 +00001054 // If this match had "not strings", verify that they don't exist in the
1055 // skipped region.
1056 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1057 return StringRef::npos;
1058 }
Michael Liaodcc7d482013-05-14 20:29:52 +00001059
1060 return MatchPos;
1061}
1062
1063bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
Matt Arsenault38820972013-09-17 22:30:02 +00001064 if (CheckTy != Check::CheckNext)
Michael Liaodcc7d482013-05-14 20:29:52 +00001065 return false;
1066
1067 // Count the number of newlines between the previous match and this one.
1068 assert(Buffer.data() !=
1069 SM.getMemoryBuffer(
1070 SM.FindBufferContainingLoc(
1071 SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1072 "CHECK-NEXT can't be the first check in a file");
1073
Craig Topper66f09ad2014-06-08 22:29:17 +00001074 const char *FirstNewLine = nullptr;
Richard Smith592fe882014-04-07 17:09:53 +00001075 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
Michael Liaodcc7d482013-05-14 20:29:52 +00001076
1077 if (NumNewLines == 0) {
Matt Arsenault13df4622013-11-10 02:04:09 +00001078 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
Michael Liaodcc7d482013-05-14 20:29:52 +00001079 "-NEXT: is on the same line as previous match");
1080 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1081 SourceMgr::DK_Note, "'next' match was here");
1082 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1083 "previous match ended here");
1084 return true;
1085 }
1086
1087 if (NumNewLines != 1) {
Matt Arsenault13df4622013-11-10 02:04:09 +00001088 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
Michael Liaodcc7d482013-05-14 20:29:52 +00001089 "-NEXT: is not on the line after the previous match");
1090 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1091 SourceMgr::DK_Note, "'next' match was here");
1092 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1093 "previous match ended here");
Richard Smith592fe882014-04-07 17:09:53 +00001094 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1095 "non-matching line after previous match is here");
Michael Liaodcc7d482013-05-14 20:29:52 +00001096 return true;
1097 }
1098
1099 return false;
1100}
1101
1102bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
Michael Liao91a1b2c2013-05-14 20:34:12 +00001103 const std::vector<const Pattern *> &NotStrings,
Michael Liaodcc7d482013-05-14 20:29:52 +00001104 StringMap<StringRef> &VariableTable) const {
1105 for (unsigned ChunkNo = 0, e = NotStrings.size();
1106 ChunkNo != e; ++ChunkNo) {
Michael Liao91a1b2c2013-05-14 20:34:12 +00001107 const Pattern *Pat = NotStrings[ChunkNo];
Matt Arsenault38820972013-09-17 22:30:02 +00001108 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
Michael Liao91a1b2c2013-05-14 20:34:12 +00001109
Michael Liaodcc7d482013-05-14 20:29:52 +00001110 size_t MatchLen = 0;
Michael Liao91a1b2c2013-05-14 20:34:12 +00001111 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
Michael Liaodcc7d482013-05-14 20:29:52 +00001112
1113 if (Pos == StringRef::npos) continue;
1114
1115 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1116 SourceMgr::DK_Error,
Matt Arsenault13df4622013-11-10 02:04:09 +00001117 Prefix + "-NOT: string occurred!");
Michael Liao91a1b2c2013-05-14 20:34:12 +00001118 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
Matt Arsenault13df4622013-11-10 02:04:09 +00001119 Prefix + "-NOT: pattern specified here");
Michael Liaodcc7d482013-05-14 20:29:52 +00001120 return true;
1121 }
1122
1123 return false;
1124}
1125
Michael Liao91a1b2c2013-05-14 20:34:12 +00001126size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1127 std::vector<const Pattern *> &NotStrings,
1128 StringMap<StringRef> &VariableTable) const {
1129 if (DagNotStrings.empty())
1130 return 0;
1131
1132 size_t LastPos = 0;
1133 size_t StartPos = LastPos;
1134
1135 for (unsigned ChunkNo = 0, e = DagNotStrings.size();
1136 ChunkNo != e; ++ChunkNo) {
1137 const Pattern &Pat = DagNotStrings[ChunkNo];
1138
Matt Arsenault38820972013-09-17 22:30:02 +00001139 assert((Pat.getCheckTy() == Check::CheckDAG ||
1140 Pat.getCheckTy() == Check::CheckNot) &&
Michael Liao91a1b2c2013-05-14 20:34:12 +00001141 "Invalid CHECK-DAG or CHECK-NOT!");
1142
Matt Arsenault38820972013-09-17 22:30:02 +00001143 if (Pat.getCheckTy() == Check::CheckNot) {
Michael Liao91a1b2c2013-05-14 20:34:12 +00001144 NotStrings.push_back(&Pat);
1145 continue;
1146 }
1147
Matt Arsenault38820972013-09-17 22:30:02 +00001148 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
Michael Liao91a1b2c2013-05-14 20:34:12 +00001149
1150 size_t MatchLen = 0, MatchPos;
1151
1152 // CHECK-DAG always matches from the start.
1153 StringRef MatchBuffer = Buffer.substr(StartPos);
1154 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1155 // With a group of CHECK-DAGs, a single mismatching means the match on
1156 // that group of CHECK-DAGs fails immediately.
1157 if (MatchPos == StringRef::npos) {
1158 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1159 return StringRef::npos;
1160 }
1161 // Re-calc it as the offset relative to the start of the original string.
1162 MatchPos += StartPos;
1163
1164 if (!NotStrings.empty()) {
1165 if (MatchPos < LastPos) {
1166 // Reordered?
1167 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1168 SourceMgr::DK_Error,
Matt Arsenault13df4622013-11-10 02:04:09 +00001169 Prefix + "-DAG: found a match of CHECK-DAG"
Michael Liao91a1b2c2013-05-14 20:34:12 +00001170 " reordering across a CHECK-NOT");
1171 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1172 SourceMgr::DK_Note,
Matt Arsenault13df4622013-11-10 02:04:09 +00001173 Prefix + "-DAG: the farthest match of CHECK-DAG"
Michael Liao91a1b2c2013-05-14 20:34:12 +00001174 " is found here");
1175 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
Matt Arsenault13df4622013-11-10 02:04:09 +00001176 Prefix + "-NOT: the crossed pattern specified"
Michael Liao91a1b2c2013-05-14 20:34:12 +00001177 " here");
1178 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
Matt Arsenault13df4622013-11-10 02:04:09 +00001179 Prefix + "-DAG: the reordered pattern specified"
Michael Liao91a1b2c2013-05-14 20:34:12 +00001180 " here");
1181 return StringRef::npos;
1182 }
1183 // All subsequent CHECK-DAGs should be matched from the farthest
1184 // position of all precedent CHECK-DAGs (including this one.)
1185 StartPos = LastPos;
1186 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1187 // CHECK-DAG, verify that there's no 'not' strings occurred in that
1188 // region.
1189 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
Tim Northovercf708c32013-08-02 11:32:50 +00001190 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
Michael Liao91a1b2c2013-05-14 20:34:12 +00001191 return StringRef::npos;
1192 // Clear "not strings".
1193 NotStrings.clear();
1194 }
1195
1196 // Update the last position with CHECK-DAG matches.
1197 LastPos = std::max(MatchPos + MatchLen, LastPos);
1198 }
1199
1200 return LastPos;
1201}
1202
Matt Arsenault13df4622013-11-10 02:04:09 +00001203// A check prefix must contain only alphanumeric, hyphens and underscores.
1204static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1205 Regex Validator("^[a-zA-Z0-9_-]*$");
1206 return Validator.match(CheckPrefix);
1207}
1208
1209static bool ValidateCheckPrefixes() {
1210 StringSet<> PrefixSet;
1211
1212 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
1213 I != E; ++I) {
1214 StringRef Prefix(*I);
1215
Eli Bendersky24412b12014-07-29 20:30:53 +00001216 // Reject empty prefixes.
1217 if (Prefix == "")
1218 return false;
1219
Matt Arsenault13df4622013-11-10 02:04:09 +00001220 if (!PrefixSet.insert(Prefix))
1221 return false;
1222
1223 if (!ValidateCheckPrefix(Prefix))
1224 return false;
1225 }
1226
1227 return true;
1228}
1229
1230// I don't think there's a way to specify an initial value for cl::list,
1231// so if nothing was specified, add the default
1232static void AddCheckPrefixIfNeeded() {
1233 if (CheckPrefixes.empty())
1234 CheckPrefixes.push_back("CHECK");
Rui Ueyamac27351582013-08-12 23:05:59 +00001235}
1236
Chris Lattneree3c74f2009-07-08 18:44:05 +00001237int main(int argc, char **argv) {
1238 sys::PrintStackTraceOnErrorSignal();
1239 PrettyStackTraceProgram X(argc, argv);
1240 cl::ParseCommandLineOptions(argc, argv);
1241
Matt Arsenault13df4622013-11-10 02:04:09 +00001242 if (!ValidateCheckPrefixes()) {
1243 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1244 "start with a letter and contain only alphanumeric characters, "
1245 "hyphens and underscores\n";
Rui Ueyamac27351582013-08-12 23:05:59 +00001246 return 2;
1247 }
1248
Matt Arsenault13df4622013-11-10 02:04:09 +00001249 AddCheckPrefixIfNeeded();
1250
Chris Lattneree3c74f2009-07-08 18:44:05 +00001251 SourceMgr SM;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001252
Chris Lattneree3c74f2009-07-08 18:44:05 +00001253 // Read the expected strings from the check file.
Chris Lattner26cccfe2009-08-15 17:41:04 +00001254 std::vector<CheckString> CheckStrings;
Chris Lattneree3c74f2009-07-08 18:44:05 +00001255 if (ReadCheckFile(SM, CheckStrings))
1256 return 2;
1257
1258 // Open the file to check and add it to SourceMgr.
Rafael Espindolaadf21f22014-07-06 17:43:13 +00001259 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1260 MemoryBuffer::getFileOrSTDIN(InputFilename);
1261 if (std::error_code EC = FileOrErr.getError()) {
1262 errs() << "Could not open input file '" << InputFilename
1263 << "': " << EC.message() << '\n';
Eli Bendersky8e1c6472012-11-30 13:51:33 +00001264 return 2;
Chris Lattneree3c74f2009-07-08 18:44:05 +00001265 }
Rafael Espindola3f6481d2014-08-01 14:31:55 +00001266 std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001267
Justin Bogner1b9f9362014-08-07 18:40:37 +00001268 if (File->getBufferSize() == 0 && !AllowEmptyInput) {
Chris Lattnerb692bed2011-02-09 16:46:02 +00001269 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
Eli Bendersky8e1c6472012-11-30 13:51:33 +00001270 return 2;
Chris Lattnerb692bed2011-02-09 16:46:02 +00001271 }
Benjamin Kramere963d662013-03-23 13:56:23 +00001272
Chris Lattner2c3e5cd2009-07-11 18:58:15 +00001273 // Remove duplicate spaces in the input file if requested.
Guy Benyei5ea04c32013-02-06 20:40:38 +00001274 // Remove DOS style line endings.
Benjamin Kramere963d662013-03-23 13:56:23 +00001275 MemoryBuffer *F =
Rafael Espindolace5dd1a2014-08-01 14:11:14 +00001276 CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001277
Chris Lattneree3c74f2009-07-08 18:44:05 +00001278 SM.AddNewSourceBuffer(F, SMLoc());
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001279
Chris Lattner8879e062009-09-27 07:56:52 +00001280 /// VariableTable - This holds all the current filecheck variables.
1281 StringMap<StringRef> VariableTable;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001282
Chris Lattneree3c74f2009-07-08 18:44:05 +00001283 // Check that we have all of the expected strings, in order, in the input
1284 // file.
Chris Lattnercaa5fc02009-09-20 22:11:44 +00001285 StringRef Buffer = F->getBuffer();
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001286
Stephen Linf8bd2e52013-07-12 14:51:05 +00001287 bool hasError = false;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001288
Stephen Linf8bd2e52013-07-12 14:51:05 +00001289 unsigned i = 0, j = 0, e = CheckStrings.size();
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001290
Stephen Linf8bd2e52013-07-12 14:51:05 +00001291 while (true) {
1292 StringRef CheckRegion;
1293 if (j == e) {
1294 CheckRegion = Buffer;
1295 } else {
1296 const CheckString &CheckLabelStr = CheckStrings[j];
Matt Arsenault38820972013-09-17 22:30:02 +00001297 if (CheckLabelStr.CheckTy != Check::CheckLabel) {
Stephen Linf8bd2e52013-07-12 14:51:05 +00001298 ++j;
1299 continue;
1300 }
Chris Lattner37183582009-09-20 22:42:44 +00001301
Stephen Linf8bd2e52013-07-12 14:51:05 +00001302 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1303 size_t MatchLabelLen = 0;
Stephen Line93a3a02013-10-11 18:38:36 +00001304 size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
Stephen Linf8bd2e52013-07-12 14:51:05 +00001305 MatchLabelLen, VariableTable);
1306 if (MatchLabelPos == StringRef::npos) {
1307 hasError = true;
1308 break;
1309 }
1310
1311 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1312 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1313 ++j;
1314 }
1315
1316 for ( ; i != j; ++i) {
1317 const CheckString &CheckStr = CheckStrings[i];
1318
1319 // Check each string within the scanned region, including a second check
1320 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1321 size_t MatchLen = 0;
Stephen Line93a3a02013-10-11 18:38:36 +00001322 size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
Stephen Linf8bd2e52013-07-12 14:51:05 +00001323 VariableTable);
1324
1325 if (MatchPos == StringRef::npos) {
1326 hasError = true;
1327 i = j;
1328 break;
1329 }
1330
1331 CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1332 }
1333
1334 if (j == e)
1335 break;
Chris Lattneree3c74f2009-07-08 18:44:05 +00001336 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001337
Stephen Linf8bd2e52013-07-12 14:51:05 +00001338 return hasError ? 1 : 0;
Chris Lattneree3c74f2009-07-08 18:44:05 +00001339}