blob: 9b9ffb049064ea195d0a6b9484cceaf973c4dd9b [file] [log] [blame]
Chris Lattneree3c74f2009-07-08 18:44:05 +00001//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// FileCheck does a line-by-line check of a file that validates whether it
11// contains the expected content. This is useful for regression tests etc.
12//
13// This program exits with an error status of 2 on error, exit status of 0 if
14// the file matched the expected contents, and exit status of 1 if it did not
15// contain the expected contents.
16//
17//===----------------------------------------------------------------------===//
18
Chandler Carruth91d19d82012-12-04 10:37:14 +000019#include "llvm/ADT/SmallString.h"
20#include "llvm/ADT/StringExtras.h"
21#include "llvm/ADT/StringMap.h"
Matt Arsenault13df4622013-11-10 02:04:09 +000022#include "llvm/ADT/StringSet.h"
Chris Lattneree3c74f2009-07-08 18:44:05 +000023#include "llvm/Support/CommandLine.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include "llvm/Support/PrettyStackTrace.h"
Chris Lattnerf08d2db2009-09-24 21:47:32 +000026#include "llvm/Support/Regex.h"
Chandler Carruth91d19d82012-12-04 10:37:14 +000027#include "llvm/Support/Signals.h"
Chris Lattneree3c74f2009-07-08 18:44:05 +000028#include "llvm/Support/SourceMgr.h"
29#include "llvm/Support/raw_ostream.h"
Chris Lattner8879e062009-09-27 07:56:52 +000030#include <algorithm>
Will Dietz981af002013-10-12 00:55:57 +000031#include <cctype>
Eli Benderskye8b8f1b2012-12-01 21:54:48 +000032#include <map>
33#include <string>
Rafael Espindolaa6e9c3e2014-06-12 17:38:55 +000034#include <system_error>
Eli Benderskye8b8f1b2012-12-01 21:54:48 +000035#include <vector>
Chris Lattneree3c74f2009-07-08 18:44:05 +000036using namespace llvm;
37
38static cl::opt<std::string>
39CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40
41static cl::opt<std::string>
42InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43 cl::init("-"), cl::value_desc("filename"));
44
Matt Arsenault13df4622013-11-10 02:04:09 +000045static cl::list<std::string>
46CheckPrefixes("check-prefix",
47 cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
Chris Lattneree3c74f2009-07-08 18:44:05 +000048
Chris Lattner2c3e5cd2009-07-11 18:58:15 +000049static cl::opt<bool>
50NoCanonicalizeWhiteSpace("strict-whitespace",
51 cl::desc("Do not treat all horizontal whitespace as equivalent"));
52
Alexander Kornienko56ccdbb2014-07-11 12:39:32 +000053static cl::list<std::string> ImplicitCheckNot(
54 "implicit-check-not",
55 cl::desc("Add an implicit negative check with this pattern to every\n"
56 "positive check. This can be used to ensure that no instances of\n"
57 "this pattern occur which are not matched by a positive pattern"),
58 cl::value_desc("pattern"));
59
Justin Bogner1b9f9362014-08-07 18:40:37 +000060static cl::opt<bool> AllowEmptyInput(
61 "allow-empty", cl::init(false),
62 cl::desc("Allow the input file to be empty. This is useful when making\n"
63 "checks that some error message does not occur, for example."));
64
Matt Arsenault13df4622013-11-10 02:04:09 +000065typedef cl::list<std::string>::const_iterator prefix_iterator;
66
Chris Lattner74d50732009-09-24 20:39:13 +000067//===----------------------------------------------------------------------===//
68// Pattern Handling Code.
69//===----------------------------------------------------------------------===//
70
namespace Check {
  /// CheckType - The kind of check a Pattern or CheckString represents.
  /// Enumerator values are sequential, starting at zero for CheckNone.
  enum CheckType {
    CheckNone = 0,
    CheckPlain,
    CheckNext,
    CheckSame,
    CheckNot,
    CheckDAG,
    CheckLabel,

    /// CheckEOF - A pattern of this type only matches the end of file. This
    /// is used for trailing CHECK-NOTs.
    CheckEOF
  };
}
86
Chris Lattner3b40b442009-09-24 20:25:55 +000087class Pattern {
Chris Lattner0a4c44b2009-09-25 17:29:36 +000088 SMLoc PatternLoc;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +000089
Matt Arsenault38820972013-09-17 22:30:02 +000090 Check::CheckType CheckTy;
Michael Liao91a1b2c2013-05-14 20:34:12 +000091
Chris Lattnerb16ab0c2009-09-25 17:23:43 +000092 /// FixedStr - If non-empty, this pattern is a fixed string match with the
93 /// specified fixed string.
Chris Lattner221460e2009-09-25 17:09:12 +000094 StringRef FixedStr;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +000095
Chris Lattnerb16ab0c2009-09-25 17:23:43 +000096 /// RegEx - If non-empty, this is a regex pattern.
97 std::string RegExStr;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +000098
Alexander Kornienko92987fb2012-11-14 21:07:37 +000099 /// \brief Contains the number of line this pattern is in.
100 unsigned LineNumber;
101
Chris Lattner8879e062009-09-27 07:56:52 +0000102 /// VariableUses - Entries in this vector map to uses of a variable in the
103 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain
104 /// "foobaz" and we'll get an entry in this vector that tells us to insert the
105 /// value of bar at offset 3.
106 std::vector<std::pair<StringRef, unsigned> > VariableUses;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000107
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000108 /// VariableDefs - Maps definitions of variables to their parenthesized
109 /// capture numbers.
110 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
111 std::map<StringRef, unsigned> VariableDefs;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000112
Chris Lattner3b40b442009-09-24 20:25:55 +0000113public:
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000114
Matt Arsenault38820972013-09-17 22:30:02 +0000115 Pattern(Check::CheckType Ty)
116 : CheckTy(Ty) { }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000117
Michael Liao0b707eb2013-04-25 21:31:34 +0000118 /// getLoc - Return the location in source code.
119 SMLoc getLoc() const { return PatternLoc; }
120
Matt Arsenault13df4622013-11-10 02:04:09 +0000121 /// ParsePattern - Parse the given string into the Pattern. Prefix provides
122 /// which prefix is being matched, SM provides the SourceMgr used for error
123 /// reports, and LineNumber is the line number in the input file from which
124 /// the pattern string was read. Returns true in case of an error, false
125 /// otherwise.
126 bool ParsePattern(StringRef PatternStr,
127 StringRef Prefix,
128 SourceMgr &SM,
129 unsigned LineNumber);
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000130
Chris Lattner3b40b442009-09-24 20:25:55 +0000131 /// Match - Match the pattern string against the input buffer Buffer. This
132 /// returns the position that is matched or npos if there is no match. If
133 /// there is a match, the size of the matched string is returned in MatchLen.
Chris Lattner8879e062009-09-27 07:56:52 +0000134 ///
135 /// The VariableTable StringMap provides the current values of filecheck
136 /// variables and is updated if this match defines new values.
137 size_t Match(StringRef Buffer, size_t &MatchLen,
138 StringMap<StringRef> &VariableTable) const;
Daniel Dunbare0ef65a2009-11-22 22:08:06 +0000139
140 /// PrintFailureInfo - Print additional information about a failure to match
141 /// involving this pattern.
142 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
143 const StringMap<StringRef> &VariableTable) const;
144
Stephen Linf8bd2e52013-07-12 14:51:05 +0000145 bool hasVariable() const { return !(VariableUses.empty() &&
146 VariableDefs.empty()); }
147
Matt Arsenault38820972013-09-17 22:30:02 +0000148 Check::CheckType getCheckTy() const { return CheckTy; }
Michael Liao91a1b2c2013-05-14 20:34:12 +0000149
Chris Lattnerb16ab0c2009-09-25 17:23:43 +0000150private:
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000151 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
152 void AddBackrefToRegEx(unsigned BackrefNum);
Daniel Dunbarfd29d882009-11-22 22:59:26 +0000153
154 /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
155 /// matching this pattern at the start of \arg Buffer; a distance of zero
156 /// should correspond to a perfect match.
157 unsigned ComputeMatchDistance(StringRef Buffer,
158 const StringMap<StringRef> &VariableTable) const;
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000159
160 /// \brief Evaluates expression and stores the result to \p Value.
161 /// \return true on success. false when the expression has invalid syntax.
162 bool EvaluateExpression(StringRef Expr, std::string &Value) const;
Eli Bendersky061d2ba2012-12-02 16:02:41 +0000163
164 /// \brief Finds the closing sequence of a regex variable usage or
165 /// definition. Str has to point in the beginning of the definition
166 /// (right after the opening sequence).
167 /// \return offset of the closing sequence within Str, or npos if it was not
168 /// found.
Adrian Prantl81e5cd92014-01-03 21:49:09 +0000169 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
Chris Lattner3b40b442009-09-24 20:25:55 +0000170};
171
Chris Lattner8879e062009-09-27 07:56:52 +0000172
Matt Arsenault13df4622013-11-10 02:04:09 +0000173bool Pattern::ParsePattern(StringRef PatternStr,
174 StringRef Prefix,
175 SourceMgr &SM,
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000176 unsigned LineNumber) {
177 this->LineNumber = LineNumber;
Chris Lattner0a4c44b2009-09-25 17:29:36 +0000178 PatternLoc = SMLoc::getFromPointer(PatternStr.data());
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000179
Chris Lattner74d50732009-09-24 20:39:13 +0000180 // Ignore trailing whitespace.
181 while (!PatternStr.empty() &&
182 (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
183 PatternStr = PatternStr.substr(0, PatternStr.size()-1);
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000184
Chris Lattner74d50732009-09-24 20:39:13 +0000185 // Check that there is something on the line.
186 if (PatternStr.empty()) {
Chris Lattner03b80a42011-10-16 05:43:57 +0000187 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
188 "found empty check string with prefix '" +
Matt Arsenault13df4622013-11-10 02:04:09 +0000189 Prefix + ":'");
Chris Lattner74d50732009-09-24 20:39:13 +0000190 return true;
191 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000192
Chris Lattner221460e2009-09-25 17:09:12 +0000193 // Check to see if this is a fixed string, or if it has regex pieces.
Ted Kremenekd9466962012-09-08 04:32:13 +0000194 if (PatternStr.size() < 2 ||
Chris Lattner8879e062009-09-27 07:56:52 +0000195 (PatternStr.find("{{") == StringRef::npos &&
196 PatternStr.find("[[") == StringRef::npos)) {
Chris Lattner221460e2009-09-25 17:09:12 +0000197 FixedStr = PatternStr;
198 return false;
199 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000200
Chris Lattner8879e062009-09-27 07:56:52 +0000201 // Paren value #0 is for the fully matched string. Any new parenthesized
Chris Lattner53e06792011-04-09 06:18:02 +0000202 // values add from there.
Chris Lattner8879e062009-09-27 07:56:52 +0000203 unsigned CurParen = 1;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000204
Chris Lattnerb16ab0c2009-09-25 17:23:43 +0000205 // Otherwise, there is at least one regex piece. Build up the regex pattern
206 // by escaping scary characters in fixed strings, building up one big regex.
Chris Lattnerf08d2db2009-09-24 21:47:32 +0000207 while (!PatternStr.empty()) {
Chris Lattner8879e062009-09-27 07:56:52 +0000208 // RegEx matches.
Chris Lattner53e06792011-04-09 06:18:02 +0000209 if (PatternStr.startswith("{{")) {
Eli Bendersky43d50d42012-11-30 14:22:14 +0000210 // This is the start of a regex match. Scan for the }}.
Chris Lattner8879e062009-09-27 07:56:52 +0000211 size_t End = PatternStr.find("}}");
212 if (End == StringRef::npos) {
213 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
Chris Lattner03b80a42011-10-16 05:43:57 +0000214 SourceMgr::DK_Error,
215 "found start of regex string with no end '}}'");
Chris Lattner8879e062009-09-27 07:56:52 +0000216 return true;
217 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000218
Chris Lattnere53c95f2011-04-09 06:37:03 +0000219 // Enclose {{}} patterns in parens just like [[]] even though we're not
220 // capturing the result for any purpose. This is required in case the
221 // expression contains an alternation like: CHECK: abc{{x|z}}def. We
222 // want this to turn into: "abc(x|z)def" not "abcx|zdef".
223 RegExStr += '(';
224 ++CurParen;
225
Chris Lattner8879e062009-09-27 07:56:52 +0000226 if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
227 return true;
Chris Lattnere53c95f2011-04-09 06:37:03 +0000228 RegExStr += ')';
Chris Lattner53e06792011-04-09 06:18:02 +0000229
Chris Lattner8879e062009-09-27 07:56:52 +0000230 PatternStr = PatternStr.substr(End+2);
Chris Lattnerf08d2db2009-09-24 21:47:32 +0000231 continue;
232 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000233
Chris Lattner8879e062009-09-27 07:56:52 +0000234 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
235 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
236 // second form is [[foo]] which is a reference to foo. The variable name
Daniel Dunbar57cb7332009-11-22 22:07:50 +0000237 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
Chris Lattner8879e062009-09-27 07:56:52 +0000238 // it. This is to catch some common errors.
Chris Lattner53e06792011-04-09 06:18:02 +0000239 if (PatternStr.startswith("[[")) {
Eli Bendersky061d2ba2012-12-02 16:02:41 +0000240 // Find the closing bracket pair ending the match. End is going to be an
241 // offset relative to the beginning of the match string.
Adrian Prantl81e5cd92014-01-03 21:49:09 +0000242 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
Eli Bendersky061d2ba2012-12-02 16:02:41 +0000243
Chris Lattner8879e062009-09-27 07:56:52 +0000244 if (End == StringRef::npos) {
245 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
Chris Lattner03b80a42011-10-16 05:43:57 +0000246 SourceMgr::DK_Error,
247 "invalid named regex reference, no ]] found");
Chris Lattner8879e062009-09-27 07:56:52 +0000248 return true;
249 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000250
Eli Bendersky061d2ba2012-12-02 16:02:41 +0000251 StringRef MatchStr = PatternStr.substr(2, End);
252 PatternStr = PatternStr.substr(End+4);
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000253
Chris Lattner8879e062009-09-27 07:56:52 +0000254 // Get the regex name (e.g. "foo").
255 size_t NameEnd = MatchStr.find(':');
256 StringRef Name = MatchStr.substr(0, NameEnd);
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000257
Chris Lattner8879e062009-09-27 07:56:52 +0000258 if (Name.empty()) {
Chris Lattner03b80a42011-10-16 05:43:57 +0000259 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
260 "invalid name in named regex: empty name");
Chris Lattner8879e062009-09-27 07:56:52 +0000261 return true;
262 }
263
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000264 // Verify that the name/expression is well formed. FileCheck currently
265 // supports @LINE, @LINE+number, @LINE-number expressions. The check here
266 // is relaxed, more strict check is performed in \c EvaluateExpression.
267 bool IsExpression = false;
268 for (unsigned i = 0, e = Name.size(); i != e; ++i) {
269 if (i == 0 && Name[i] == '@') {
270 if (NameEnd != StringRef::npos) {
271 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
272 SourceMgr::DK_Error,
273 "invalid name in named regex definition");
274 return true;
275 }
276 IsExpression = true;
277 continue;
278 }
279 if (Name[i] != '_' && !isalnum(Name[i]) &&
280 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
Chris Lattner8879e062009-09-27 07:56:52 +0000281 SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
Chris Lattner03b80a42011-10-16 05:43:57 +0000282 SourceMgr::DK_Error, "invalid name in named regex");
Chris Lattner8879e062009-09-27 07:56:52 +0000283 return true;
284 }
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000285 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000286
Chris Lattner8879e062009-09-27 07:56:52 +0000287 // Name can't start with a digit.
Guy Benyei83c74e92013-02-12 21:21:59 +0000288 if (isdigit(static_cast<unsigned char>(Name[0]))) {
Chris Lattner03b80a42011-10-16 05:43:57 +0000289 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
290 "invalid name in named regex");
Chris Lattner8879e062009-09-27 07:56:52 +0000291 return true;
292 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000293
Chris Lattner8879e062009-09-27 07:56:52 +0000294 // Handle [[foo]].
295 if (NameEnd == StringRef::npos) {
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000296 // Handle variables that were defined earlier on the same line by
297 // emitting a backreference.
298 if (VariableDefs.find(Name) != VariableDefs.end()) {
299 unsigned VarParenNum = VariableDefs[Name];
300 if (VarParenNum < 1 || VarParenNum > 9) {
301 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
302 SourceMgr::DK_Error,
303 "Can't back-reference more than 9 variables");
304 return true;
305 }
306 AddBackrefToRegEx(VarParenNum);
307 } else {
308 VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
309 }
Chris Lattner8879e062009-09-27 07:56:52 +0000310 continue;
311 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000312
Chris Lattner8879e062009-09-27 07:56:52 +0000313 // Handle [[foo:.*]].
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000314 VariableDefs[Name] = CurParen;
Chris Lattner8879e062009-09-27 07:56:52 +0000315 RegExStr += '(';
316 ++CurParen;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000317
Chris Lattner8879e062009-09-27 07:56:52 +0000318 if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
319 return true;
320
321 RegExStr += ')';
Chris Lattnerf08d2db2009-09-24 21:47:32 +0000322 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000323
Chris Lattner8879e062009-09-27 07:56:52 +0000324 // Handle fixed string matches.
325 // Find the end, which is the start of the next regex.
326 size_t FixedMatchEnd = PatternStr.find("{{");
327 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
Hans Wennborg6f4f77b2013-12-12 00:06:41 +0000328 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
Chris Lattner8879e062009-09-27 07:56:52 +0000329 PatternStr = PatternStr.substr(FixedMatchEnd);
Chris Lattnerf08d2db2009-09-24 21:47:32 +0000330 }
Chris Lattnera2f8fc52009-09-24 20:45:07 +0000331
Chris Lattner74d50732009-09-24 20:39:13 +0000332 return false;
333}
334
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000335bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
Chris Lattner8879e062009-09-27 07:56:52 +0000336 SourceMgr &SM) {
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000337 Regex R(RS);
Chris Lattner8879e062009-09-27 07:56:52 +0000338 std::string Error;
339 if (!R.isValid(Error)) {
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000340 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
Chris Lattner03b80a42011-10-16 05:43:57 +0000341 "invalid regex: " + Error);
Chris Lattner8879e062009-09-27 07:56:52 +0000342 return true;
343 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000344
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000345 RegExStr += RS.str();
Chris Lattner8879e062009-09-27 07:56:52 +0000346 CurParen += R.getNumMatches();
347 return false;
348}
Chris Lattnerb16ab0c2009-09-25 17:23:43 +0000349
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000350void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
351 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
352 std::string Backref = std::string("\\") +
353 std::string(1, '0' + BackrefNum);
354 RegExStr += Backref;
355}
356
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000357bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
358 // The only supported expression is @LINE([\+-]\d+)?
359 if (!Expr.startswith("@LINE"))
360 return false;
361 Expr = Expr.substr(StringRef("@LINE").size());
362 int Offset = 0;
363 if (!Expr.empty()) {
364 if (Expr[0] == '+')
365 Expr = Expr.substr(1);
366 else if (Expr[0] != '-')
367 return false;
368 if (Expr.getAsInteger(10, Offset))
369 return false;
370 }
371 Value = llvm::itostr(LineNumber + Offset);
372 return true;
373}
374
Chris Lattnerf08d2db2009-09-24 21:47:32 +0000375/// Match - Match the pattern string against the input buffer Buffer. This
376/// returns the position that is matched or npos if there is no match. If
377/// there is a match, the size of the matched string is returned in MatchLen.
Chris Lattner8879e062009-09-27 07:56:52 +0000378size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
379 StringMap<StringRef> &VariableTable) const {
Jakob Stoklund Oleseneba55822010-10-15 17:47:12 +0000380 // If this is the EOF pattern, match it immediately.
Matt Arsenault38820972013-09-17 22:30:02 +0000381 if (CheckTy == Check::CheckEOF) {
Jakob Stoklund Oleseneba55822010-10-15 17:47:12 +0000382 MatchLen = 0;
383 return Buffer.size();
384 }
385
Chris Lattner221460e2009-09-25 17:09:12 +0000386 // If this is a fixed string pattern, just match it now.
387 if (!FixedStr.empty()) {
388 MatchLen = FixedStr.size();
389 return Buffer.find(FixedStr);
390 }
Chris Lattner8879e062009-09-27 07:56:52 +0000391
Chris Lattnerb16ab0c2009-09-25 17:23:43 +0000392 // Regex match.
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000393
Chris Lattner8879e062009-09-27 07:56:52 +0000394 // If there are variable uses, we need to create a temporary string with the
395 // actual value.
396 StringRef RegExToMatch = RegExStr;
397 std::string TmpStr;
398 if (!VariableUses.empty()) {
399 TmpStr = RegExStr;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000400
Chris Lattner8879e062009-09-27 07:56:52 +0000401 unsigned InsertOffset = 0;
402 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
Chris Lattner8879e062009-09-27 07:56:52 +0000403 std::string Value;
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000404
405 if (VariableUses[i].first[0] == '@') {
406 if (!EvaluateExpression(VariableUses[i].first, Value))
407 return StringRef::npos;
408 } else {
409 StringMap<StringRef>::iterator it =
410 VariableTable.find(VariableUses[i].first);
411 // If the variable is undefined, return an error.
412 if (it == VariableTable.end())
413 return StringRef::npos;
414
Hans Wennborg6f4f77b2013-12-12 00:06:41 +0000415 // Look up the value and escape it so that we can put it into the regex.
416 Value += Regex::escape(it->second);
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000417 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000418
Chris Lattner8879e062009-09-27 07:56:52 +0000419 // Plop it into the regex at the adjusted offset.
420 TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
421 Value.begin(), Value.end());
422 InsertOffset += Value.size();
423 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000424
Chris Lattner8879e062009-09-27 07:56:52 +0000425 // Match the newly constructed regex.
426 RegExToMatch = TmpStr;
427 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000428
429
Chris Lattnerb16ab0c2009-09-25 17:23:43 +0000430 SmallVector<StringRef, 4> MatchInfo;
Chris Lattner8879e062009-09-27 07:56:52 +0000431 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
Chris Lattnerb16ab0c2009-09-25 17:23:43 +0000432 return StringRef::npos;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000433
Chris Lattnerb16ab0c2009-09-25 17:23:43 +0000434 // Successful regex match.
435 assert(!MatchInfo.empty() && "Didn't get any match");
436 StringRef FullMatch = MatchInfo[0];
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000437
Chris Lattner8879e062009-09-27 07:56:52 +0000438 // If this defines any variables, remember their values.
Eli Benderskye8b8f1b2012-12-01 21:54:48 +0000439 for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
440 E = VariableDefs.end();
441 I != E; ++I) {
442 assert(I->second < MatchInfo.size() && "Internal paren error");
443 VariableTable[I->first] = MatchInfo[I->second];
Chris Lattner0a4c44b2009-09-25 17:29:36 +0000444 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000445
Chris Lattnerb16ab0c2009-09-25 17:23:43 +0000446 MatchLen = FullMatch.size();
447 return FullMatch.data()-Buffer.data();
Chris Lattnerf08d2db2009-09-24 21:47:32 +0000448}
449
Daniel Dunbarfd29d882009-11-22 22:59:26 +0000450unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
451 const StringMap<StringRef> &VariableTable) const {
452 // Just compute the number of matching characters. For regular expressions, we
453 // just compare against the regex itself and hope for the best.
454 //
455 // FIXME: One easy improvement here is have the regex lib generate a single
456 // example regular expression which matches, and use that as the example
457 // string.
458 StringRef ExampleString(FixedStr);
459 if (ExampleString.empty())
460 ExampleString = RegExStr;
461
Daniel Dunbare9aa36c2010-01-30 00:24:06 +0000462 // Only compare up to the first line in the buffer, or the string size.
463 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
464 BufferPrefix = BufferPrefix.split('\n').first;
465 return BufferPrefix.edit_distance(ExampleString);
Daniel Dunbarfd29d882009-11-22 22:59:26 +0000466}
467
Daniel Dunbare0ef65a2009-11-22 22:08:06 +0000468void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
469 const StringMap<StringRef> &VariableTable) const{
Daniel Dunbare0ef65a2009-11-22 22:08:06 +0000470 // If this was a regular expression using variables, print the current
471 // variable values.
472 if (!VariableUses.empty()) {
473 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
Alp Tokere69170a2014-06-26 22:52:05 +0000474 SmallString<256> Msg;
475 raw_svector_ostream OS(Msg);
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000476 StringRef Var = VariableUses[i].first;
477 if (Var[0] == '@') {
478 std::string Value;
479 if (EvaluateExpression(Var, Value)) {
480 OS << "with expression \"";
481 OS.write_escaped(Var) << "\" equal to \"";
482 OS.write_escaped(Value) << "\"";
483 } else {
484 OS << "uses incorrect expression \"";
485 OS.write_escaped(Var) << "\"";
486 }
Daniel Dunbare0ef65a2009-11-22 22:08:06 +0000487 } else {
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000488 StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
489
490 // Check for undefined variable references.
491 if (it == VariableTable.end()) {
492 OS << "uses undefined variable \"";
493 OS.write_escaped(Var) << "\"";
494 } else {
495 OS << "with variable \"";
496 OS.write_escaped(Var) << "\" equal to \"";
497 OS.write_escaped(it->second) << "\"";
498 }
Daniel Dunbare0ef65a2009-11-22 22:08:06 +0000499 }
500
Chris Lattner03b80a42011-10-16 05:43:57 +0000501 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
502 OS.str());
Daniel Dunbare0ef65a2009-11-22 22:08:06 +0000503 }
504 }
Daniel Dunbarfd29d882009-11-22 22:59:26 +0000505
506 // Attempt to find the closest/best fuzzy match. Usually an error happens
507 // because some string in the output didn't exactly match. In these cases, we
508 // would like to show the user a best guess at what "should have" matched, to
509 // save them having to actually check the input manually.
510 size_t NumLinesForward = 0;
511 size_t Best = StringRef::npos;
512 double BestQuality = 0;
513
514 // Use an arbitrary 4k limit on how far we will search.
Dan Gohman2bf486e2010-01-29 21:57:46 +0000515 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
Daniel Dunbarfd29d882009-11-22 22:59:26 +0000516 if (Buffer[i] == '\n')
517 ++NumLinesForward;
518
Dan Gohmandf22bbf2010-01-29 21:55:16 +0000519 // Patterns have leading whitespace stripped, so skip whitespace when
520 // looking for something which looks like a pattern.
521 if (Buffer[i] == ' ' || Buffer[i] == '\t')
522 continue;
523
Daniel Dunbarfd29d882009-11-22 22:59:26 +0000524 // Compute the "quality" of this match as an arbitrary combination of the
525 // match distance and the number of lines skipped to get to this match.
526 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
527 double Quality = Distance + (NumLinesForward / 100.);
528
529 if (Quality < BestQuality || Best == StringRef::npos) {
530 Best = i;
531 BestQuality = Quality;
532 }
533 }
534
Daniel Dunbarc069cc82010-03-19 18:07:43 +0000535 // Print the "possible intended match here" line if we found something
536 // reasonable and not equal to what we showed in the "scanning from here"
537 // line.
538 if (Best && Best != StringRef::npos && BestQuality < 50) {
539 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
Chris Lattner03b80a42011-10-16 05:43:57 +0000540 SourceMgr::DK_Note, "possible intended match here");
Daniel Dunbarfd29d882009-11-22 22:59:26 +0000541
542 // FIXME: If we wanted to be really friendly we would show why the match
543 // failed, as it can be hard to spot simple one character differences.
544 }
Daniel Dunbare0ef65a2009-11-22 22:08:06 +0000545}
Chris Lattner74d50732009-09-24 20:39:13 +0000546
Adrian Prantl81e5cd92014-01-03 21:49:09 +0000547size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
Eli Bendersky061d2ba2012-12-02 16:02:41 +0000548 // Offset keeps track of the current offset within the input Str
549 size_t Offset = 0;
550 // [...] Nesting depth
551 size_t BracketDepth = 0;
552
553 while (!Str.empty()) {
554 if (Str.startswith("]]") && BracketDepth == 0)
555 return Offset;
556 if (Str[0] == '\\') {
557 // Backslash escapes the next char within regexes, so skip them both.
558 Str = Str.substr(2);
559 Offset += 2;
560 } else {
561 switch (Str[0]) {
562 default:
563 break;
564 case '[':
565 BracketDepth++;
566 break;
567 case ']':
Adrian Prantl81e5cd92014-01-03 21:49:09 +0000568 if (BracketDepth == 0) {
569 SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
570 SourceMgr::DK_Error,
571 "missing closing \"]\" for regex variable");
572 exit(1);
573 }
Eli Bendersky061d2ba2012-12-02 16:02:41 +0000574 BracketDepth--;
575 break;
576 }
577 Str = Str.substr(1);
578 Offset++;
579 }
580 }
581
582 return StringRef::npos;
583}
584
585
Chris Lattner74d50732009-09-24 20:39:13 +0000586//===----------------------------------------------------------------------===//
587// Check Strings.
588//===----------------------------------------------------------------------===//
Chris Lattner3b40b442009-09-24 20:25:55 +0000589
590/// CheckString - This is a check that we found in the input file.
591struct CheckString {
592 /// Pat - The pattern to match.
593 Pattern Pat;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000594
Matt Arsenault13df4622013-11-10 02:04:09 +0000595 /// Prefix - Which prefix name this check matched.
596 StringRef Prefix;
597
Chris Lattner26cccfe2009-08-15 17:41:04 +0000598 /// Loc - The location in the match file that the check string was specified.
599 SMLoc Loc;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000600
Matt Arsenault38820972013-09-17 22:30:02 +0000601 /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
602 /// as opposed to a CHECK: directive.
603 Check::CheckType CheckTy;
Stephen Linf8bd2e52013-07-12 14:51:05 +0000604
Michael Liao91a1b2c2013-05-14 20:34:12 +0000605 /// DagNotStrings - These are all of the strings that are disallowed from
Chris Lattner236d2d52009-09-20 22:35:26 +0000606 /// occurring between this match string and the previous one (or start of
607 /// file).
Michael Liao91a1b2c2013-05-14 20:34:12 +0000608 std::vector<Pattern> DagNotStrings;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000609
Matt Arsenault13df4622013-11-10 02:04:09 +0000610
611 CheckString(const Pattern &P,
612 StringRef S,
613 SMLoc L,
614 Check::CheckType Ty)
615 : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {}
Michael Liaodcc7d482013-05-14 20:29:52 +0000616
Michael Liao91a1b2c2013-05-14 20:34:12 +0000617 /// Check - Match check string and its "not strings" and/or "dag strings".
Stephen Line93a3a02013-10-11 18:38:36 +0000618 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
Stephen Linf8bd2e52013-07-12 14:51:05 +0000619 size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
Michael Liaodcc7d482013-05-14 20:29:52 +0000620
621 /// CheckNext - Verify there is a single line in the given buffer.
622 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
623
Duncan P. N. Exon Smith01ac1702015-02-26 04:53:00 +0000624 /// CheckSame - Verify there is no newline in the given buffer.
625 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
626
Michael Liaodcc7d482013-05-14 20:29:52 +0000627 /// CheckNot - Verify there's no "not strings" in the given buffer.
628 bool CheckNot(const SourceMgr &SM, StringRef Buffer,
Michael Liao91a1b2c2013-05-14 20:34:12 +0000629 const std::vector<const Pattern *> &NotStrings,
Michael Liaodcc7d482013-05-14 20:29:52 +0000630 StringMap<StringRef> &VariableTable) const;
Michael Liao91a1b2c2013-05-14 20:34:12 +0000631
632 /// CheckDag - Match "dag strings" and their mixed "not strings".
633 size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
634 std::vector<const Pattern *> &NotStrings,
635 StringMap<StringRef> &VariableTable) const;
Chris Lattner26cccfe2009-08-15 17:41:04 +0000636};
637
Guy Benyei5ea04c32013-02-06 20:40:38 +0000638/// Canonicalize whitespaces in the input file. Line endings are replaced
639/// with UNIX-style '\n'.
640///
641/// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
642/// characters to a single space.
David Blaikie1961f142014-08-21 20:44:56 +0000643static std::unique_ptr<MemoryBuffer>
644CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
645 bool PreserveHorizontal) {
Chris Lattner0e45d242010-04-05 22:42:30 +0000646 SmallString<128> NewFile;
Chris Lattnera2f8fc52009-09-24 20:45:07 +0000647 NewFile.reserve(MB->getBufferSize());
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000648
Chris Lattnera2f8fc52009-09-24 20:45:07 +0000649 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
650 Ptr != End; ++Ptr) {
NAKAMURA Takumifd781bf2010-11-14 03:28:22 +0000651 // Eliminate trailing dosish \r.
652 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
653 continue;
654 }
655
Michael Liao61bed2f2013-04-25 18:54:02 +0000656 // If current char is not a horizontal whitespace or if horizontal
Guy Benyei5ea04c32013-02-06 20:40:38 +0000657 // whitespace canonicalization is disabled, dump it to output as is.
658 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
Chris Lattnera2f8fc52009-09-24 20:45:07 +0000659 NewFile.push_back(*Ptr);
660 continue;
661 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000662
Chris Lattnera2f8fc52009-09-24 20:45:07 +0000663 // Otherwise, add one space and advance over neighboring space.
664 NewFile.push_back(' ');
665 while (Ptr+1 != End &&
666 (Ptr[1] == ' ' || Ptr[1] == '\t'))
667 ++Ptr;
668 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000669
David Blaikie1961f142014-08-21 20:44:56 +0000670 return std::unique_ptr<MemoryBuffer>(
671 MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()));
Chris Lattnera2f8fc52009-09-24 20:45:07 +0000672}
673
/// Return true if \p c can be part of a word: an alphanumeric character, a
/// hyphen or an underscore.
static bool IsPartOfWord(char c) {
  // The <cctype> classifiers are only defined for values representable as
  // unsigned char (or EOF); plain char may be negative, so convert first.
  return isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_';
}
677
Matt Arsenault13df4622013-11-10 02:04:09 +0000678// Get the size of the prefix extension.
679static size_t CheckTypeSize(Check::CheckType Ty) {
680 switch (Ty) {
681 case Check::CheckNone:
682 return 0;
683
684 case Check::CheckPlain:
685 return sizeof(":") - 1;
686
687 case Check::CheckNext:
688 return sizeof("-NEXT:") - 1;
689
Duncan P. N. Exon Smith01ac1702015-02-26 04:53:00 +0000690 case Check::CheckSame:
691 return sizeof("-SAME:") - 1;
692
Matt Arsenault13df4622013-11-10 02:04:09 +0000693 case Check::CheckNot:
694 return sizeof("-NOT:") - 1;
695
696 case Check::CheckDAG:
697 return sizeof("-DAG:") - 1;
698
699 case Check::CheckLabel:
700 return sizeof("-LABEL:") - 1;
701
702 case Check::CheckEOF:
703 llvm_unreachable("Should not be using EOF size");
704 }
705
706 llvm_unreachable("Bad check type");
707}
708
709static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
Matt Arsenaultc4d2d472013-09-17 22:45:57 +0000710 char NextChar = Buffer[Prefix.size()];
Matt Arsenault38820972013-09-17 22:30:02 +0000711
712 // Verify that the : is present after the prefix.
Matt Arsenault13df4622013-11-10 02:04:09 +0000713 if (NextChar == ':')
Matt Arsenault38820972013-09-17 22:30:02 +0000714 return Check::CheckPlain;
Matt Arsenault38820972013-09-17 22:30:02 +0000715
Matt Arsenault13df4622013-11-10 02:04:09 +0000716 if (NextChar != '-')
Matt Arsenault38820972013-09-17 22:30:02 +0000717 return Check::CheckNone;
Matt Arsenault38820972013-09-17 22:30:02 +0000718
Matt Arsenaultc4d2d472013-09-17 22:45:57 +0000719 StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
Matt Arsenault13df4622013-11-10 02:04:09 +0000720 if (Rest.startswith("NEXT:"))
Matt Arsenault38820972013-09-17 22:30:02 +0000721 return Check::CheckNext;
Matt Arsenault38820972013-09-17 22:30:02 +0000722
Duncan P. N. Exon Smith01ac1702015-02-26 04:53:00 +0000723 if (Rest.startswith("SAME:"))
724 return Check::CheckSame;
725
Matt Arsenault13df4622013-11-10 02:04:09 +0000726 if (Rest.startswith("NOT:"))
Matt Arsenault38820972013-09-17 22:30:02 +0000727 return Check::CheckNot;
Matt Arsenault38820972013-09-17 22:30:02 +0000728
Matt Arsenault13df4622013-11-10 02:04:09 +0000729 if (Rest.startswith("DAG:"))
Matt Arsenault38820972013-09-17 22:30:02 +0000730 return Check::CheckDAG;
Matt Arsenault38820972013-09-17 22:30:02 +0000731
Matt Arsenault13df4622013-11-10 02:04:09 +0000732 if (Rest.startswith("LABEL:"))
Matt Arsenault38820972013-09-17 22:30:02 +0000733 return Check::CheckLabel;
Matt Arsenault13df4622013-11-10 02:04:09 +0000734
735 return Check::CheckNone;
736}
737
738// From the given position, find the next character after the word.
739static size_t SkipWord(StringRef Str, size_t Loc) {
740 while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
741 ++Loc;
742 return Loc;
743}
744
745// Try to find the first match in buffer for any prefix. If a valid match is
746// found, return that prefix and set its type and location. If there are almost
747// matches (e.g. the actual prefix string is found, but is not an actual check
748// string), but no valid match, return an empty string and set the position to
749// resume searching from. If no partial matches are found, return an empty
750// string and the location will be StringRef::npos. If one prefix is a substring
751// of another, the maximal match should be found. e.g. if "A" and "AA" are
752// prefixes then AA-CHECK: should match the second one.
753static StringRef FindFirstCandidateMatch(StringRef &Buffer,
754 Check::CheckType &CheckTy,
755 size_t &CheckLoc) {
756 StringRef FirstPrefix;
757 size_t FirstLoc = StringRef::npos;
758 size_t SearchLoc = StringRef::npos;
759 Check::CheckType FirstTy = Check::CheckNone;
760
761 CheckTy = Check::CheckNone;
762 CheckLoc = StringRef::npos;
763
764 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
765 I != E; ++I) {
766 StringRef Prefix(*I);
767 size_t PrefixLoc = Buffer.find(Prefix);
768
769 if (PrefixLoc == StringRef::npos)
770 continue;
771
772 // Track where we are searching for invalid prefixes that look almost right.
773 // We need to only advance to the first partial match on the next attempt
774 // since a partial match could be a substring of a later, valid prefix.
775 // Need to skip to the end of the word, otherwise we could end up
776 // matching a prefix in a substring later.
777 if (PrefixLoc < SearchLoc)
778 SearchLoc = SkipWord(Buffer, PrefixLoc);
779
780 // We only want to find the first match to avoid skipping some.
781 if (PrefixLoc > FirstLoc)
782 continue;
Alexey Samsonova7181a12013-11-13 14:12:52 +0000783 // If one matching check-prefix is a prefix of another, choose the
784 // longer one.
785 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
786 continue;
Matt Arsenault13df4622013-11-10 02:04:09 +0000787
788 StringRef Rest = Buffer.drop_front(PrefixLoc);
789 // Make sure we have actually found the prefix, and not a word containing
790 // it. This should also prevent matching the wrong prefix when one is a
791 // substring of another.
792 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
Daniel Sanders43b5f572013-11-20 13:25:05 +0000793 FirstTy = Check::CheckNone;
794 else
795 FirstTy = FindCheckType(Rest, Prefix);
Matt Arsenault13df4622013-11-10 02:04:09 +0000796
Matt Arsenault13df4622013-11-10 02:04:09 +0000797 FirstLoc = PrefixLoc;
Alexey Samsonova7181a12013-11-13 14:12:52 +0000798 FirstPrefix = Prefix;
Matt Arsenault38820972013-09-17 22:30:02 +0000799 }
800
Alexey Samsonova7181a12013-11-13 14:12:52 +0000801 // If the first prefix is invalid, we should continue the search after it.
802 if (FirstTy == Check::CheckNone) {
Matt Arsenault13df4622013-11-10 02:04:09 +0000803 CheckLoc = SearchLoc;
Alexey Samsonova7181a12013-11-13 14:12:52 +0000804 return "";
Matt Arsenault13df4622013-11-10 02:04:09 +0000805 }
806
Alexey Samsonova7181a12013-11-13 14:12:52 +0000807 CheckTy = FirstTy;
808 CheckLoc = FirstLoc;
Matt Arsenault13df4622013-11-10 02:04:09 +0000809 return FirstPrefix;
810}
811
812static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
813 unsigned &LineNumber,
814 Check::CheckType &CheckTy,
815 size_t &CheckLoc) {
816 while (!Buffer.empty()) {
817 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
818 // If we found a real match, we are done.
819 if (!Prefix.empty()) {
820 LineNumber += Buffer.substr(0, CheckLoc).count('\n');
821 return Prefix;
822 }
823
824 // We didn't find any almost matches either, we are also done.
825 if (CheckLoc == StringRef::npos)
826 return StringRef();
827
828 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
829
830 // Advance to the last possible match we found and try again.
831 Buffer = Buffer.drop_front(CheckLoc + 1);
832 }
833
834 return StringRef();
Matt Arsenault38820972013-09-17 22:30:02 +0000835}
Chris Lattneree3c74f2009-07-08 18:44:05 +0000836
Chris Lattneree3c74f2009-07-08 18:44:05 +0000837/// ReadCheckFile - Read the check file, which specifies the sequence of
838/// expected strings. The strings are added to the CheckStrings vector.
Eli Bendersky43d50d42012-11-30 14:22:14 +0000839/// Returns true in case of an error, false otherwise.
Chris Lattneree3c74f2009-07-08 18:44:05 +0000840static bool ReadCheckFile(SourceMgr &SM,
Chris Lattner26cccfe2009-08-15 17:41:04 +0000841 std::vector<CheckString> &CheckStrings) {
Rafael Espindolaadf21f22014-07-06 17:43:13 +0000842 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
843 MemoryBuffer::getFileOrSTDIN(CheckFilename);
844 if (std::error_code EC = FileOrErr.getError()) {
845 errs() << "Could not open check file '" << CheckFilename
846 << "': " << EC.message() << '\n';
Chris Lattneree3c74f2009-07-08 18:44:05 +0000847 return true;
848 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000849
Chris Lattnera2f8fc52009-09-24 20:45:07 +0000850 // If we want to canonicalize whitespace, strip excess whitespace from the
Guy Benyei5ea04c32013-02-06 20:40:38 +0000851 // buffer containing the CHECK lines. Remove DOS style line endings.
Rafael Espindola3560ff22014-08-27 20:03:13 +0000852 std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile(
853 std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace);
Chris Lattneree3c74f2009-07-08 18:44:05 +0000854
Chris Lattner10f10ce2009-08-15 18:00:42 +0000855 // Find all instances of CheckPrefix followed by : in the file.
Chris Lattnercaa5fc02009-09-20 22:11:44 +0000856 StringRef Buffer = F->getBuffer();
Alexander Kornienko56ccdbb2014-07-11 12:39:32 +0000857
David Blaikie1961f142014-08-21 20:44:56 +0000858 SM.AddNewSourceBuffer(std::move(F), SMLoc());
859
Alexander Kornienko56ccdbb2014-07-11 12:39:32 +0000860 std::vector<Pattern> ImplicitNegativeChecks;
861 for (const auto &PatternString : ImplicitCheckNot) {
862 // Create a buffer with fake command line content in order to display the
863 // command line option responsible for the specific implicit CHECK-NOT.
864 std::string Prefix = std::string("-") + ImplicitCheckNot.ArgStr + "='";
865 std::string Suffix = "'";
Rafael Espindola3560ff22014-08-27 20:03:13 +0000866 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
867 Prefix + PatternString + Suffix, "command line");
868
Alexander Kornienko56ccdbb2014-07-11 12:39:32 +0000869 StringRef PatternInBuffer =
870 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
David Blaikie1961f142014-08-21 20:44:56 +0000871 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
Alexander Kornienko56ccdbb2014-07-11 12:39:32 +0000872
873 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
874 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
875 "IMPLICIT-CHECK", SM, 0);
876 }
877
878
879 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000880
Eli Bendersky43d50d42012-11-30 14:22:14 +0000881 // LineNumber keeps track of the line on which CheckPrefix instances are
882 // found.
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000883 unsigned LineNumber = 1;
884
Chris Lattneree3c74f2009-07-08 18:44:05 +0000885 while (1) {
Matt Arsenault13df4622013-11-10 02:04:09 +0000886 Check::CheckType CheckTy;
887 size_t PrefixLoc;
888
889 // See if a prefix occurs in the memory buffer.
890 StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
891 LineNumber,
892 CheckTy,
893 PrefixLoc);
894 if (UsedPrefix.empty())
Chris Lattneree3c74f2009-07-08 18:44:05 +0000895 break;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000896
Matt Arsenault13df4622013-11-10 02:04:09 +0000897 Buffer = Buffer.drop_front(PrefixLoc);
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000898
Matt Arsenault13df4622013-11-10 02:04:09 +0000899 // Location to use for error messages.
900 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
Alexander Kornienko92987fb2012-11-14 21:07:37 +0000901
Matt Arsenault13df4622013-11-10 02:04:09 +0000902 // PrefixLoc is to the start of the prefix. Skip to the end.
903 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000904
Matt Arsenault38820972013-09-17 22:30:02 +0000905 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
906 // leading and trailing whitespace.
Chris Lattner236d2d52009-09-20 22:35:26 +0000907 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000908
Chris Lattneree3c74f2009-07-08 18:44:05 +0000909 // Scan ahead to the end of line.
Chris Lattnercaa5fc02009-09-20 22:11:44 +0000910 size_t EOL = Buffer.find_first_of("\n\r");
Chris Lattner74d50732009-09-24 20:39:13 +0000911
Dan Gohman838fb092010-01-29 21:53:18 +0000912 // Remember the location of the start of the pattern, for diagnostics.
913 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
914
Chris Lattner74d50732009-09-24 20:39:13 +0000915 // Parse the pattern.
Matt Arsenault38820972013-09-17 22:30:02 +0000916 Pattern P(CheckTy);
Matt Arsenault13df4622013-11-10 02:04:09 +0000917 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
Chris Lattneree3c74f2009-07-08 18:44:05 +0000918 return true;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000919
Stephen Linf8bd2e52013-07-12 14:51:05 +0000920 // Verify that CHECK-LABEL lines do not define or use variables
Matt Arsenault38820972013-09-17 22:30:02 +0000921 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
Matt Arsenault13df4622013-11-10 02:04:09 +0000922 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
Stephen Linf8bd2e52013-07-12 14:51:05 +0000923 SourceMgr::DK_Error,
Matt Arsenault13df4622013-11-10 02:04:09 +0000924 "found '" + UsedPrefix + "-LABEL:'"
925 " with variable definition or use");
Stephen Linf8bd2e52013-07-12 14:51:05 +0000926 return true;
927 }
928
Chris Lattner74d50732009-09-24 20:39:13 +0000929 Buffer = Buffer.substr(EOL);
930
Chris Lattnerda108b42009-08-15 18:32:21 +0000931 // Verify that CHECK-NEXT lines have at least one CHECK line before them.
Duncan P. N. Exon Smith01ac1702015-02-26 04:53:00 +0000932 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
933 CheckStrings.empty()) {
934 StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
Matt Arsenault13df4622013-11-10 02:04:09 +0000935 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
Chris Lattner03b80a42011-10-16 05:43:57 +0000936 SourceMgr::DK_Error,
Duncan P. N. Exon Smith01ac1702015-02-26 04:53:00 +0000937 "found '" + UsedPrefix + "-" + Type + "' without previous '"
Matt Arsenault13df4622013-11-10 02:04:09 +0000938 + UsedPrefix + ": line");
Chris Lattnerda108b42009-08-15 18:32:21 +0000939 return true;
940 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000941
Michael Liao91a1b2c2013-05-14 20:34:12 +0000942 // Handle CHECK-DAG/-NOT.
Matt Arsenault38820972013-09-17 22:30:02 +0000943 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
Michael Liao91a1b2c2013-05-14 20:34:12 +0000944 DagNotMatches.push_back(P);
Chris Lattner74d50732009-09-24 20:39:13 +0000945 continue;
946 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000947
Chris Lattneree3c74f2009-07-08 18:44:05 +0000948 // Okay, add the string we captured to the output vector and move on.
Benjamin Kramerf5e2fc42015-05-29 19:43:39 +0000949 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc, CheckTy);
Michael Liao91a1b2c2013-05-14 20:34:12 +0000950 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
Alexander Kornienko56ccdbb2014-07-11 12:39:32 +0000951 DagNotMatches = ImplicitNegativeChecks;
Chris Lattneree3c74f2009-07-08 18:44:05 +0000952 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000953
Matt Arsenault13df4622013-11-10 02:04:09 +0000954 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
955 // prefix as a filler for the error message.
Michael Liao91a1b2c2013-05-14 20:34:12 +0000956 if (!DagNotMatches.empty()) {
Benjamin Kramerf5e2fc42015-05-29 19:43:39 +0000957 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
958 SMLoc::getFromPointer(Buffer.data()),
959 Check::CheckEOF);
Michael Liao91a1b2c2013-05-14 20:34:12 +0000960 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
Jakob Stoklund Oleseneba55822010-10-15 17:47:12 +0000961 }
962
Chris Lattneree3c74f2009-07-08 18:44:05 +0000963 if (CheckStrings.empty()) {
Matt Arsenault13df4622013-11-10 02:04:09 +0000964 errs() << "error: no check strings found with prefix"
965 << (CheckPrefixes.size() > 1 ? "es " : " ");
Chris Bieneman3e3ef2f2015-04-29 21:45:24 +0000966 prefix_iterator I = CheckPrefixes.begin();
967 prefix_iterator E = CheckPrefixes.end();
968 if (I != E) {
969 errs() << "\'" << *I << ":'";
970 ++I;
Matt Arsenault13df4622013-11-10 02:04:09 +0000971 }
Chris Bieneman3e3ef2f2015-04-29 21:45:24 +0000972 for (; I != E; ++I)
973 errs() << ", \'" << *I << ":'";
Matt Arsenault13df4622013-11-10 02:04:09 +0000974
975 errs() << '\n';
Chris Lattneree3c74f2009-07-08 18:44:05 +0000976 return true;
977 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000978
Chris Lattneree3c74f2009-07-08 18:44:05 +0000979 return false;
980}
981
Michael Liao91a1b2c2013-05-14 20:34:12 +0000982static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc,
983 const Pattern &Pat, StringRef Buffer,
Daniel Dunbare0ef65a2009-11-22 22:08:06 +0000984 StringMap<StringRef> &VariableTable) {
Chris Lattnerda108b42009-08-15 18:32:21 +0000985 // Otherwise, we have an error, emit an error message.
Michael Liao91a1b2c2013-05-14 20:34:12 +0000986 SM.PrintMessage(Loc, SourceMgr::DK_Error,
Chris Lattner03b80a42011-10-16 05:43:57 +0000987 "expected string not found in input");
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000988
Chris Lattnerda108b42009-08-15 18:32:21 +0000989 // Print the "scanning from here" line. If the current position is at the
990 // end of a line, advance to the start of the next line.
Chris Lattnercaa5fc02009-09-20 22:11:44 +0000991 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +0000992
Chris Lattner03b80a42011-10-16 05:43:57 +0000993 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
994 "scanning from here");
Daniel Dunbare0ef65a2009-11-22 22:08:06 +0000995
996 // Allow the pattern to print additional information if desired.
Michael Liao91a1b2c2013-05-14 20:34:12 +0000997 Pat.PrintFailureInfo(SM, Buffer, VariableTable);
998}
999
1000static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
1001 StringRef Buffer,
1002 StringMap<StringRef> &VariableTable) {
1003 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
Chris Lattnerda108b42009-08-15 18:32:21 +00001004}
1005
Chris Lattner37183582009-09-20 22:42:44 +00001006/// CountNumNewlinesBetween - Count the number of newlines in the specified
1007/// range.
Richard Smith592fe882014-04-07 17:09:53 +00001008static unsigned CountNumNewlinesBetween(StringRef Range,
1009 const char *&FirstNewLine) {
Chris Lattnerda108b42009-08-15 18:32:21 +00001010 unsigned NumNewLines = 0;
Chris Lattner37183582009-09-20 22:42:44 +00001011 while (1) {
Chris Lattnerda108b42009-08-15 18:32:21 +00001012 // Scan for newline.
Chris Lattner37183582009-09-20 22:42:44 +00001013 Range = Range.substr(Range.find_first_of("\n\r"));
1014 if (Range.empty()) return NumNewLines;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001015
Chris Lattnerda108b42009-08-15 18:32:21 +00001016 ++NumNewLines;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001017
Chris Lattnerda108b42009-08-15 18:32:21 +00001018 // Handle \n\r and \r\n as a single newline.
Chris Lattner37183582009-09-20 22:42:44 +00001019 if (Range.size() > 1 &&
1020 (Range[1] == '\n' || Range[1] == '\r') &&
1021 (Range[0] != Range[1]))
1022 Range = Range.substr(1);
1023 Range = Range.substr(1);
Richard Smith592fe882014-04-07 17:09:53 +00001024
1025 if (NumNewLines == 1)
1026 FirstNewLine = Range.begin();
Chris Lattnerda108b42009-08-15 18:32:21 +00001027 }
Chris Lattnerda108b42009-08-15 18:32:21 +00001028}
1029
Michael Liaodcc7d482013-05-14 20:29:52 +00001030size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
Stephen Line93a3a02013-10-11 18:38:36 +00001031 bool IsLabelScanMode, size_t &MatchLen,
Michael Liaodcc7d482013-05-14 20:29:52 +00001032 StringMap<StringRef> &VariableTable) const {
Michael Liao91a1b2c2013-05-14 20:34:12 +00001033 size_t LastPos = 0;
1034 std::vector<const Pattern *> NotStrings;
1035
Stephen Line93a3a02013-10-11 18:38:36 +00001036 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1037 // bounds; we have not processed variable definitions within the bounded block
1038 // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1039 // over the block again (including the last CHECK-LABEL) in normal mode.
1040 if (!IsLabelScanMode) {
1041 // Match "dag strings" (with mixed "not strings" if any).
1042 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1043 if (LastPos == StringRef::npos)
1044 return StringRef::npos;
1045 }
Michael Liao91a1b2c2013-05-14 20:34:12 +00001046
1047 // Match itself from the last position after matching CHECK-DAG.
1048 StringRef MatchBuffer = Buffer.substr(LastPos);
1049 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
Michael Liaodcc7d482013-05-14 20:29:52 +00001050 if (MatchPos == StringRef::npos) {
Michael Liao91a1b2c2013-05-14 20:34:12 +00001051 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
Michael Liaodcc7d482013-05-14 20:29:52 +00001052 return StringRef::npos;
1053 }
1054
Stephen Line93a3a02013-10-11 18:38:36 +00001055 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1056 // or CHECK-NOT
1057 if (!IsLabelScanMode) {
Stephen Linf8bd2e52013-07-12 14:51:05 +00001058 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
Michael Liaodcc7d482013-05-14 20:29:52 +00001059
Stephen Linf8bd2e52013-07-12 14:51:05 +00001060 // If this check is a "CHECK-NEXT", verify that the previous match was on
1061 // the previous line (i.e. that there is one newline between them).
1062 if (CheckNext(SM, SkippedRegion))
1063 return StringRef::npos;
Michael Liaodcc7d482013-05-14 20:29:52 +00001064
Duncan P. N. Exon Smith01ac1702015-02-26 04:53:00 +00001065 // If this check is a "CHECK-SAME", verify that the previous match was on
1066 // the same line (i.e. that there is no newline between them).
1067 if (CheckSame(SM, SkippedRegion))
1068 return StringRef::npos;
1069
Stephen Linf8bd2e52013-07-12 14:51:05 +00001070 // If this match had "not strings", verify that they don't exist in the
1071 // skipped region.
1072 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1073 return StringRef::npos;
1074 }
Michael Liaodcc7d482013-05-14 20:29:52 +00001075
Mehdi Amini7dfb92b2015-03-12 00:07:29 +00001076 return LastPos + MatchPos;
Michael Liaodcc7d482013-05-14 20:29:52 +00001077}
1078
1079bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
Matt Arsenault38820972013-09-17 22:30:02 +00001080 if (CheckTy != Check::CheckNext)
Michael Liaodcc7d482013-05-14 20:29:52 +00001081 return false;
1082
1083 // Count the number of newlines between the previous match and this one.
1084 assert(Buffer.data() !=
1085 SM.getMemoryBuffer(
1086 SM.FindBufferContainingLoc(
1087 SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1088 "CHECK-NEXT can't be the first check in a file");
1089
Craig Topper66f09ad2014-06-08 22:29:17 +00001090 const char *FirstNewLine = nullptr;
Richard Smith592fe882014-04-07 17:09:53 +00001091 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
Michael Liaodcc7d482013-05-14 20:29:52 +00001092
1093 if (NumNewLines == 0) {
Matt Arsenault13df4622013-11-10 02:04:09 +00001094 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
Michael Liaodcc7d482013-05-14 20:29:52 +00001095 "-NEXT: is on the same line as previous match");
1096 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1097 SourceMgr::DK_Note, "'next' match was here");
1098 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1099 "previous match ended here");
1100 return true;
1101 }
1102
1103 if (NumNewLines != 1) {
Matt Arsenault13df4622013-11-10 02:04:09 +00001104 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
Michael Liaodcc7d482013-05-14 20:29:52 +00001105 "-NEXT: is not on the line after the previous match");
1106 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1107 SourceMgr::DK_Note, "'next' match was here");
1108 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1109 "previous match ended here");
Richard Smith592fe882014-04-07 17:09:53 +00001110 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1111 "non-matching line after previous match is here");
Michael Liaodcc7d482013-05-14 20:29:52 +00001112 return true;
1113 }
1114
1115 return false;
1116}
1117
Duncan P. N. Exon Smith01ac1702015-02-26 04:53:00 +00001118bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1119 if (CheckTy != Check::CheckSame)
1120 return false;
1121
1122 // Count the number of newlines between the previous match and this one.
1123 assert(Buffer.data() !=
1124 SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1125 SMLoc::getFromPointer(Buffer.data())))
1126 ->getBufferStart() &&
1127 "CHECK-SAME can't be the first check in a file");
1128
1129 const char *FirstNewLine = nullptr;
1130 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1131
1132 if (NumNewLines != 0) {
1133 SM.PrintMessage(Loc, SourceMgr::DK_Error,
1134 Prefix +
1135 "-SAME: is not on the same line as the previous match");
1136 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1137 "'next' match was here");
1138 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1139 "previous match ended here");
1140 return true;
1141 }
1142
1143 return false;
1144}
1145
Michael Liaodcc7d482013-05-14 20:29:52 +00001146bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
Michael Liao91a1b2c2013-05-14 20:34:12 +00001147 const std::vector<const Pattern *> &NotStrings,
Michael Liaodcc7d482013-05-14 20:29:52 +00001148 StringMap<StringRef> &VariableTable) const {
1149 for (unsigned ChunkNo = 0, e = NotStrings.size();
1150 ChunkNo != e; ++ChunkNo) {
Michael Liao91a1b2c2013-05-14 20:34:12 +00001151 const Pattern *Pat = NotStrings[ChunkNo];
Matt Arsenault38820972013-09-17 22:30:02 +00001152 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
Michael Liao91a1b2c2013-05-14 20:34:12 +00001153
Michael Liaodcc7d482013-05-14 20:29:52 +00001154 size_t MatchLen = 0;
Michael Liao91a1b2c2013-05-14 20:34:12 +00001155 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
Michael Liaodcc7d482013-05-14 20:29:52 +00001156
1157 if (Pos == StringRef::npos) continue;
1158
1159 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1160 SourceMgr::DK_Error,
Matt Arsenault13df4622013-11-10 02:04:09 +00001161 Prefix + "-NOT: string occurred!");
Michael Liao91a1b2c2013-05-14 20:34:12 +00001162 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
Matt Arsenault13df4622013-11-10 02:04:09 +00001163 Prefix + "-NOT: pattern specified here");
Michael Liaodcc7d482013-05-14 20:29:52 +00001164 return true;
1165 }
1166
1167 return false;
1168}
1169
Michael Liao91a1b2c2013-05-14 20:34:12 +00001170size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1171 std::vector<const Pattern *> &NotStrings,
1172 StringMap<StringRef> &VariableTable) const {
1173 if (DagNotStrings.empty())
1174 return 0;
1175
1176 size_t LastPos = 0;
1177 size_t StartPos = LastPos;
1178
1179 for (unsigned ChunkNo = 0, e = DagNotStrings.size();
1180 ChunkNo != e; ++ChunkNo) {
1181 const Pattern &Pat = DagNotStrings[ChunkNo];
1182
Matt Arsenault38820972013-09-17 22:30:02 +00001183 assert((Pat.getCheckTy() == Check::CheckDAG ||
1184 Pat.getCheckTy() == Check::CheckNot) &&
Michael Liao91a1b2c2013-05-14 20:34:12 +00001185 "Invalid CHECK-DAG or CHECK-NOT!");
1186
Matt Arsenault38820972013-09-17 22:30:02 +00001187 if (Pat.getCheckTy() == Check::CheckNot) {
Michael Liao91a1b2c2013-05-14 20:34:12 +00001188 NotStrings.push_back(&Pat);
1189 continue;
1190 }
1191
Matt Arsenault38820972013-09-17 22:30:02 +00001192 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
Michael Liao91a1b2c2013-05-14 20:34:12 +00001193
1194 size_t MatchLen = 0, MatchPos;
1195
1196 // CHECK-DAG always matches from the start.
1197 StringRef MatchBuffer = Buffer.substr(StartPos);
1198 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1199 // With a group of CHECK-DAGs, a single mismatching means the match on
1200 // that group of CHECK-DAGs fails immediately.
1201 if (MatchPos == StringRef::npos) {
1202 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1203 return StringRef::npos;
1204 }
1205 // Re-calc it as the offset relative to the start of the original string.
1206 MatchPos += StartPos;
1207
1208 if (!NotStrings.empty()) {
1209 if (MatchPos < LastPos) {
1210 // Reordered?
1211 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1212 SourceMgr::DK_Error,
Matt Arsenault13df4622013-11-10 02:04:09 +00001213 Prefix + "-DAG: found a match of CHECK-DAG"
Michael Liao91a1b2c2013-05-14 20:34:12 +00001214 " reordering across a CHECK-NOT");
1215 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1216 SourceMgr::DK_Note,
Matt Arsenault13df4622013-11-10 02:04:09 +00001217 Prefix + "-DAG: the farthest match of CHECK-DAG"
Michael Liao91a1b2c2013-05-14 20:34:12 +00001218 " is found here");
1219 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
Matt Arsenault13df4622013-11-10 02:04:09 +00001220 Prefix + "-NOT: the crossed pattern specified"
Michael Liao91a1b2c2013-05-14 20:34:12 +00001221 " here");
1222 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
Matt Arsenault13df4622013-11-10 02:04:09 +00001223 Prefix + "-DAG: the reordered pattern specified"
Michael Liao91a1b2c2013-05-14 20:34:12 +00001224 " here");
1225 return StringRef::npos;
1226 }
1227 // All subsequent CHECK-DAGs should be matched from the farthest
1228 // position of all precedent CHECK-DAGs (including this one.)
1229 StartPos = LastPos;
1230 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1231 // CHECK-DAG, verify that there's no 'not' strings occurred in that
1232 // region.
1233 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
Tim Northovercf708c32013-08-02 11:32:50 +00001234 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
Michael Liao91a1b2c2013-05-14 20:34:12 +00001235 return StringRef::npos;
1236 // Clear "not strings".
1237 NotStrings.clear();
1238 }
1239
1240 // Update the last position with CHECK-DAG matches.
1241 LastPos = std::max(MatchPos + MatchLen, LastPos);
1242 }
1243
1244 return LastPos;
1245}
1246
Matt Arsenault13df4622013-11-10 02:04:09 +00001247// A check prefix must contain only alphanumeric, hyphens and underscores.
1248static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1249 Regex Validator("^[a-zA-Z0-9_-]*$");
1250 return Validator.match(CheckPrefix);
1251}
1252
1253static bool ValidateCheckPrefixes() {
1254 StringSet<> PrefixSet;
1255
1256 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
1257 I != E; ++I) {
1258 StringRef Prefix(*I);
1259
Eli Bendersky24412b12014-07-29 20:30:53 +00001260 // Reject empty prefixes.
1261 if (Prefix == "")
1262 return false;
1263
David Blaikie03569752014-11-19 02:56:00 +00001264 if (!PrefixSet.insert(Prefix).second)
Matt Arsenault13df4622013-11-10 02:04:09 +00001265 return false;
1266
1267 if (!ValidateCheckPrefix(Prefix))
1268 return false;
1269 }
1270
1271 return true;
1272}
1273
1274// I don't think there's a way to specify an initial value for cl::list,
1275// so if nothing was specified, add the default
1276static void AddCheckPrefixIfNeeded() {
1277 if (CheckPrefixes.empty())
1278 CheckPrefixes.push_back("CHECK");
Rui Ueyamac27351582013-08-12 23:05:59 +00001279}
1280
Chris Lattneree3c74f2009-07-08 18:44:05 +00001281int main(int argc, char **argv) {
1282 sys::PrintStackTraceOnErrorSignal();
1283 PrettyStackTraceProgram X(argc, argv);
1284 cl::ParseCommandLineOptions(argc, argv);
1285
Matt Arsenault13df4622013-11-10 02:04:09 +00001286 if (!ValidateCheckPrefixes()) {
1287 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1288 "start with a letter and contain only alphanumeric characters, "
1289 "hyphens and underscores\n";
Rui Ueyamac27351582013-08-12 23:05:59 +00001290 return 2;
1291 }
1292
Matt Arsenault13df4622013-11-10 02:04:09 +00001293 AddCheckPrefixIfNeeded();
1294
Chris Lattneree3c74f2009-07-08 18:44:05 +00001295 SourceMgr SM;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001296
Chris Lattneree3c74f2009-07-08 18:44:05 +00001297 // Read the expected strings from the check file.
Chris Lattner26cccfe2009-08-15 17:41:04 +00001298 std::vector<CheckString> CheckStrings;
Chris Lattneree3c74f2009-07-08 18:44:05 +00001299 if (ReadCheckFile(SM, CheckStrings))
1300 return 2;
1301
1302 // Open the file to check and add it to SourceMgr.
Rafael Espindolaadf21f22014-07-06 17:43:13 +00001303 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1304 MemoryBuffer::getFileOrSTDIN(InputFilename);
1305 if (std::error_code EC = FileOrErr.getError()) {
1306 errs() << "Could not open input file '" << InputFilename
1307 << "': " << EC.message() << '\n';
Eli Bendersky8e1c6472012-11-30 13:51:33 +00001308 return 2;
Chris Lattneree3c74f2009-07-08 18:44:05 +00001309 }
Rafael Espindola3f6481d2014-08-01 14:31:55 +00001310 std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001311
Justin Bogner1b9f9362014-08-07 18:40:37 +00001312 if (File->getBufferSize() == 0 && !AllowEmptyInput) {
Chris Lattnerb692bed2011-02-09 16:46:02 +00001313 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
Eli Bendersky8e1c6472012-11-30 13:51:33 +00001314 return 2;
Chris Lattnerb692bed2011-02-09 16:46:02 +00001315 }
Benjamin Kramere963d662013-03-23 13:56:23 +00001316
Chris Lattner2c3e5cd2009-07-11 18:58:15 +00001317 // Remove duplicate spaces in the input file if requested.
Guy Benyei5ea04c32013-02-06 20:40:38 +00001318 // Remove DOS style line endings.
David Blaikie1961f142014-08-21 20:44:56 +00001319 std::unique_ptr<MemoryBuffer> F =
1320 CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001321
Chris Lattneree3c74f2009-07-08 18:44:05 +00001322 // Check that we have all of the expected strings, in order, in the input
1323 // file.
Chris Lattnercaa5fc02009-09-20 22:11:44 +00001324 StringRef Buffer = F->getBuffer();
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001325
David Blaikie1961f142014-08-21 20:44:56 +00001326 SM.AddNewSourceBuffer(std::move(F), SMLoc());
1327
1328 /// VariableTable - This holds all the current filecheck variables.
1329 StringMap<StringRef> VariableTable;
1330
Stephen Linf8bd2e52013-07-12 14:51:05 +00001331 bool hasError = false;
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001332
Stephen Linf8bd2e52013-07-12 14:51:05 +00001333 unsigned i = 0, j = 0, e = CheckStrings.size();
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001334
Stephen Linf8bd2e52013-07-12 14:51:05 +00001335 while (true) {
1336 StringRef CheckRegion;
1337 if (j == e) {
1338 CheckRegion = Buffer;
1339 } else {
1340 const CheckString &CheckLabelStr = CheckStrings[j];
Matt Arsenault38820972013-09-17 22:30:02 +00001341 if (CheckLabelStr.CheckTy != Check::CheckLabel) {
Stephen Linf8bd2e52013-07-12 14:51:05 +00001342 ++j;
1343 continue;
1344 }
Chris Lattner37183582009-09-20 22:42:44 +00001345
Stephen Linf8bd2e52013-07-12 14:51:05 +00001346 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1347 size_t MatchLabelLen = 0;
Stephen Line93a3a02013-10-11 18:38:36 +00001348 size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
Stephen Linf8bd2e52013-07-12 14:51:05 +00001349 MatchLabelLen, VariableTable);
1350 if (MatchLabelPos == StringRef::npos) {
1351 hasError = true;
1352 break;
1353 }
1354
1355 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1356 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1357 ++j;
1358 }
1359
1360 for ( ; i != j; ++i) {
1361 const CheckString &CheckStr = CheckStrings[i];
1362
1363 // Check each string within the scanned region, including a second check
1364 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1365 size_t MatchLen = 0;
Stephen Line93a3a02013-10-11 18:38:36 +00001366 size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
Stephen Linf8bd2e52013-07-12 14:51:05 +00001367 VariableTable);
1368
1369 if (MatchPos == StringRef::npos) {
1370 hasError = true;
1371 i = j;
1372 break;
1373 }
1374
1375 CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1376 }
1377
1378 if (j == e)
1379 break;
Chris Lattneree3c74f2009-07-08 18:44:05 +00001380 }
Mikhail Glushenkovdefcda22010-08-20 17:38:38 +00001381
Stephen Linf8bd2e52013-07-12 14:51:05 +00001382 return hasError ? 1 : 0;
Chris Lattneree3c74f2009-07-08 18:44:05 +00001383}