blob: 769749e024054617099297435f86e0b8570dab52 [file] [log] [blame]
//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// FileCheck does a line-by-line check of a file that validates whether it
// contains the expected content.  This is useful for regression tests etc.
//
// This program exits with an error status of 2 on error, exit status of 0 if
// the file matched the expected contents, and exit status of 1 if it did not
// contain the expected contents.
//
//===----------------------------------------------------------------------===//
18
Michael J. Spencer3ff95632010-12-16 03:29:14 +000019#include "llvm/ADT/OwningPtr.h"
Chandler Carruth4ffd89f2012-12-04 10:37:14 +000020#include "llvm/ADT/SmallString.h"
21#include "llvm/ADT/StringExtras.h"
22#include "llvm/ADT/StringMap.h"
Chris Lattner81cb8ca2009-07-08 18:44:05 +000023#include "llvm/Support/CommandLine.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include "llvm/Support/PrettyStackTrace.h"
Chris Lattner52870082009-09-24 21:47:32 +000026#include "llvm/Support/Regex.h"
Chandler Carruth4ffd89f2012-12-04 10:37:14 +000027#include "llvm/Support/Signals.h"
Chris Lattner81cb8ca2009-07-08 18:44:05 +000028#include "llvm/Support/SourceMgr.h"
29#include "llvm/Support/raw_ostream.h"
Michael J. Spencer333fb042010-12-09 17:36:48 +000030#include "llvm/Support/system_error.h"
Chris Lattnereec96952009-09-27 07:56:52 +000031#include <algorithm>
Eli Bendersky9756ca72012-12-01 21:54:48 +000032#include <map>
33#include <string>
34#include <vector>
Chris Lattner81cb8ca2009-07-08 18:44:05 +000035using namespace llvm;
36
37static cl::opt<std::string>
38CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
39
40static cl::opt<std::string>
41InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
42 cl::init("-"), cl::value_desc("filename"));
43
44static cl::opt<std::string>
45CheckPrefix("check-prefix", cl::init("CHECK"),
46 cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
47
Chris Lattner88a7e9e2009-07-11 18:58:15 +000048static cl::opt<bool>
49NoCanonicalizeWhiteSpace("strict-whitespace",
50 cl::desc("Do not treat all horizontal whitespace as equivalent"));
51
Chris Lattnera29703e2009-09-24 20:39:13 +000052//===----------------------------------------------------------------------===//
53// Pattern Handling Code.
54//===----------------------------------------------------------------------===//
55
/// Check - Scopes the enumeration of check kinds that a Pattern or a
/// CheckString is tagged with.
namespace Check {
  enum CheckType {
    CheckNone = 0,
    CheckPlain,
    CheckNext,
    CheckNot,
    CheckDAG,
    CheckLabel,

    /// CheckEOF - A pattern of this kind only matches the end of file.  This
    /// is used for trailing CHECK-NOTs.
    CheckEOF
  };
}
70
Chris Lattner9fc66782009-09-24 20:25:55 +000071class Pattern {
Chris Lattner94638f02009-09-25 17:29:36 +000072 SMLoc PatternLoc;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +000073
Matt Arsenault4f67afc2013-09-17 22:30:02 +000074 Check::CheckType CheckTy;
Michael Liao95ab3262013-05-14 20:34:12 +000075
Chris Lattner5d6a05f2009-09-25 17:23:43 +000076 /// FixedStr - If non-empty, this pattern is a fixed string match with the
77 /// specified fixed string.
Chris Lattner2702e6a2009-09-25 17:09:12 +000078 StringRef FixedStr;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +000079
Chris Lattner5d6a05f2009-09-25 17:23:43 +000080 /// RegEx - If non-empty, this is a regex pattern.
81 std::string RegExStr;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +000082
Alexander Kornienko70a870a2012-11-14 21:07:37 +000083 /// \brief Contains the number of line this pattern is in.
84 unsigned LineNumber;
85
Chris Lattnereec96952009-09-27 07:56:52 +000086 /// VariableUses - Entries in this vector map to uses of a variable in the
87 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain
88 /// "foobaz" and we'll get an entry in this vector that tells us to insert the
89 /// value of bar at offset 3.
90 std::vector<std::pair<StringRef, unsigned> > VariableUses;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +000091
Eli Bendersky9756ca72012-12-01 21:54:48 +000092 /// VariableDefs - Maps definitions of variables to their parenthesized
93 /// capture numbers.
94 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
95 std::map<StringRef, unsigned> VariableDefs;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +000096
Chris Lattner9fc66782009-09-24 20:25:55 +000097public:
Mikhail Glushenkov7112c862010-08-20 17:38:38 +000098
Matt Arsenault4f67afc2013-09-17 22:30:02 +000099 Pattern(Check::CheckType Ty)
100 : CheckTy(Ty) { }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000101
Michael Liao0fc71372013-04-25 21:31:34 +0000102 /// getLoc - Return the location in source code.
103 SMLoc getLoc() const { return PatternLoc; }
104
Eli Bendersky1e5cbcb2012-11-30 14:22:14 +0000105 /// ParsePattern - Parse the given string into the Pattern. SM provides the
106 /// SourceMgr used for error reports, and LineNumber is the line number in
107 /// the input file from which the pattern string was read.
108 /// Returns true in case of an error, false otherwise.
Alexander Kornienko70a870a2012-11-14 21:07:37 +0000109 bool ParsePattern(StringRef PatternStr, SourceMgr &SM, unsigned LineNumber);
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000110
Chris Lattner9fc66782009-09-24 20:25:55 +0000111 /// Match - Match the pattern string against the input buffer Buffer. This
112 /// returns the position that is matched or npos if there is no match. If
113 /// there is a match, the size of the matched string is returned in MatchLen.
Chris Lattnereec96952009-09-27 07:56:52 +0000114 ///
115 /// The VariableTable StringMap provides the current values of filecheck
116 /// variables and is updated if this match defines new values.
117 size_t Match(StringRef Buffer, size_t &MatchLen,
118 StringMap<StringRef> &VariableTable) const;
Daniel Dunbarfafe93c2009-11-22 22:08:06 +0000119
120 /// PrintFailureInfo - Print additional information about a failure to match
121 /// involving this pattern.
122 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
123 const StringMap<StringRef> &VariableTable) const;
124
Stephen Lin178504b2013-07-12 14:51:05 +0000125 bool hasVariable() const { return !(VariableUses.empty() &&
126 VariableDefs.empty()); }
127
Matt Arsenault4f67afc2013-09-17 22:30:02 +0000128 Check::CheckType getCheckTy() const { return CheckTy; }
Michael Liao95ab3262013-05-14 20:34:12 +0000129
Chris Lattner5d6a05f2009-09-25 17:23:43 +0000130private:
Chris Lattnereec96952009-09-27 07:56:52 +0000131 static void AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr);
Eli Bendersky9756ca72012-12-01 21:54:48 +0000132 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
133 void AddBackrefToRegEx(unsigned BackrefNum);
Daniel Dunbaread2dac2009-11-22 22:59:26 +0000134
135 /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
136 /// matching this pattern at the start of \arg Buffer; a distance of zero
137 /// should correspond to a perfect match.
138 unsigned ComputeMatchDistance(StringRef Buffer,
139 const StringMap<StringRef> &VariableTable) const;
Alexander Kornienko70a870a2012-11-14 21:07:37 +0000140
141 /// \brief Evaluates expression and stores the result to \p Value.
142 /// \return true on success. false when the expression has invalid syntax.
143 bool EvaluateExpression(StringRef Expr, std::string &Value) const;
Eli Bendersky4db65112012-12-02 16:02:41 +0000144
145 /// \brief Finds the closing sequence of a regex variable usage or
146 /// definition. Str has to point in the beginning of the definition
147 /// (right after the opening sequence).
148 /// \return offset of the closing sequence within Str, or npos if it was not
149 /// found.
150 size_t FindRegexVarEnd(StringRef Str);
Chris Lattner9fc66782009-09-24 20:25:55 +0000151};
152
Chris Lattnereec96952009-09-27 07:56:52 +0000153
Alexander Kornienko70a870a2012-11-14 21:07:37 +0000154bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM,
155 unsigned LineNumber) {
156 this->LineNumber = LineNumber;
Chris Lattner94638f02009-09-25 17:29:36 +0000157 PatternLoc = SMLoc::getFromPointer(PatternStr.data());
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000158
Chris Lattnera29703e2009-09-24 20:39:13 +0000159 // Ignore trailing whitespace.
160 while (!PatternStr.empty() &&
161 (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
162 PatternStr = PatternStr.substr(0, PatternStr.size()-1);
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000163
Chris Lattnera29703e2009-09-24 20:39:13 +0000164 // Check that there is something on the line.
165 if (PatternStr.empty()) {
Chris Lattner3f2d5f62011-10-16 05:43:57 +0000166 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
167 "found empty check string with prefix '" +
168 CheckPrefix+":'");
Chris Lattnera29703e2009-09-24 20:39:13 +0000169 return true;
170 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000171
Chris Lattner2702e6a2009-09-25 17:09:12 +0000172 // Check to see if this is a fixed string, or if it has regex pieces.
Ted Kremenek4f505172012-09-08 04:32:13 +0000173 if (PatternStr.size() < 2 ||
Chris Lattnereec96952009-09-27 07:56:52 +0000174 (PatternStr.find("{{") == StringRef::npos &&
175 PatternStr.find("[[") == StringRef::npos)) {
Chris Lattner2702e6a2009-09-25 17:09:12 +0000176 FixedStr = PatternStr;
177 return false;
178 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000179
Chris Lattnereec96952009-09-27 07:56:52 +0000180 // Paren value #0 is for the fully matched string. Any new parenthesized
Chris Lattner13a38c42011-04-09 06:18:02 +0000181 // values add from there.
Chris Lattnereec96952009-09-27 07:56:52 +0000182 unsigned CurParen = 1;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000183
Chris Lattner5d6a05f2009-09-25 17:23:43 +0000184 // Otherwise, there is at least one regex piece. Build up the regex pattern
185 // by escaping scary characters in fixed strings, building up one big regex.
Chris Lattner52870082009-09-24 21:47:32 +0000186 while (!PatternStr.empty()) {
Chris Lattnereec96952009-09-27 07:56:52 +0000187 // RegEx matches.
Chris Lattner13a38c42011-04-09 06:18:02 +0000188 if (PatternStr.startswith("{{")) {
Eli Bendersky1e5cbcb2012-11-30 14:22:14 +0000189 // This is the start of a regex match. Scan for the }}.
Chris Lattnereec96952009-09-27 07:56:52 +0000190 size_t End = PatternStr.find("}}");
191 if (End == StringRef::npos) {
192 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
Chris Lattner3f2d5f62011-10-16 05:43:57 +0000193 SourceMgr::DK_Error,
194 "found start of regex string with no end '}}'");
Chris Lattnereec96952009-09-27 07:56:52 +0000195 return true;
196 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000197
Chris Lattner42e31df2011-04-09 06:37:03 +0000198 // Enclose {{}} patterns in parens just like [[]] even though we're not
199 // capturing the result for any purpose. This is required in case the
200 // expression contains an alternation like: CHECK: abc{{x|z}}def. We
201 // want this to turn into: "abc(x|z)def" not "abcx|zdef".
202 RegExStr += '(';
203 ++CurParen;
204
Chris Lattnereec96952009-09-27 07:56:52 +0000205 if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
206 return true;
Chris Lattner42e31df2011-04-09 06:37:03 +0000207 RegExStr += ')';
Chris Lattner13a38c42011-04-09 06:18:02 +0000208
Chris Lattnereec96952009-09-27 07:56:52 +0000209 PatternStr = PatternStr.substr(End+2);
Chris Lattner52870082009-09-24 21:47:32 +0000210 continue;
211 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000212
Chris Lattnereec96952009-09-27 07:56:52 +0000213 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
214 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
215 // second form is [[foo]] which is a reference to foo. The variable name
Daniel Dunbar964ac012009-11-22 22:07:50 +0000216 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
Chris Lattnereec96952009-09-27 07:56:52 +0000217 // it. This is to catch some common errors.
Chris Lattner13a38c42011-04-09 06:18:02 +0000218 if (PatternStr.startswith("[[")) {
Eli Bendersky4db65112012-12-02 16:02:41 +0000219 // Find the closing bracket pair ending the match. End is going to be an
220 // offset relative to the beginning of the match string.
221 size_t End = FindRegexVarEnd(PatternStr.substr(2));
222
Chris Lattnereec96952009-09-27 07:56:52 +0000223 if (End == StringRef::npos) {
224 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
Chris Lattner3f2d5f62011-10-16 05:43:57 +0000225 SourceMgr::DK_Error,
226 "invalid named regex reference, no ]] found");
Chris Lattnereec96952009-09-27 07:56:52 +0000227 return true;
228 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000229
Eli Bendersky4db65112012-12-02 16:02:41 +0000230 StringRef MatchStr = PatternStr.substr(2, End);
231 PatternStr = PatternStr.substr(End+4);
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000232
Chris Lattnereec96952009-09-27 07:56:52 +0000233 // Get the regex name (e.g. "foo").
234 size_t NameEnd = MatchStr.find(':');
235 StringRef Name = MatchStr.substr(0, NameEnd);
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000236
Chris Lattnereec96952009-09-27 07:56:52 +0000237 if (Name.empty()) {
Chris Lattner3f2d5f62011-10-16 05:43:57 +0000238 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
239 "invalid name in named regex: empty name");
Chris Lattnereec96952009-09-27 07:56:52 +0000240 return true;
241 }
242
Alexander Kornienko70a870a2012-11-14 21:07:37 +0000243 // Verify that the name/expression is well formed. FileCheck currently
244 // supports @LINE, @LINE+number, @LINE-number expressions. The check here
245 // is relaxed, more strict check is performed in \c EvaluateExpression.
246 bool IsExpression = false;
247 for (unsigned i = 0, e = Name.size(); i != e; ++i) {
248 if (i == 0 && Name[i] == '@') {
249 if (NameEnd != StringRef::npos) {
250 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
251 SourceMgr::DK_Error,
252 "invalid name in named regex definition");
253 return true;
254 }
255 IsExpression = true;
256 continue;
257 }
258 if (Name[i] != '_' && !isalnum(Name[i]) &&
259 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
Chris Lattnereec96952009-09-27 07:56:52 +0000260 SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
Chris Lattner3f2d5f62011-10-16 05:43:57 +0000261 SourceMgr::DK_Error, "invalid name in named regex");
Chris Lattnereec96952009-09-27 07:56:52 +0000262 return true;
263 }
Alexander Kornienko70a870a2012-11-14 21:07:37 +0000264 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000265
Chris Lattnereec96952009-09-27 07:56:52 +0000266 // Name can't start with a digit.
Guy Benyei87d0b9e2013-02-12 21:21:59 +0000267 if (isdigit(static_cast<unsigned char>(Name[0]))) {
Chris Lattner3f2d5f62011-10-16 05:43:57 +0000268 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
269 "invalid name in named regex");
Chris Lattnereec96952009-09-27 07:56:52 +0000270 return true;
271 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000272
Chris Lattnereec96952009-09-27 07:56:52 +0000273 // Handle [[foo]].
274 if (NameEnd == StringRef::npos) {
Eli Bendersky9756ca72012-12-01 21:54:48 +0000275 // Handle variables that were defined earlier on the same line by
276 // emitting a backreference.
277 if (VariableDefs.find(Name) != VariableDefs.end()) {
278 unsigned VarParenNum = VariableDefs[Name];
279 if (VarParenNum < 1 || VarParenNum > 9) {
280 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
281 SourceMgr::DK_Error,
282 "Can't back-reference more than 9 variables");
283 return true;
284 }
285 AddBackrefToRegEx(VarParenNum);
286 } else {
287 VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
288 }
Chris Lattnereec96952009-09-27 07:56:52 +0000289 continue;
290 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000291
Chris Lattnereec96952009-09-27 07:56:52 +0000292 // Handle [[foo:.*]].
Eli Bendersky9756ca72012-12-01 21:54:48 +0000293 VariableDefs[Name] = CurParen;
Chris Lattnereec96952009-09-27 07:56:52 +0000294 RegExStr += '(';
295 ++CurParen;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000296
Chris Lattnereec96952009-09-27 07:56:52 +0000297 if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
298 return true;
299
300 RegExStr += ')';
Chris Lattner52870082009-09-24 21:47:32 +0000301 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000302
Chris Lattnereec96952009-09-27 07:56:52 +0000303 // Handle fixed string matches.
304 // Find the end, which is the start of the next regex.
305 size_t FixedMatchEnd = PatternStr.find("{{");
306 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
307 AddFixedStringToRegEx(PatternStr.substr(0, FixedMatchEnd), RegExStr);
308 PatternStr = PatternStr.substr(FixedMatchEnd);
Chris Lattner52870082009-09-24 21:47:32 +0000309 }
Chris Lattneradea46e2009-09-24 20:45:07 +0000310
Chris Lattnera29703e2009-09-24 20:39:13 +0000311 return false;
312}
313
Chris Lattnereec96952009-09-27 07:56:52 +0000314void Pattern::AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr) {
Chris Lattner5d6a05f2009-09-25 17:23:43 +0000315 // Add the characters from FixedStr to the regex, escaping as needed. This
316 // avoids "leaning toothpicks" in common patterns.
317 for (unsigned i = 0, e = FixedStr.size(); i != e; ++i) {
318 switch (FixedStr[i]) {
319 // These are the special characters matched in "p_ere_exp".
320 case '(':
321 case ')':
322 case '^':
323 case '$':
324 case '|':
325 case '*':
326 case '+':
327 case '?':
328 case '.':
329 case '[':
330 case '\\':
331 case '{':
Chris Lattnereec96952009-09-27 07:56:52 +0000332 TheStr += '\\';
Chris Lattner5d6a05f2009-09-25 17:23:43 +0000333 // FALL THROUGH.
334 default:
Chris Lattnereec96952009-09-27 07:56:52 +0000335 TheStr += FixedStr[i];
Chris Lattner5d6a05f2009-09-25 17:23:43 +0000336 break;
337 }
338 }
339}
340
Eli Bendersky9756ca72012-12-01 21:54:48 +0000341bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
Chris Lattnereec96952009-09-27 07:56:52 +0000342 SourceMgr &SM) {
Eli Bendersky9756ca72012-12-01 21:54:48 +0000343 Regex R(RS);
Chris Lattnereec96952009-09-27 07:56:52 +0000344 std::string Error;
345 if (!R.isValid(Error)) {
Eli Bendersky9756ca72012-12-01 21:54:48 +0000346 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
Chris Lattner3f2d5f62011-10-16 05:43:57 +0000347 "invalid regex: " + Error);
Chris Lattnereec96952009-09-27 07:56:52 +0000348 return true;
349 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000350
Eli Bendersky9756ca72012-12-01 21:54:48 +0000351 RegExStr += RS.str();
Chris Lattnereec96952009-09-27 07:56:52 +0000352 CurParen += R.getNumMatches();
353 return false;
354}
Chris Lattner5d6a05f2009-09-25 17:23:43 +0000355
Eli Bendersky9756ca72012-12-01 21:54:48 +0000356void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
357 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
358 std::string Backref = std::string("\\") +
359 std::string(1, '0' + BackrefNum);
360 RegExStr += Backref;
361}
362
Alexander Kornienko70a870a2012-11-14 21:07:37 +0000363bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
364 // The only supported expression is @LINE([\+-]\d+)?
365 if (!Expr.startswith("@LINE"))
366 return false;
367 Expr = Expr.substr(StringRef("@LINE").size());
368 int Offset = 0;
369 if (!Expr.empty()) {
370 if (Expr[0] == '+')
371 Expr = Expr.substr(1);
372 else if (Expr[0] != '-')
373 return false;
374 if (Expr.getAsInteger(10, Offset))
375 return false;
376 }
377 Value = llvm::itostr(LineNumber + Offset);
378 return true;
379}
380
Chris Lattner52870082009-09-24 21:47:32 +0000381/// Match - Match the pattern string against the input buffer Buffer. This
382/// returns the position that is matched or npos if there is no match. If
383/// there is a match, the size of the matched string is returned in MatchLen.
Chris Lattnereec96952009-09-27 07:56:52 +0000384size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
385 StringMap<StringRef> &VariableTable) const {
Jakob Stoklund Olesen824c10e2010-10-15 17:47:12 +0000386 // If this is the EOF pattern, match it immediately.
Matt Arsenault4f67afc2013-09-17 22:30:02 +0000387 if (CheckTy == Check::CheckEOF) {
Jakob Stoklund Olesen824c10e2010-10-15 17:47:12 +0000388 MatchLen = 0;
389 return Buffer.size();
390 }
391
Chris Lattner2702e6a2009-09-25 17:09:12 +0000392 // If this is a fixed string pattern, just match it now.
393 if (!FixedStr.empty()) {
394 MatchLen = FixedStr.size();
395 return Buffer.find(FixedStr);
396 }
Chris Lattnereec96952009-09-27 07:56:52 +0000397
Chris Lattner5d6a05f2009-09-25 17:23:43 +0000398 // Regex match.
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000399
Chris Lattnereec96952009-09-27 07:56:52 +0000400 // If there are variable uses, we need to create a temporary string with the
401 // actual value.
402 StringRef RegExToMatch = RegExStr;
403 std::string TmpStr;
404 if (!VariableUses.empty()) {
405 TmpStr = RegExStr;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000406
Chris Lattnereec96952009-09-27 07:56:52 +0000407 unsigned InsertOffset = 0;
408 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
Chris Lattnereec96952009-09-27 07:56:52 +0000409 std::string Value;
Alexander Kornienko70a870a2012-11-14 21:07:37 +0000410
411 if (VariableUses[i].first[0] == '@') {
412 if (!EvaluateExpression(VariableUses[i].first, Value))
413 return StringRef::npos;
414 } else {
415 StringMap<StringRef>::iterator it =
416 VariableTable.find(VariableUses[i].first);
417 // If the variable is undefined, return an error.
418 if (it == VariableTable.end())
419 return StringRef::npos;
420
421 // Look up the value and escape it so that we can plop it into the regex.
422 AddFixedStringToRegEx(it->second, Value);
423 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000424
Chris Lattnereec96952009-09-27 07:56:52 +0000425 // Plop it into the regex at the adjusted offset.
426 TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
427 Value.begin(), Value.end());
428 InsertOffset += Value.size();
429 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000430
Chris Lattnereec96952009-09-27 07:56:52 +0000431 // Match the newly constructed regex.
432 RegExToMatch = TmpStr;
433 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000434
435
Chris Lattner5d6a05f2009-09-25 17:23:43 +0000436 SmallVector<StringRef, 4> MatchInfo;
Chris Lattnereec96952009-09-27 07:56:52 +0000437 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
Chris Lattner5d6a05f2009-09-25 17:23:43 +0000438 return StringRef::npos;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000439
Chris Lattner5d6a05f2009-09-25 17:23:43 +0000440 // Successful regex match.
441 assert(!MatchInfo.empty() && "Didn't get any match");
442 StringRef FullMatch = MatchInfo[0];
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000443
Chris Lattnereec96952009-09-27 07:56:52 +0000444 // If this defines any variables, remember their values.
Eli Bendersky9756ca72012-12-01 21:54:48 +0000445 for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
446 E = VariableDefs.end();
447 I != E; ++I) {
448 assert(I->second < MatchInfo.size() && "Internal paren error");
449 VariableTable[I->first] = MatchInfo[I->second];
Chris Lattner94638f02009-09-25 17:29:36 +0000450 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000451
Chris Lattner5d6a05f2009-09-25 17:23:43 +0000452 MatchLen = FullMatch.size();
453 return FullMatch.data()-Buffer.data();
Chris Lattner52870082009-09-24 21:47:32 +0000454}
455
Daniel Dunbaread2dac2009-11-22 22:59:26 +0000456unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
457 const StringMap<StringRef> &VariableTable) const {
458 // Just compute the number of matching characters. For regular expressions, we
459 // just compare against the regex itself and hope for the best.
460 //
461 // FIXME: One easy improvement here is have the regex lib generate a single
462 // example regular expression which matches, and use that as the example
463 // string.
464 StringRef ExampleString(FixedStr);
465 if (ExampleString.empty())
466 ExampleString = RegExStr;
467
Daniel Dunbar0806f9f2010-01-30 00:24:06 +0000468 // Only compare up to the first line in the buffer, or the string size.
469 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
470 BufferPrefix = BufferPrefix.split('\n').first;
471 return BufferPrefix.edit_distance(ExampleString);
Daniel Dunbaread2dac2009-11-22 22:59:26 +0000472}
473
Daniel Dunbarfafe93c2009-11-22 22:08:06 +0000474void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
475 const StringMap<StringRef> &VariableTable) const{
Daniel Dunbarfafe93c2009-11-22 22:08:06 +0000476 // If this was a regular expression using variables, print the current
477 // variable values.
478 if (!VariableUses.empty()) {
479 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
Daniel Dunbarfafe93c2009-11-22 22:08:06 +0000480 SmallString<256> Msg;
481 raw_svector_ostream OS(Msg);
Alexander Kornienko70a870a2012-11-14 21:07:37 +0000482 StringRef Var = VariableUses[i].first;
483 if (Var[0] == '@') {
484 std::string Value;
485 if (EvaluateExpression(Var, Value)) {
486 OS << "with expression \"";
487 OS.write_escaped(Var) << "\" equal to \"";
488 OS.write_escaped(Value) << "\"";
489 } else {
490 OS << "uses incorrect expression \"";
491 OS.write_escaped(Var) << "\"";
492 }
Daniel Dunbarfafe93c2009-11-22 22:08:06 +0000493 } else {
Alexander Kornienko70a870a2012-11-14 21:07:37 +0000494 StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
495
496 // Check for undefined variable references.
497 if (it == VariableTable.end()) {
498 OS << "uses undefined variable \"";
499 OS.write_escaped(Var) << "\"";
500 } else {
501 OS << "with variable \"";
502 OS.write_escaped(Var) << "\" equal to \"";
503 OS.write_escaped(it->second) << "\"";
504 }
Daniel Dunbarfafe93c2009-11-22 22:08:06 +0000505 }
506
Chris Lattner3f2d5f62011-10-16 05:43:57 +0000507 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
508 OS.str());
Daniel Dunbarfafe93c2009-11-22 22:08:06 +0000509 }
510 }
Daniel Dunbaread2dac2009-11-22 22:59:26 +0000511
512 // Attempt to find the closest/best fuzzy match. Usually an error happens
513 // because some string in the output didn't exactly match. In these cases, we
514 // would like to show the user a best guess at what "should have" matched, to
515 // save them having to actually check the input manually.
516 size_t NumLinesForward = 0;
517 size_t Best = StringRef::npos;
518 double BestQuality = 0;
519
520 // Use an arbitrary 4k limit on how far we will search.
Dan Gohmane3a1e502010-01-29 21:57:46 +0000521 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
Daniel Dunbaread2dac2009-11-22 22:59:26 +0000522 if (Buffer[i] == '\n')
523 ++NumLinesForward;
524
Dan Gohmand8a55412010-01-29 21:55:16 +0000525 // Patterns have leading whitespace stripped, so skip whitespace when
526 // looking for something which looks like a pattern.
527 if (Buffer[i] == ' ' || Buffer[i] == '\t')
528 continue;
529
Daniel Dunbaread2dac2009-11-22 22:59:26 +0000530 // Compute the "quality" of this match as an arbitrary combination of the
531 // match distance and the number of lines skipped to get to this match.
532 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
533 double Quality = Distance + (NumLinesForward / 100.);
534
535 if (Quality < BestQuality || Best == StringRef::npos) {
536 Best = i;
537 BestQuality = Quality;
538 }
539 }
540
Daniel Dunbar7a68e0d2010-03-19 18:07:43 +0000541 // Print the "possible intended match here" line if we found something
542 // reasonable and not equal to what we showed in the "scanning from here"
543 // line.
544 if (Best && Best != StringRef::npos && BestQuality < 50) {
545 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
Chris Lattner3f2d5f62011-10-16 05:43:57 +0000546 SourceMgr::DK_Note, "possible intended match here");
Daniel Dunbaread2dac2009-11-22 22:59:26 +0000547
548 // FIXME: If we wanted to be really friendly we would show why the match
549 // failed, as it can be hard to spot simple one character differences.
550 }
Daniel Dunbarfafe93c2009-11-22 22:08:06 +0000551}
Chris Lattnera29703e2009-09-24 20:39:13 +0000552
Eli Bendersky4db65112012-12-02 16:02:41 +0000553size_t Pattern::FindRegexVarEnd(StringRef Str) {
554 // Offset keeps track of the current offset within the input Str
555 size_t Offset = 0;
556 // [...] Nesting depth
557 size_t BracketDepth = 0;
558
559 while (!Str.empty()) {
560 if (Str.startswith("]]") && BracketDepth == 0)
561 return Offset;
562 if (Str[0] == '\\') {
563 // Backslash escapes the next char within regexes, so skip them both.
564 Str = Str.substr(2);
565 Offset += 2;
566 } else {
567 switch (Str[0]) {
568 default:
569 break;
570 case '[':
571 BracketDepth++;
572 break;
573 case ']':
574 assert(BracketDepth > 0 && "Invalid regex");
575 BracketDepth--;
576 break;
577 }
578 Str = Str.substr(1);
579 Offset++;
580 }
581 }
582
583 return StringRef::npos;
584}
585
586
Chris Lattnera29703e2009-09-24 20:39:13 +0000587//===----------------------------------------------------------------------===//
588// Check Strings.
589//===----------------------------------------------------------------------===//
Chris Lattner9fc66782009-09-24 20:25:55 +0000590
591/// CheckString - This is a check that we found in the input file.
592struct CheckString {
593 /// Pat - The pattern to match.
594 Pattern Pat;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000595
Chris Lattner207e1bc2009-08-15 17:41:04 +0000596 /// Loc - The location in the match file that the check string was specified.
597 SMLoc Loc;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000598
Matt Arsenault4f67afc2013-09-17 22:30:02 +0000599 /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
600 /// as opposed to a CHECK: directive.
601 Check::CheckType CheckTy;
Stephen Lin178504b2013-07-12 14:51:05 +0000602
Michael Liao95ab3262013-05-14 20:34:12 +0000603 /// DagNotStrings - These are all of the strings that are disallowed from
Chris Lattnerf15380b2009-09-20 22:35:26 +0000604 /// occurring between this match string and the previous one (or start of
605 /// file).
Michael Liao95ab3262013-05-14 20:34:12 +0000606 std::vector<Pattern> DagNotStrings;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000607
Matt Arsenault4f67afc2013-09-17 22:30:02 +0000608 CheckString(const Pattern &P, SMLoc L, Check::CheckType Ty)
609 : Pat(P), Loc(L), CheckTy(Ty) {}
Michael Liao7efbbd62013-05-14 20:29:52 +0000610
Michael Liao95ab3262013-05-14 20:34:12 +0000611 /// Check - Match check string and its "not strings" and/or "dag strings".
Stephen Lin178504b2013-07-12 14:51:05 +0000612 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabel,
613 size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
Michael Liao7efbbd62013-05-14 20:29:52 +0000614
615 /// CheckNext - Verify there is a single line in the given buffer.
616 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
617
618 /// CheckNot - Verify there's no "not strings" in the given buffer.
619 bool CheckNot(const SourceMgr &SM, StringRef Buffer,
Michael Liao95ab3262013-05-14 20:34:12 +0000620 const std::vector<const Pattern *> &NotStrings,
Michael Liao7efbbd62013-05-14 20:29:52 +0000621 StringMap<StringRef> &VariableTable) const;
Michael Liao95ab3262013-05-14 20:34:12 +0000622
623 /// CheckDag - Match "dag strings" and their mixed "not strings".
624 size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
625 std::vector<const Pattern *> &NotStrings,
626 StringMap<StringRef> &VariableTable) const;
Chris Lattner207e1bc2009-08-15 17:41:04 +0000627};
628
Guy Benyei4cc74fc2013-02-06 20:40:38 +0000629/// Canonicalize whitespaces in the input file. Line endings are replaced
630/// with UNIX-style '\n'.
631///
632/// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
633/// characters to a single space.
634static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB,
635 bool PreserveHorizontal) {
Chris Lattner4c842dd2010-04-05 22:42:30 +0000636 SmallString<128> NewFile;
Chris Lattneradea46e2009-09-24 20:45:07 +0000637 NewFile.reserve(MB->getBufferSize());
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000638
Chris Lattneradea46e2009-09-24 20:45:07 +0000639 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
640 Ptr != End; ++Ptr) {
NAKAMURA Takumi9f6e03f2010-11-14 03:28:22 +0000641 // Eliminate trailing dosish \r.
642 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
643 continue;
644 }
645
Michael Liaoc16f8c52013-04-25 18:54:02 +0000646 // If current char is not a horizontal whitespace or if horizontal
Guy Benyei4cc74fc2013-02-06 20:40:38 +0000647 // whitespace canonicalization is disabled, dump it to output as is.
648 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
Chris Lattneradea46e2009-09-24 20:45:07 +0000649 NewFile.push_back(*Ptr);
650 continue;
651 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000652
Chris Lattneradea46e2009-09-24 20:45:07 +0000653 // Otherwise, add one space and advance over neighboring space.
654 NewFile.push_back(' ');
655 while (Ptr+1 != End &&
656 (Ptr[1] == ' ' || Ptr[1] == '\t'))
657 ++Ptr;
658 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000659
Chris Lattneradea46e2009-09-24 20:45:07 +0000660 // Free the old buffer and return a new one.
661 MemoryBuffer *MB2 =
Chris Lattner4c842dd2010-04-05 22:42:30 +0000662 MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier());
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000663
Chris Lattneradea46e2009-09-24 20:45:07 +0000664 delete MB;
665 return MB2;
666}
667
/// Return true if \p c may be part of a word for the purpose of recognizing a
/// check prefix: an alphanumeric character (as classified by isalnum), a
/// hyphen or an underscore.
static bool IsPartOfWord(char c) {
  // isalnum() is only defined for values representable as unsigned char (or
  // EOF). Plain char may be signed, so convert before classifying to avoid
  // undefined behavior on bytes >= 0x80.
  return isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_';
}
671
672static Check::CheckType FindCheckType(StringRef &Buffer, StringRef Prefix) {
673 char NextChar = Buffer[CheckPrefix.size()];
674
675 // Verify that the : is present after the prefix.
676 if (NextChar == ':') {
677 Buffer = Buffer.substr(CheckPrefix.size() + 1);
678 return Check::CheckPlain;
679 }
680
681 if (NextChar != '-') {
682 Buffer = Buffer.drop_front(1);
683 return Check::CheckNone;
684 }
685
686 StringRef Rest = Buffer.drop_front(CheckPrefix.size() + 1);
687 if (Rest.startswith("NEXT:")) {
688 Buffer = Rest.drop_front(sizeof("NEXT:") - 1);
689 return Check::CheckNext;
690 }
691
692 if (Rest.startswith("NOT:")) {
693 Buffer = Rest.drop_front(sizeof("NOT:") - 1);
694 return Check::CheckNot;
695 }
696
697 if (Rest.startswith("DAG:")) {
698 Buffer = Rest.drop_front(sizeof("DAG:") - 1);
699 return Check::CheckDAG;
700 }
701
702 if (Rest.startswith("LABEL:")) {
703 Buffer = Rest.drop_front(sizeof("LABEL:") - 1);
704 return Check::CheckLabel;
705 }
706
707 Buffer = Buffer.drop_front(1);
708 return Check::CheckNone;
709}
Chris Lattner81cb8ca2009-07-08 18:44:05 +0000710
Chris Lattner81cb8ca2009-07-08 18:44:05 +0000711/// ReadCheckFile - Read the check file, which specifies the sequence of
712/// expected strings. The strings are added to the CheckStrings vector.
Eli Bendersky1e5cbcb2012-11-30 14:22:14 +0000713/// Returns true in case of an error, false otherwise.
Chris Lattner81cb8ca2009-07-08 18:44:05 +0000714static bool ReadCheckFile(SourceMgr &SM,
Chris Lattner207e1bc2009-08-15 17:41:04 +0000715 std::vector<CheckString> &CheckStrings) {
Michael J. Spencer3ff95632010-12-16 03:29:14 +0000716 OwningPtr<MemoryBuffer> File;
717 if (error_code ec =
Rafael Espindoladd5af272013-06-25 05:28:34 +0000718 MemoryBuffer::getFileOrSTDIN(CheckFilename, File)) {
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000719 errs() << "Could not open check file '" << CheckFilename << "': "
Michael J. Spencer333fb042010-12-09 17:36:48 +0000720 << ec.message() << '\n';
Chris Lattner81cb8ca2009-07-08 18:44:05 +0000721 return true;
722 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000723
Chris Lattneradea46e2009-09-24 20:45:07 +0000724 // If we want to canonicalize whitespace, strip excess whitespace from the
Guy Benyei4cc74fc2013-02-06 20:40:38 +0000725 // buffer containing the CHECK lines. Remove DOS style line endings.
Benjamin Kramer7cdba152013-03-23 13:56:23 +0000726 MemoryBuffer *F =
727 CanonicalizeInputFile(File.take(), NoCanonicalizeWhiteSpace);
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000728
Chris Lattner81cb8ca2009-07-08 18:44:05 +0000729 SM.AddNewSourceBuffer(F, SMLoc());
730
Chris Lattnerd7e25052009-08-15 18:00:42 +0000731 // Find all instances of CheckPrefix followed by : in the file.
Chris Lattner96077032009-09-20 22:11:44 +0000732 StringRef Buffer = F->getBuffer();
Michael Liao95ab3262013-05-14 20:34:12 +0000733 std::vector<Pattern> DagNotMatches;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000734
Eli Bendersky1e5cbcb2012-11-30 14:22:14 +0000735 // LineNumber keeps track of the line on which CheckPrefix instances are
736 // found.
Alexander Kornienko70a870a2012-11-14 21:07:37 +0000737 unsigned LineNumber = 1;
738
Chris Lattner81cb8ca2009-07-08 18:44:05 +0000739 while (1) {
740 // See if Prefix occurs in the memory buffer.
Alexander Kornienko70a870a2012-11-14 21:07:37 +0000741 size_t PrefixLoc = Buffer.find(CheckPrefix);
Chris Lattner81cb8ca2009-07-08 18:44:05 +0000742 // If we didn't find a match, we're done.
Alexander Kornienko70a870a2012-11-14 21:07:37 +0000743 if (PrefixLoc == StringRef::npos)
Chris Lattner81cb8ca2009-07-08 18:44:05 +0000744 break;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000745
Alexander Kornienko70a870a2012-11-14 21:07:37 +0000746 LineNumber += Buffer.substr(0, PrefixLoc).count('\n');
747
Rui Ueyamad9a84ef2013-08-12 23:05:59 +0000748 // Keep the charcter before our prefix so we can validate that we have
749 // found our prefix, and account for cases when PrefixLoc is 0.
750 Buffer = Buffer.substr(std::min(PrefixLoc-1, PrefixLoc));
Alexander Kornienko70a870a2012-11-14 21:07:37 +0000751
Rui Ueyamad9a84ef2013-08-12 23:05:59 +0000752 const char *CheckPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000753
Rui Ueyamad9a84ef2013-08-12 23:05:59 +0000754 // Make sure we have actually found our prefix, and not a word containing
755 // our prefix.
Matt Arsenault4f67afc2013-09-17 22:30:02 +0000756 if (PrefixLoc != 0 && IsPartOfWord(Buffer[0])) {
Rui Ueyamad9a84ef2013-08-12 23:05:59 +0000757 Buffer = Buffer.substr(CheckPrefix.size());
758 continue;
759 }
760
Matt Arsenault4f67afc2013-09-17 22:30:02 +0000761 // When we find a check prefix, keep track of what kind of type of CHECK we
762 // have.
763 Check::CheckType CheckTy = FindCheckType(Buffer, CheckPrefix);
764 if (CheckTy == Check::CheckNone)
Chris Lattnerd7e25052009-08-15 18:00:42 +0000765 continue;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000766
Matt Arsenault4f67afc2013-09-17 22:30:02 +0000767 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
768 // leading and trailing whitespace.
Chris Lattnerf15380b2009-09-20 22:35:26 +0000769 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000770
Chris Lattner81cb8ca2009-07-08 18:44:05 +0000771 // Scan ahead to the end of line.
Chris Lattner96077032009-09-20 22:11:44 +0000772 size_t EOL = Buffer.find_first_of("\n\r");
Chris Lattnera29703e2009-09-24 20:39:13 +0000773
Dan Gohmane5463432010-01-29 21:53:18 +0000774 // Remember the location of the start of the pattern, for diagnostics.
775 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
776
Chris Lattnera29703e2009-09-24 20:39:13 +0000777 // Parse the pattern.
Matt Arsenault4f67afc2013-09-17 22:30:02 +0000778 Pattern P(CheckTy);
Alexander Kornienko70a870a2012-11-14 21:07:37 +0000779 if (P.ParsePattern(Buffer.substr(0, EOL), SM, LineNumber))
Chris Lattner81cb8ca2009-07-08 18:44:05 +0000780 return true;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000781
Stephen Lin178504b2013-07-12 14:51:05 +0000782 // Verify that CHECK-LABEL lines do not define or use variables
Matt Arsenault4f67afc2013-09-17 22:30:02 +0000783 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
Stephen Lin178504b2013-07-12 14:51:05 +0000784 SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart),
785 SourceMgr::DK_Error,
786 "found '"+CheckPrefix+"-LABEL:' with variable definition"
Stephen Lin6d3aa542013-08-16 17:29:01 +0000787 " or use");
Stephen Lin178504b2013-07-12 14:51:05 +0000788 return true;
789 }
790
Chris Lattnera29703e2009-09-24 20:39:13 +0000791 Buffer = Buffer.substr(EOL);
792
Chris Lattner5dafafd2009-08-15 18:32:21 +0000793 // Verify that CHECK-NEXT lines have at least one CHECK line before them.
Matt Arsenault4f67afc2013-09-17 22:30:02 +0000794 if ((CheckTy == Check::CheckNext) && CheckStrings.empty()) {
Chris Lattner5dafafd2009-08-15 18:32:21 +0000795 SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart),
Chris Lattner3f2d5f62011-10-16 05:43:57 +0000796 SourceMgr::DK_Error,
Chris Lattner5dafafd2009-08-15 18:32:21 +0000797 "found '"+CheckPrefix+"-NEXT:' without previous '"+
Chris Lattner3f2d5f62011-10-16 05:43:57 +0000798 CheckPrefix+ ": line");
Chris Lattner5dafafd2009-08-15 18:32:21 +0000799 return true;
800 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000801
Michael Liao95ab3262013-05-14 20:34:12 +0000802 // Handle CHECK-DAG/-NOT.
Matt Arsenault4f67afc2013-09-17 22:30:02 +0000803 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
Michael Liao95ab3262013-05-14 20:34:12 +0000804 DagNotMatches.push_back(P);
Chris Lattnera29703e2009-09-24 20:39:13 +0000805 continue;
806 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000807
Chris Lattner81cb8ca2009-07-08 18:44:05 +0000808 // Okay, add the string we captured to the output vector and move on.
Chris Lattner9fc66782009-09-24 20:25:55 +0000809 CheckStrings.push_back(CheckString(P,
Dan Gohmane5463432010-01-29 21:53:18 +0000810 PatternLoc,
Matt Arsenault4f67afc2013-09-17 22:30:02 +0000811 CheckTy));
Michael Liao95ab3262013-05-14 20:34:12 +0000812 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
Chris Lattner81cb8ca2009-07-08 18:44:05 +0000813 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000814
Michael Liao95ab3262013-05-14 20:34:12 +0000815 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs.
816 if (!DagNotMatches.empty()) {
Matt Arsenault4f67afc2013-09-17 22:30:02 +0000817 CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF),
Jakob Stoklund Olesen824c10e2010-10-15 17:47:12 +0000818 SMLoc::getFromPointer(Buffer.data()),
Matt Arsenault4f67afc2013-09-17 22:30:02 +0000819 Check::CheckEOF));
Michael Liao95ab3262013-05-14 20:34:12 +0000820 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
Jakob Stoklund Olesen824c10e2010-10-15 17:47:12 +0000821 }
822
Chris Lattner81cb8ca2009-07-08 18:44:05 +0000823 if (CheckStrings.empty()) {
Chris Lattnerd7e25052009-08-15 18:00:42 +0000824 errs() << "error: no check strings found with prefix '" << CheckPrefix
825 << ":'\n";
Chris Lattner81cb8ca2009-07-08 18:44:05 +0000826 return true;
827 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000828
Chris Lattner81cb8ca2009-07-08 18:44:05 +0000829 return false;
830}
831
Michael Liao95ab3262013-05-14 20:34:12 +0000832static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc,
833 const Pattern &Pat, StringRef Buffer,
Daniel Dunbarfafe93c2009-11-22 22:08:06 +0000834 StringMap<StringRef> &VariableTable) {
Chris Lattner5dafafd2009-08-15 18:32:21 +0000835 // Otherwise, we have an error, emit an error message.
Michael Liao95ab3262013-05-14 20:34:12 +0000836 SM.PrintMessage(Loc, SourceMgr::DK_Error,
Chris Lattner3f2d5f62011-10-16 05:43:57 +0000837 "expected string not found in input");
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000838
Chris Lattner5dafafd2009-08-15 18:32:21 +0000839 // Print the "scanning from here" line. If the current position is at the
840 // end of a line, advance to the start of the next line.
Chris Lattner96077032009-09-20 22:11:44 +0000841 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000842
Chris Lattner3f2d5f62011-10-16 05:43:57 +0000843 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
844 "scanning from here");
Daniel Dunbarfafe93c2009-11-22 22:08:06 +0000845
846 // Allow the pattern to print additional information if desired.
Michael Liao95ab3262013-05-14 20:34:12 +0000847 Pat.PrintFailureInfo(SM, Buffer, VariableTable);
848}
849
850static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
851 StringRef Buffer,
852 StringMap<StringRef> &VariableTable) {
853 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
Chris Lattner5dafafd2009-08-15 18:32:21 +0000854}
855
Chris Lattner3711b7a2009-09-20 22:42:44 +0000856/// CountNumNewlinesBetween - Count the number of newlines in the specified
857/// range.
858static unsigned CountNumNewlinesBetween(StringRef Range) {
Chris Lattner5dafafd2009-08-15 18:32:21 +0000859 unsigned NumNewLines = 0;
Chris Lattner3711b7a2009-09-20 22:42:44 +0000860 while (1) {
Chris Lattner5dafafd2009-08-15 18:32:21 +0000861 // Scan for newline.
Chris Lattner3711b7a2009-09-20 22:42:44 +0000862 Range = Range.substr(Range.find_first_of("\n\r"));
863 if (Range.empty()) return NumNewLines;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000864
Chris Lattner5dafafd2009-08-15 18:32:21 +0000865 ++NumNewLines;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +0000866
Chris Lattner5dafafd2009-08-15 18:32:21 +0000867 // Handle \n\r and \r\n as a single newline.
Chris Lattner3711b7a2009-09-20 22:42:44 +0000868 if (Range.size() > 1 &&
869 (Range[1] == '\n' || Range[1] == '\r') &&
870 (Range[0] != Range[1]))
871 Range = Range.substr(1);
872 Range = Range.substr(1);
Chris Lattner5dafafd2009-08-15 18:32:21 +0000873 }
Chris Lattner5dafafd2009-08-15 18:32:21 +0000874}
875
Michael Liao7efbbd62013-05-14 20:29:52 +0000876size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
Stephen Lin178504b2013-07-12 14:51:05 +0000877 bool IsLabel, size_t &MatchLen,
Michael Liao7efbbd62013-05-14 20:29:52 +0000878 StringMap<StringRef> &VariableTable) const {
Michael Liao95ab3262013-05-14 20:34:12 +0000879 size_t LastPos = 0;
880 std::vector<const Pattern *> NotStrings;
881
Stephen Lin178504b2013-07-12 14:51:05 +0000882 if (!IsLabel) {
883 // Match "dag strings" (with mixed "not strings" if any).
884 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
885 if (LastPos == StringRef::npos)
886 return StringRef::npos;
887 }
Michael Liao95ab3262013-05-14 20:34:12 +0000888
889 // Match itself from the last position after matching CHECK-DAG.
890 StringRef MatchBuffer = Buffer.substr(LastPos);
891 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
Michael Liao7efbbd62013-05-14 20:29:52 +0000892 if (MatchPos == StringRef::npos) {
Michael Liao95ab3262013-05-14 20:34:12 +0000893 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
Michael Liao7efbbd62013-05-14 20:29:52 +0000894 return StringRef::npos;
895 }
Michael Liao95ab3262013-05-14 20:34:12 +0000896 MatchPos += LastPos;
Michael Liao7efbbd62013-05-14 20:29:52 +0000897
Stephen Lin178504b2013-07-12 14:51:05 +0000898 if (!IsLabel) {
899 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
Michael Liao7efbbd62013-05-14 20:29:52 +0000900
Stephen Lin178504b2013-07-12 14:51:05 +0000901 // If this check is a "CHECK-NEXT", verify that the previous match was on
902 // the previous line (i.e. that there is one newline between them).
903 if (CheckNext(SM, SkippedRegion))
904 return StringRef::npos;
Michael Liao7efbbd62013-05-14 20:29:52 +0000905
Stephen Lin178504b2013-07-12 14:51:05 +0000906 // If this match had "not strings", verify that they don't exist in the
907 // skipped region.
908 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
909 return StringRef::npos;
910 }
Michael Liao7efbbd62013-05-14 20:29:52 +0000911
912 return MatchPos;
913}
914
915bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
Matt Arsenault4f67afc2013-09-17 22:30:02 +0000916 if (CheckTy != Check::CheckNext)
Michael Liao7efbbd62013-05-14 20:29:52 +0000917 return false;
918
919 // Count the number of newlines between the previous match and this one.
920 assert(Buffer.data() !=
921 SM.getMemoryBuffer(
922 SM.FindBufferContainingLoc(
923 SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
924 "CHECK-NEXT can't be the first check in a file");
925
926 unsigned NumNewLines = CountNumNewlinesBetween(Buffer);
927
928 if (NumNewLines == 0) {
929 SM.PrintMessage(Loc, SourceMgr::DK_Error, CheckPrefix+
930 "-NEXT: is on the same line as previous match");
931 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
932 SourceMgr::DK_Note, "'next' match was here");
933 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
934 "previous match ended here");
935 return true;
936 }
937
938 if (NumNewLines != 1) {
939 SM.PrintMessage(Loc, SourceMgr::DK_Error, CheckPrefix+
940 "-NEXT: is not on the line after the previous match");
941 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
942 SourceMgr::DK_Note, "'next' match was here");
943 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
944 "previous match ended here");
945 return true;
946 }
947
948 return false;
949}
950
951bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
Michael Liao95ab3262013-05-14 20:34:12 +0000952 const std::vector<const Pattern *> &NotStrings,
Michael Liao7efbbd62013-05-14 20:29:52 +0000953 StringMap<StringRef> &VariableTable) const {
954 for (unsigned ChunkNo = 0, e = NotStrings.size();
955 ChunkNo != e; ++ChunkNo) {
Michael Liao95ab3262013-05-14 20:34:12 +0000956 const Pattern *Pat = NotStrings[ChunkNo];
Matt Arsenault4f67afc2013-09-17 22:30:02 +0000957 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
Michael Liao95ab3262013-05-14 20:34:12 +0000958
Michael Liao7efbbd62013-05-14 20:29:52 +0000959 size_t MatchLen = 0;
Michael Liao95ab3262013-05-14 20:34:12 +0000960 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
Michael Liao7efbbd62013-05-14 20:29:52 +0000961
962 if (Pos == StringRef::npos) continue;
963
964 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
965 SourceMgr::DK_Error,
966 CheckPrefix+"-NOT: string occurred!");
Michael Liao95ab3262013-05-14 20:34:12 +0000967 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
Michael Liao7efbbd62013-05-14 20:29:52 +0000968 CheckPrefix+"-NOT: pattern specified here");
969 return true;
970 }
971
972 return false;
973}
974
Michael Liao95ab3262013-05-14 20:34:12 +0000975size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
976 std::vector<const Pattern *> &NotStrings,
977 StringMap<StringRef> &VariableTable) const {
978 if (DagNotStrings.empty())
979 return 0;
980
981 size_t LastPos = 0;
982 size_t StartPos = LastPos;
983
984 for (unsigned ChunkNo = 0, e = DagNotStrings.size();
985 ChunkNo != e; ++ChunkNo) {
986 const Pattern &Pat = DagNotStrings[ChunkNo];
987
Matt Arsenault4f67afc2013-09-17 22:30:02 +0000988 assert((Pat.getCheckTy() == Check::CheckDAG ||
989 Pat.getCheckTy() == Check::CheckNot) &&
Michael Liao95ab3262013-05-14 20:34:12 +0000990 "Invalid CHECK-DAG or CHECK-NOT!");
991
Matt Arsenault4f67afc2013-09-17 22:30:02 +0000992 if (Pat.getCheckTy() == Check::CheckNot) {
Michael Liao95ab3262013-05-14 20:34:12 +0000993 NotStrings.push_back(&Pat);
994 continue;
995 }
996
Matt Arsenault4f67afc2013-09-17 22:30:02 +0000997 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
Michael Liao95ab3262013-05-14 20:34:12 +0000998
999 size_t MatchLen = 0, MatchPos;
1000
1001 // CHECK-DAG always matches from the start.
1002 StringRef MatchBuffer = Buffer.substr(StartPos);
1003 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1004 // With a group of CHECK-DAGs, a single mismatching means the match on
1005 // that group of CHECK-DAGs fails immediately.
1006 if (MatchPos == StringRef::npos) {
1007 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1008 return StringRef::npos;
1009 }
1010 // Re-calc it as the offset relative to the start of the original string.
1011 MatchPos += StartPos;
1012
1013 if (!NotStrings.empty()) {
1014 if (MatchPos < LastPos) {
1015 // Reordered?
1016 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1017 SourceMgr::DK_Error,
1018 CheckPrefix+"-DAG: found a match of CHECK-DAG"
1019 " reordering across a CHECK-NOT");
1020 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1021 SourceMgr::DK_Note,
1022 CheckPrefix+"-DAG: the farthest match of CHECK-DAG"
1023 " is found here");
1024 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1025 CheckPrefix+"-NOT: the crossed pattern specified"
1026 " here");
1027 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1028 CheckPrefix+"-DAG: the reordered pattern specified"
1029 " here");
1030 return StringRef::npos;
1031 }
1032 // All subsequent CHECK-DAGs should be matched from the farthest
1033 // position of all precedent CHECK-DAGs (including this one.)
1034 StartPos = LastPos;
1035 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1036 // CHECK-DAG, verify that there's no 'not' strings occurred in that
1037 // region.
1038 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
Tim Northovere57343b2013-08-02 11:32:50 +00001039 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
Michael Liao95ab3262013-05-14 20:34:12 +00001040 return StringRef::npos;
1041 // Clear "not strings".
1042 NotStrings.clear();
1043 }
1044
1045 // Update the last position with CHECK-DAG matches.
1046 LastPos = std::max(MatchPos + MatchLen, LastPos);
1047 }
1048
1049 return LastPos;
1050}
1051
Rui Ueyamad9a84ef2013-08-12 23:05:59 +00001052bool ValidateCheckPrefix() {
1053 // The check prefix must contain only alphanumeric, hyphens and underscores.
1054 Regex prefixValidator("^[a-zA-Z0-9_-]*$");
1055 return prefixValidator.match(CheckPrefix);
1056}
1057
Chris Lattner81cb8ca2009-07-08 18:44:05 +00001058int main(int argc, char **argv) {
1059 sys::PrintStackTraceOnErrorSignal();
1060 PrettyStackTraceProgram X(argc, argv);
1061 cl::ParseCommandLineOptions(argc, argv);
1062
Rui Ueyamad9a84ef2013-08-12 23:05:59 +00001063 if (!ValidateCheckPrefix()) {
1064 errs() << "Supplied check-prefix is invalid! Prefixes must start with a "
1065 "letter and contain only alphanumeric characters, hyphens and "
1066 "underscores\n";
1067 return 2;
1068 }
1069
Chris Lattner81cb8ca2009-07-08 18:44:05 +00001070 SourceMgr SM;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +00001071
Chris Lattner81cb8ca2009-07-08 18:44:05 +00001072 // Read the expected strings from the check file.
Chris Lattner207e1bc2009-08-15 17:41:04 +00001073 std::vector<CheckString> CheckStrings;
Chris Lattner81cb8ca2009-07-08 18:44:05 +00001074 if (ReadCheckFile(SM, CheckStrings))
1075 return 2;
1076
1077 // Open the file to check and add it to SourceMgr.
Michael J. Spencer3ff95632010-12-16 03:29:14 +00001078 OwningPtr<MemoryBuffer> File;
1079 if (error_code ec =
Rafael Espindoladd5af272013-06-25 05:28:34 +00001080 MemoryBuffer::getFileOrSTDIN(InputFilename, File)) {
Mikhail Glushenkov7112c862010-08-20 17:38:38 +00001081 errs() << "Could not open input file '" << InputFilename << "': "
Michael J. Spencer333fb042010-12-09 17:36:48 +00001082 << ec.message() << '\n';
Eli Bendersky7f8e76f2012-11-30 13:51:33 +00001083 return 2;
Chris Lattner81cb8ca2009-07-08 18:44:05 +00001084 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +00001085
Benjamin Kramer7cdba152013-03-23 13:56:23 +00001086 if (File->getBufferSize() == 0) {
Chris Lattner1aac1862011-02-09 16:46:02 +00001087 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
Eli Bendersky7f8e76f2012-11-30 13:51:33 +00001088 return 2;
Chris Lattner1aac1862011-02-09 16:46:02 +00001089 }
Benjamin Kramer7cdba152013-03-23 13:56:23 +00001090
Chris Lattner88a7e9e2009-07-11 18:58:15 +00001091 // Remove duplicate spaces in the input file if requested.
Guy Benyei4cc74fc2013-02-06 20:40:38 +00001092 // Remove DOS style line endings.
Benjamin Kramer7cdba152013-03-23 13:56:23 +00001093 MemoryBuffer *F =
1094 CanonicalizeInputFile(File.take(), NoCanonicalizeWhiteSpace);
Mikhail Glushenkov7112c862010-08-20 17:38:38 +00001095
Chris Lattner81cb8ca2009-07-08 18:44:05 +00001096 SM.AddNewSourceBuffer(F, SMLoc());
Mikhail Glushenkov7112c862010-08-20 17:38:38 +00001097
Chris Lattnereec96952009-09-27 07:56:52 +00001098 /// VariableTable - This holds all the current filecheck variables.
1099 StringMap<StringRef> VariableTable;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +00001100
Chris Lattner81cb8ca2009-07-08 18:44:05 +00001101 // Check that we have all of the expected strings, in order, in the input
1102 // file.
Chris Lattner96077032009-09-20 22:11:44 +00001103 StringRef Buffer = F->getBuffer();
Mikhail Glushenkov7112c862010-08-20 17:38:38 +00001104
Stephen Lin178504b2013-07-12 14:51:05 +00001105 bool hasError = false;
Mikhail Glushenkov7112c862010-08-20 17:38:38 +00001106
Stephen Lin178504b2013-07-12 14:51:05 +00001107 unsigned i = 0, j = 0, e = CheckStrings.size();
Mikhail Glushenkov7112c862010-08-20 17:38:38 +00001108
Stephen Lin178504b2013-07-12 14:51:05 +00001109 while (true) {
1110 StringRef CheckRegion;
1111 if (j == e) {
1112 CheckRegion = Buffer;
1113 } else {
1114 const CheckString &CheckLabelStr = CheckStrings[j];
Matt Arsenault4f67afc2013-09-17 22:30:02 +00001115 if (CheckLabelStr.CheckTy != Check::CheckLabel) {
Stephen Lin178504b2013-07-12 14:51:05 +00001116 ++j;
1117 continue;
1118 }
Chris Lattner3711b7a2009-09-20 22:42:44 +00001119
Stephen Lin178504b2013-07-12 14:51:05 +00001120 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1121 size_t MatchLabelLen = 0;
1122 size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1123 MatchLabelLen, VariableTable);
1124 if (MatchLabelPos == StringRef::npos) {
1125 hasError = true;
1126 break;
1127 }
1128
1129 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1130 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1131 ++j;
1132 }
1133
1134 for ( ; i != j; ++i) {
1135 const CheckString &CheckStr = CheckStrings[i];
1136
1137 // Check each string within the scanned region, including a second check
1138 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1139 size_t MatchLen = 0;
1140 size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1141 VariableTable);
1142
1143 if (MatchPos == StringRef::npos) {
1144 hasError = true;
1145 i = j;
1146 break;
1147 }
1148
1149 CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1150 }
1151
1152 if (j == e)
1153 break;
Chris Lattner81cb8ca2009-07-08 18:44:05 +00001154 }
Mikhail Glushenkov7112c862010-08-20 17:38:38 +00001155
Stephen Lin178504b2013-07-12 14:51:05 +00001156 return hasError ? 1 : 0;
Chris Lattner81cb8ca2009-07-08 18:44:05 +00001157}