blob: 24adba28a2949db0b786dfcda2631aad3ee91856 [file] [log] [blame]
Ilya Biryukove7230ea2019-05-22 14:44:45 +00001//===- TokensTest.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/Tooling/Syntax/Tokens.h"
10#include "clang/AST/ASTConsumer.h"
11#include "clang/AST/Expr.h"
12#include "clang/Basic/Diagnostic.h"
13#include "clang/Basic/DiagnosticIDs.h"
14#include "clang/Basic/DiagnosticOptions.h"
15#include "clang/Basic/FileManager.h"
16#include "clang/Basic/FileSystemOptions.h"
17#include "clang/Basic/LLVM.h"
18#include "clang/Basic/LangOptions.h"
19#include "clang/Basic/SourceLocation.h"
20#include "clang/Basic/SourceManager.h"
21#include "clang/Basic/TokenKinds.def"
22#include "clang/Basic/TokenKinds.h"
23#include "clang/Frontend/CompilerInstance.h"
24#include "clang/Frontend/FrontendAction.h"
25#include "clang/Frontend/Utils.h"
26#include "clang/Lex/Lexer.h"
27#include "clang/Lex/PreprocessorOptions.h"
28#include "clang/Lex/Token.h"
29#include "clang/Tooling/Tooling.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/IntrusiveRefCntPtr.h"
32#include "llvm/ADT/None.h"
33#include "llvm/ADT/Optional.h"
34#include "llvm/ADT/STLExtras.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/Support/FormatVariadic.h"
37#include "llvm/Support/MemoryBuffer.h"
38#include "llvm/Support/VirtualFileSystem.h"
39#include "llvm/Support/raw_os_ostream.h"
40#include "llvm/Support/raw_ostream.h"
41#include "llvm/Testing/Support/Annotations.h"
42#include "llvm/Testing/Support/SupportHelpers.h"
43#include <cassert>
44#include <cstdlib>
45#include <gmock/gmock.h>
46#include <gtest/gtest.h>
47#include <memory>
48#include <ostream>
49#include <string>
50
51using namespace clang;
52using namespace clang::syntax;
53
54using llvm::ValueIs;
55using ::testing::AllOf;
56using ::testing::Contains;
57using ::testing::ElementsAre;
Ilya Biryukov5aed3092019-06-18 16:27:27 +000058using ::testing::Field;
Ilya Biryukove7230ea2019-05-22 14:44:45 +000059using ::testing::Matcher;
60using ::testing::Not;
61using ::testing::StartsWith;
62
63namespace {
64// Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
65// argument.
66MATCHER_P(SameRange, A, "") {
67 return A.begin() == arg.begin() && A.end() == arg.end();
68}
Ilya Biryukov5aed3092019-06-18 16:27:27 +000069
70Matcher<TokenBuffer::Expansion>
71IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,
72 Matcher<llvm::ArrayRef<syntax::Token>> Expanded) {
73 return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled),
74 Field(&TokenBuffer::Expansion::Expanded, Expanded));
75}
Ilya Biryukove7230ea2019-05-22 14:44:45 +000076// Matchers for syntax::Token.
77MATCHER_P(Kind, K, "") { return arg.kind() == K; }
78MATCHER_P2(HasText, Text, SourceMgr, "") {
79 return arg.text(*SourceMgr) == Text;
80}
81/// Checks the start and end location of a token are equal to SourceRng.
82MATCHER_P(RangeIs, SourceRng, "") {
83 return arg.location() == SourceRng.first &&
84 arg.endLocation() == SourceRng.second;
85}
86
87class TokenCollectorTest : public ::testing::Test {
88public:
89 /// Run the clang frontend, collect the preprocessed tokens from the frontend
90 /// invocation and store them in this->Buffer.
91 /// This also clears SourceManager before running the compiler.
92 void recordTokens(llvm::StringRef Code) {
93 class RecordTokens : public ASTFrontendAction {
94 public:
95 explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
96
97 bool BeginSourceFileAction(CompilerInstance &CI) override {
98 assert(!Collector && "expected only a single call to BeginSourceFile");
99 Collector.emplace(CI.getPreprocessor());
100 return true;
101 }
102 void EndSourceFileAction() override {
103 assert(Collector && "BeginSourceFileAction was never called");
104 Result = std::move(*Collector).consume();
105 }
106
107 std::unique_ptr<ASTConsumer>
108 CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
109 return llvm::make_unique<ASTConsumer>();
110 }
111
112 private:
113 TokenBuffer &Result;
114 llvm::Optional<TokenCollector> Collector;
115 };
116
117 constexpr const char *FileName = "./input.cpp";
118 FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
119 // Prepare to run a compiler.
120 if (!Diags->getClient())
121 Diags->setClient(new IgnoringDiagConsumer);
122 std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
123 FileName};
124 auto CI = createInvocationFromCommandLine(Args, Diags, FS);
125 assert(CI);
126 CI->getFrontendOpts().DisableFree = false;
127 CI->getPreprocessorOpts().addRemappedFile(
128 FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
129 CompilerInstance Compiler;
130 Compiler.setInvocation(std::move(CI));
131 Compiler.setDiagnostics(Diags.get());
132 Compiler.setFileManager(FileMgr.get());
133 Compiler.setSourceManager(SourceMgr.get());
134
135 this->Buffer = TokenBuffer(*SourceMgr);
136 RecordTokens Recorder(this->Buffer);
137 ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
138 << "failed to run the frontend";
139 }
140
141 /// Record the tokens and return a test dump of the resulting buffer.
142 std::string collectAndDump(llvm::StringRef Code) {
143 recordTokens(Code);
144 return Buffer.dumpForTests();
145 }
146
147 // Adds a file to the test VFS.
148 void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
149 if (!FS->addFile(Path, time_t(),
150 llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
151 ADD_FAILURE() << "could not add a file to VFS: " << Path;
152 }
153 }
154
155 /// Add a new file, run syntax::tokenize() on it and return the results.
156 std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
157 // FIXME: pass proper LangOptions.
158 return syntax::tokenize(
159 SourceMgr->createFileID(llvm::MemoryBuffer::getMemBufferCopy(Text)),
160 *SourceMgr, LangOptions());
161 }
162
163 // Specialized versions of matchers that hide the SourceManager from clients.
164 Matcher<syntax::Token> HasText(std::string Text) const {
165 return ::HasText(Text, SourceMgr.get());
166 }
167 Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
168 std::pair<SourceLocation, SourceLocation> Ls;
169 Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
170 .getLocWithOffset(R.Begin);
171 Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
172 .getLocWithOffset(R.End);
173 return ::RangeIs(Ls);
174 }
175
176 /// Finds a subrange in O(n * m).
177 template <class T, class U, class Eq>
178 llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
179 llvm::ArrayRef<T> Range, Eq F) {
180 for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) {
181 auto It = Begin;
182 for (auto ItSub = Subrange.begin();
183 ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) {
184 if (!F(*ItSub, *It))
185 goto continue_outer;
186 }
187 return llvm::makeArrayRef(Begin, It);
188 continue_outer:;
189 }
190 return llvm::makeArrayRef(Range.end(), Range.end());
191 }
192
193 /// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
194 /// The match should be unique. \p Query is a whitespace-separated list of
195 /// tokens to search for.
196 llvm::ArrayRef<syntax::Token>
197 findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
198 llvm::SmallVector<llvm::StringRef, 8> QueryTokens;
199 Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
200 if (QueryTokens.empty()) {
201 ADD_FAILURE() << "will not look for an empty list of tokens";
202 std::abort();
203 }
204 // An equality test for search.
205 auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
206 return Q == T.text(*SourceMgr);
207 };
208 // Find a match.
209 auto Found =
210 findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches);
211 if (Found.begin() == Tokens.end()) {
212 ADD_FAILURE() << "could not find the subrange for " << Query;
213 std::abort();
214 }
215 // Check that the match is unique.
216 if (findSubrange(llvm::makeArrayRef(QueryTokens),
217 llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches)
218 .begin() != Tokens.end()) {
219 ADD_FAILURE() << "match is not unique for " << Query;
220 std::abort();
221 }
222 return Found;
223 };
224
225 // Specialized versions of findTokenRange for expanded and spelled tokens.
226 llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
227 return findTokenRange(Query, Buffer.expandedTokens());
228 }
229 llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
230 FileID File = FileID()) {
231 if (!File.isValid())
232 File = SourceMgr->getMainFileID();
233 return findTokenRange(Query, Buffer.spelledTokens(File));
234 }
235
236 // Data fields.
237 llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
238 new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
239 IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
240 new llvm::vfs::InMemoryFileSystem;
241 llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
242 new FileManager(FileSystemOptions(), FS);
243 llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
244 new SourceManager(*Diags, *FileMgr);
245 /// Contains last result of calling recordTokens().
246 TokenBuffer Buffer = TokenBuffer(*SourceMgr);
247};
248
249TEST_F(TokenCollectorTest, RawMode) {
250 EXPECT_THAT(tokenize("int main() {}"),
251 ElementsAre(Kind(tok::kw_int),
252 AllOf(HasText("main"), Kind(tok::identifier)),
253 Kind(tok::l_paren), Kind(tok::r_paren),
254 Kind(tok::l_brace), Kind(tok::r_brace)));
255 // Comments are ignored for now.
256 EXPECT_THAT(tokenize("/* foo */int a; // more comments"),
257 ElementsAre(Kind(tok::kw_int),
258 AllOf(HasText("a"), Kind(tok::identifier)),
259 Kind(tok::semi)));
260}
261
// Table-driven test: each entry pairs an input program with the exact text
// that collectAndDump() is expected to return for it.
262TEST_F(TokenCollectorTest, Basic) {
263 std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
264 {"int main() {}",
265 R"(expanded tokens:
266 int main ( ) { }
267file './input.cpp'
268 spelled tokens:
269 int main ( ) { }
270 no mappings.
271)"},
272 // All kinds of whitespace are ignored.
273 {"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n",
274 R"(expanded tokens:
275 int main ( ) { }
276file './input.cpp'
277 spelled tokens:
278 int main ( ) { }
279 no mappings.
280)"},
281 // Annotation tokens are ignored.
282 {R"cpp(
283 #pragma GCC visibility push (public)
284 #pragma GCC visibility pop
285 )cpp",
286 R"(expanded tokens:
287 <empty>
288file './input.cpp'
289 spelled tokens:
290 # pragma GCC visibility push ( public ) # pragma GCC visibility pop
291 mappings:
292 ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
Ilya Biryukov26c066d2019-06-19 13:56:36 +0000293)"},
294 // Empty files should not crash.
295 {R"cpp()cpp", R"(expanded tokens:
296 <empty>
297file './input.cpp'
298 spelled tokens:
299 <empty>
300 no mappings.
Ilya Biryukove7230ea2019-05-22 14:44:45 +0000301)"}};
// Compare the dump of every input with its expectation; the dump is computed a
// second time so that it is printed in full as part of the failure message.
302 for (auto &Test : TestCases)
303 EXPECT_EQ(collectAndDump(Test.first), Test.second)
304 << collectAndDump(Test.first);
305}
306
307TEST_F(TokenCollectorTest, Locations) {
308 // Check locations of the tokens.
309 llvm::Annotations Code(R"cpp(
310 $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
311 )cpp");
312 recordTokens(Code.code());
313 // Check expanded tokens.
314 EXPECT_THAT(
315 Buffer.expandedTokens(),
316 ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
317 AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
318 AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
319 AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
320 AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))),
321 Kind(tok::eof)));
322 // Check spelled tokens.
323 EXPECT_THAT(
324 Buffer.spelledTokens(SourceMgr->getMainFileID()),
325 ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
326 AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
327 AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
328 AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
329 AllOf(Kind(tok::semi), RangeIs(Code.range("r5")))));
330}
331
// Feeds a program consisting of preprocessor directives followed by a single
// declaration through collectAndDump() and compares the result with the
// expected dump text.
332TEST_F(TokenCollectorTest, MacroDirectives) {
333 // Macro directives are not stored anywhere at the moment.
334 std::string Code = R"cpp(
335 #define FOO a
336 #include "unresolved_file.h"
337 #undef FOO
338 #ifdef X
339 #else
340 #endif
341 #ifndef Y
342 #endif
343 #if 1
344 #elif 2
345 #else
346 #endif
347 #pragma once
348 #pragma something lalala
349
350 int a;
351 )cpp";
352 std::string Expected =
353 "expanded tokens:\n"
354 " int a ;\n"
355 "file './input.cpp'\n"
356 " spelled tokens:\n"
357 " # define FOO a # include \"unresolved_file.h\" # undef FOO "
358 "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
359 "# endif # pragma once # pragma something lalala int a ;\n"
360 " mappings:\n"
361 " ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
362 EXPECT_EQ(collectAndDump(Code), Expected);
363}
364
// Table-driven test: each entry pairs a program that uses macros with the
// exact text that collectAndDump() is expected to return for it.
365TEST_F(TokenCollectorTest, MacroReplacements) {
366 std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
367 // A simple object-like macro.
368 {R"cpp(
369 #define INT int const
370 INT a;
371 )cpp",
372 R"(expanded tokens:
373 int const a ;
374file './input.cpp'
375 spelled tokens:
376 # define INT int const INT a ;
377 mappings:
378 ['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
379 ['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
380)"},
381 // A simple function-like macro.
382 {R"cpp(
383 #define INT(a) const int
384 INT(10+10) a;
385 )cpp",
386 R"(expanded tokens:
387 const int a ;
388file './input.cpp'
389 spelled tokens:
390 # define INT ( a ) const int INT ( 10 + 10 ) a ;
391 mappings:
392 ['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
393 ['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
394)"},
395 // Recursive macro replacements.
396 {R"cpp(
397 #define ID(X) X
398 #define INT int const
399 ID(ID(INT)) a;
400 )cpp",
401 R"(expanded tokens:
402 int const a ;
403file './input.cpp'
404 spelled tokens:
405 # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
406 mappings:
407 ['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
408 ['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
409)"},
410 // A little more complicated recursive macro replacements.
411 {R"cpp(
412 #define ADD(X, Y) X+Y
413 #define MULT(X, Y) X*Y
414
415 int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
416 )cpp",
417 "expanded tokens:\n"
418 " int a = 1 * 2 + 3 * 4 + 5 ;\n"
419 "file './input.cpp'\n"
420 " spelled tokens:\n"
421 " # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
422 "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
423 " mappings:\n"
424 " ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
425 " ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
426 // Empty macro replacement.
Ilya Biryukov5e69f272019-06-24 21:39:51 +0000427 // FIXME: the #define directives should not be glued together.
Ilya Biryukove7230ea2019-05-22 14:44:45 +0000428 {R"cpp(
429 #define EMPTY
430 #define EMPTY_FUNC(X)
431 EMPTY
432 EMPTY_FUNC(1+2+3)
433 )cpp",
434 R"(expanded tokens:
435 <empty>
436file './input.cpp'
437 spelled tokens:
438 # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
439 mappings:
Ilya Biryukov5e69f272019-06-24 21:39:51 +0000440 ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0)
441 ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0)
442 ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
Ilya Biryukove7230ea2019-05-22 14:44:45 +0000443)"},
444 // File ends with a macro replacement.
445 {R"cpp(
446 #define FOO 10+10;
447 int a = FOO
448 )cpp",
449 R"(expanded tokens:
450 int a = 10 + 10 ;
451file './input.cpp'
452 spelled tokens:
453 # define FOO 10 + 10 ; int a = FOO
454 mappings:
455 ['#'_0, 'int'_7) => ['int'_0, 'int'_0)
456 ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
457)"}};
458
// Compare the dump of every input with its expectation; the dump is computed a
// second time so that it is printed in full as part of the failure message.
459 for (auto &Test : TestCases)
460 EXPECT_EQ(Test.second, collectAndDump(Test.first))
461 << collectAndDump(Test.first);
462}
463
464TEST_F(TokenCollectorTest, SpecialTokens) {
465 // Tokens coming from concatenations.
466 recordTokens(R"cpp(
467 #define CONCAT(a, b) a ## b
468 int a = CONCAT(1, 2);
469 )cpp");
470 EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
471 Contains(HasText("12")));
472 // Multi-line tokens with slashes at the end.
473 recordTokens("i\\\nn\\\nt");
474 EXPECT_THAT(Buffer.expandedTokens(),
475 ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")),
476 Kind(tok::eof)));
477 // FIXME: test tokens with digraphs and UCN identifiers.
478}
479
480TEST_F(TokenCollectorTest, LateBoundTokens) {
481 // The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
482 // but we choose to record them as a single token (for now).
483 llvm::Annotations Code(R"cpp(
484 template <class T>
485 struct foo { int a; };
486 int bar = foo<foo<int$br[[>>]]().a;
487 int baz = 10 $op[[>>]] 2;
488 )cpp");
489 recordTokens(Code.code());
490 EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
491 AllOf(Contains(AllOf(Kind(tok::greatergreater),
492 RangeIs(Code.range("br")))),
493 Contains(AllOf(Kind(tok::greatergreater),
494 RangeIs(Code.range("op"))))));
495}
496
// Checks that the dump returned by collectAndDump() for a class with inline
// member bodies starts with the expected list of expanded tokens.
497TEST_F(TokenCollectorTest, DelayedParsing) {
498 llvm::StringLiteral Code = R"cpp(
499 struct Foo {
500 int method() {
501 // Parser will visit method bodies and initializers multiple times, but
502 // TokenBuffer should only record the first walk over the tokens;
503 return 100;
504 }
505 int a = 10;
506
507 struct Subclass {
508 void foo() {
509 Foo().method();
510 }
511 };
512 };
513 )cpp";
514 std::string ExpectedTokens =
515 "expanded tokens:\n"
516 " struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
517 "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
// Only the prefix is compared; whatever follows the expanded tokens in the
// dump is not checked by this test.
518 EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
519}
520
// Adds two headers to the test VFS, includes them (one through the other) from
// the main file and compares the dump returned by collectAndDump() with the
// expected text, which lists spelled tokens and mappings per file.
521TEST_F(TokenCollectorTest, MultiFile) {
522 addFile("./foo.h", R"cpp(
523 #define ADD(X, Y) X+Y
524 int a = 100;
525 #include "bar.h"
526 )cpp");
527 addFile("./bar.h", R"cpp(
528 int b = ADD(1, 2);
529 #define MULT(X, Y) X*Y
530 )cpp");
531 llvm::StringLiteral Code = R"cpp(
532 #include "foo.h"
533 int c = ADD(1, MULT(2,3));
534 )cpp";
535
536 std::string Expected = R"(expanded tokens:
537 int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
538file './input.cpp'
539 spelled tokens:
540 # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
541 mappings:
542 ['#'_0, 'int'_3) => ['int'_12, 'int'_12)
543 ['ADD'_6, ';'_17) => ['1'_15, ';'_20)
544file './foo.h'
545 spelled tokens:
546 # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
547 mappings:
548 ['#'_0, 'int'_11) => ['int'_0, 'int'_0)
549 ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
550file './bar.h'
551 spelled tokens:
552 int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
553 mappings:
554 ['ADD'_3, ';'_9) => ['1'_8, ';'_11)
555 ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
556)";
557
// The dump is computed a second time so that it is printed in full as part of
// the failure message.
558 EXPECT_EQ(Expected, collectAndDump(Code))
559 << "input: " << Code << "\nresults: " << collectAndDump(Code);
560}
561
// Fixture for the TokenBuffer tests. It adds nothing to TokenCollectorTest and
// only gives those tests a separate fixture name.
562class TokenBufferTest : public TokenCollectorTest {};
563
564TEST_F(TokenBufferTest, SpelledByExpanded) {
565 recordTokens(R"cpp(
566 a1 a2 a3 b1 b2
567 )cpp");
568
569 // Sanity check: expanded and spelled tokens are stored separately.
570 EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
571 // Searching for subranges of expanded tokens should give the corresponding
572 // spelled ones.
573 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
574 ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
575 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
576 ValueIs(SameRange(findSpelled("a1 a2 a3"))));
577 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
578 ValueIs(SameRange(findSpelled("b1 b2"))));
579
580 // Test search on simple macro expansions.
581 recordTokens(R"cpp(
582 #define A a1 a2 a3
583 #define B b1 b2
584
585 A split B
586 )cpp");
587 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
588 ValueIs(SameRange(findSpelled("A split B"))));
589 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
590 ValueIs(SameRange(findSpelled("A split").drop_back())));
591 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
592 ValueIs(SameRange(findSpelled("split B").drop_front())));
593 // Ranges not fully covering macro invocations should fail.
594 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
595 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None);
596 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
597 llvm::None);
598
599 // Recursive macro invocations.
600 recordTokens(R"cpp(
601 #define ID(x) x
602 #define B b1 b2
603
604 ID(ID(ID(a1) a2 a3)) split ID(B)
605 )cpp");
606
607 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
608 ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )"))));
609 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
610 ValueIs(SameRange(findSpelled("ID ( B )"))));
611 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
612 ValueIs(SameRange(findSpelled(
613 "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
614 // Ranges crossing macro call boundaries.
615 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1")),
616 llvm::None);
617 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1")),
618 llvm::None);
619 // FIXME: next two examples should map to macro arguments, but currently they
620 // fail.
621 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2")), llvm::None);
622 EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
623
624 // Empty macro expansions.
625 recordTokens(R"cpp(
626 #define EMPTY
627 #define ID(X) X
628
629 EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
630 EMPTY EMPTY ID(4 5 6) split2
631 ID(7 8 9) EMPTY EMPTY
632 )cpp");
633 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")),
634 ValueIs(SameRange(findSpelled("ID ( 1 2 3 )"))));
635 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")),
636 ValueIs(SameRange(findSpelled("ID ( 4 5 6 )"))));
637 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")),
638 ValueIs(SameRange(findSpelled("ID ( 7 8 9 )"))));
639
640 // Empty mappings coming from various directives.
641 recordTokens(R"cpp(
642 #define ID(X) X
643 ID(1)
644 #pragma lalala
645 not_mapped
646 )cpp");
647 EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")),
648 ValueIs(SameRange(findSpelled("not_mapped"))));
649}
650
Ilya Biryukov5aed3092019-06-18 16:27:27 +0000651TEST_F(TokenBufferTest, ExpansionStartingAt) {
652 // Object-like macro expansions.
653 recordTokens(R"cpp(
654 #define FOO 3+4
655 int a = FOO 1;
656 int b = FOO 2;
657 )cpp");
658
659 llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1").drop_back();
660 EXPECT_THAT(
661 Buffer.expansionStartingAt(Foo1.data()),
662 ValueIs(IsExpansion(SameRange(Foo1),
663 SameRange(findExpanded("3 + 4 1").drop_back()))));
664
665 llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2").drop_back();
666 EXPECT_THAT(
667 Buffer.expansionStartingAt(Foo2.data()),
668 ValueIs(IsExpansion(SameRange(Foo2),
669 SameRange(findExpanded("3 + 4 2").drop_back()))));
670
671 // Function-like macro expansions.
672 recordTokens(R"cpp(
673 #define ID(X) X
674 int a = ID(1+2+3);
675 int b = ID(ID(2+3+4));
676 )cpp");
677
678 llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )");
679 EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()),
680 ValueIs(IsExpansion(SameRange(ID1),
681 SameRange(findExpanded("1 + 2 + 3")))));
682 // Only the first spelled token should be found.
683 for (const auto &T : ID1.drop_front())
684 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
685
686 llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )");
687 EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()),
688 ValueIs(IsExpansion(SameRange(ID2),
689 SameRange(findExpanded("2 + 3 + 4")))));
690 // Only the first spelled token should be found.
691 for (const auto &T : ID2.drop_front())
692 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
693
694 // PP directives.
695 recordTokens(R"cpp(
696#define FOO 1
697int a = FOO;
698#pragma once
699int b = 1;
700 )cpp");
701
702 llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1");
703 EXPECT_THAT(
704 Buffer.expansionStartingAt(&DefineFoo.front()),
705 ValueIs(IsExpansion(SameRange(DefineFoo),
706 SameRange(findExpanded("int a").take_front(0)))));
707 // Only the first spelled token should be found.
708 for (const auto &T : DefineFoo.drop_front())
709 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
710
711 llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once");
712 EXPECT_THAT(
713 Buffer.expansionStartingAt(&PragmaOnce.front()),
714 ValueIs(IsExpansion(SameRange(PragmaOnce),
715 SameRange(findExpanded("int b").take_front(0)))));
716 // Only the first spelled token should be found.
717 for (const auto &T : PragmaOnce.drop_front())
718 EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
719}
720
Ilya Biryukove7230ea2019-05-22 14:44:45 +0000721TEST_F(TokenBufferTest, TokensToFileRange) {
722 addFile("./foo.h", "token_from_header");
723 llvm::Annotations Code(R"cpp(
724 #define FOO token_from_expansion
725 #include "./foo.h"
726 $all[[$i[[int]] a = FOO;]]
727 )cpp");
728 recordTokens(Code.code());
729
730 auto &SM = *SourceMgr;
731
732 // Two simple examples.
733 auto Int = findExpanded("int").front();
734 auto Semi = findExpanded(";").front();
735 EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin,
736 Code.range("i").End));
737 EXPECT_EQ(syntax::Token::range(SM, Int, Semi),
738 FileRange(SM.getMainFileID(), Code.range("all").Begin,
739 Code.range("all").End));
740 // We don't test assertion failures because death tests are slow.
741}
742
Duncan P. N. Exon Smithd4a9cae2019-05-25 22:38:02 +0000743} // namespace