Blame - clang/unittests/Tooling/Syntax/TokensTest.cpp - toolchain/llvm-project

blob: 24adba28a2949db0b786dfcda2631aad3ee91856 [file] [log] [blame]

Ilya Biryukov	e7230ea	2019-05-22 14:44:45 +0000	[diff] [blame]	1	//===- TokensTest.cpp -----------------------------------------------------===//
				2	//
				3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				6	//
				7	//===----------------------------------------------------------------------===//
				8
				9	#include "clang/Tooling/Syntax/Tokens.h"
				10	#include "clang/AST/ASTConsumer.h"
				11	#include "clang/AST/Expr.h"
				12	#include "clang/Basic/Diagnostic.h"
				13	#include "clang/Basic/DiagnosticIDs.h"
				14	#include "clang/Basic/DiagnosticOptions.h"
				15	#include "clang/Basic/FileManager.h"
				16	#include "clang/Basic/FileSystemOptions.h"
				17	#include "clang/Basic/LLVM.h"
				18	#include "clang/Basic/LangOptions.h"
				19	#include "clang/Basic/SourceLocation.h"
				20	#include "clang/Basic/SourceManager.h"
				21	#include "clang/Basic/TokenKinds.def"
				22	#include "clang/Basic/TokenKinds.h"
				23	#include "clang/Frontend/CompilerInstance.h"
				24	#include "clang/Frontend/FrontendAction.h"
				25	#include "clang/Frontend/Utils.h"
				26	#include "clang/Lex/Lexer.h"
				27	#include "clang/Lex/PreprocessorOptions.h"
				28	#include "clang/Lex/Token.h"
				29	#include "clang/Tooling/Tooling.h"
				30	#include "llvm/ADT/ArrayRef.h"
				31	#include "llvm/ADT/IntrusiveRefCntPtr.h"
				32	#include "llvm/ADT/None.h"
				33	#include "llvm/ADT/Optional.h"
				34	#include "llvm/ADT/STLExtras.h"
				35	#include "llvm/ADT/StringRef.h"
				36	#include "llvm/Support/FormatVariadic.h"
				37	#include "llvm/Support/MemoryBuffer.h"
				38	#include "llvm/Support/VirtualFileSystem.h"
				39	#include "llvm/Support/raw_os_ostream.h"
				40	#include "llvm/Support/raw_ostream.h"
				41	#include "llvm/Testing/Support/Annotations.h"
				42	#include "llvm/Testing/Support/SupportHelpers.h"
				43	#include <cassert>
				44	#include <cstdlib>
				45	#include <gmock/gmock.h>
				46	#include <gtest/gtest.h>
				47	#include <memory>
				48	#include <ostream>
				49	#include <string>
				50
				51	using namespace clang;
				52	using namespace clang::syntax;
				53
				54	using llvm::ValueIs;
				55	using ::testing::AllOf;
				56	using ::testing::Contains;
				57	using ::testing::ElementsAre;
Ilya Biryukov	5aed309	2019-06-18 16:27:27 +0000	[diff] [blame]	58	using ::testing::Field;
Ilya Biryukov	e7230ea	2019-05-22 14:44:45 +0000	[diff] [blame]	59	using ::testing::Matcher;
				60	using ::testing::Not;
				61	using ::testing::StartsWith;
				62
				63	namespace {
				64	// Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
				65	// argument.
				66	MATCHER_P(SameRange, A, "") {
				67	return A.begin() == arg.begin() && A.end() == arg.end();
				68	}
Ilya Biryukov	5aed309	2019-06-18 16:27:27 +0000	[diff] [blame]	69
				70	Matcher<TokenBuffer::Expansion>
				71	IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,
				72	Matcher<llvm::ArrayRef<syntax::Token>> Expanded) {
				73	return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled),
				74	Field(&TokenBuffer::Expansion::Expanded, Expanded));
				75	}
Ilya Biryukov	e7230ea	2019-05-22 14:44:45 +0000	[diff] [blame]	76	// Matchers for syntax::Token.
				77	MATCHER_P(Kind, K, "") { return arg.kind() == K; }
				78	MATCHER_P2(HasText, Text, SourceMgr, "") {
				79	return arg.text(*SourceMgr) == Text;
				80	}
				81	/// Checks the start and end location of a token are equal to SourceRng.
				82	MATCHER_P(RangeIs, SourceRng, "") {
				83	return arg.location() == SourceRng.first &&
				84	arg.endLocation() == SourceRng.second;
				85	}
				86
				87	class TokenCollectorTest : public ::testing::Test {
				88	public:
				89	/// Run the clang frontend, collect the preprocessed tokens from the frontend
				90	/// invocation and store them in this->Buffer.
				91	/// This also clears SourceManager before running the compiler.
				92	void recordTokens(llvm::StringRef Code) {
				93	class RecordTokens : public ASTFrontendAction {
				94	public:
				95	explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
				96
				97	bool BeginSourceFileAction(CompilerInstance &CI) override {
				98	assert(!Collector && "expected only a single call to BeginSourceFile");
				99	Collector.emplace(CI.getPreprocessor());
				100	return true;
				101	}
				102	void EndSourceFileAction() override {
				103	assert(Collector && "BeginSourceFileAction was never called");
				104	Result = std::move(*Collector).consume();
				105	}
				106
				107	std::unique_ptr<ASTConsumer>
				108	CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
				109	return llvm::make_unique<ASTConsumer>();
				110	}
				111
				112	private:
				113	TokenBuffer &Result;
				114	llvm::Optional<TokenCollector> Collector;
				115	};
				116
				117	constexpr const char *FileName = "./input.cpp";
				118	FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
				119	// Prepare to run a compiler.
				120	if (!Diags->getClient())
				121	Diags->setClient(new IgnoringDiagConsumer);
				122	std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
				123	FileName};
				124	auto CI = createInvocationFromCommandLine(Args, Diags, FS);
				125	assert(CI);
				126	CI->getFrontendOpts().DisableFree = false;
				127	CI->getPreprocessorOpts().addRemappedFile(
				128	FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
				129	CompilerInstance Compiler;
				130	Compiler.setInvocation(std::move(CI));
				131	Compiler.setDiagnostics(Diags.get());
				132	Compiler.setFileManager(FileMgr.get());
				133	Compiler.setSourceManager(SourceMgr.get());
				134
				135	this->Buffer = TokenBuffer(*SourceMgr);
				136	RecordTokens Recorder(this->Buffer);
				137	ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
				138	<< "failed to run the frontend";
				139	}
				140
				141	/// Record the tokens and return a test dump of the resulting buffer.
				142	std::string collectAndDump(llvm::StringRef Code) {
				143	recordTokens(Code);
				144	return Buffer.dumpForTests();
				145	}
				146
				147	// Adds a file to the test VFS.
				148	void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
				149	if (!FS->addFile(Path, time_t(),
				150	llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
				151	ADD_FAILURE() << "could not add a file to VFS: " << Path;
				152	}
				153	}
				154
				155	/// Add a new file, run syntax::tokenize() on it and return the results.
				156	std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
				157	// FIXME: pass proper LangOptions.
				158	return syntax::tokenize(
				159	SourceMgr->createFileID(llvm::MemoryBuffer::getMemBufferCopy(Text)),
				160	*SourceMgr, LangOptions());
				161	}
				162
				163	// Specialized versions of matchers that hide the SourceManager from clients.
				164	Matcher<syntax::Token> HasText(std::string Text) const {
				165	return ::HasText(Text, SourceMgr.get());
				166	}
				167	Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
				168	std::pair<SourceLocation, SourceLocation> Ls;
				169	Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
				170	.getLocWithOffset(R.Begin);
				171	Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
				172	.getLocWithOffset(R.End);
				173	return ::RangeIs(Ls);
				174	}
				175
				176	/// Finds a subrange in O(n * m).
				177	template <class T, class U, class Eq>
				178	llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
				179	llvm::ArrayRef<T> Range, Eq F) {
				180	for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) {
				181	auto It = Begin;
				182	for (auto ItSub = Subrange.begin();
				183	ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) {
				184	if (!F(ItSub, It))
				185	goto continue_outer;
				186	}
				187	return llvm::makeArrayRef(Begin, It);
				188	continue_outer:;
				189	}
				190	return llvm::makeArrayRef(Range.end(), Range.end());
				191	}
				192
				193	/// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
				194	/// The match should be unique. \p Query is a whitespace-separated list of
				195	/// tokens to search for.
				196	llvm::ArrayRef<syntax::Token>
				197	findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
				198	llvm::SmallVector<llvm::StringRef, 8> QueryTokens;
				199	Query.split(QueryTokens, ' ', /MaxSplit=/-1, /KeepEmpty=/false);
				200	if (QueryTokens.empty()) {
				201	ADD_FAILURE() << "will not look for an empty list of tokens";
				202	std::abort();
				203	}
				204	// An equality test for search.
				205	auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
				206	return Q == T.text(*SourceMgr);
				207	};
				208	// Find a match.
				209	auto Found =
				210	findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches);
				211	if (Found.begin() == Tokens.end()) {
				212	ADD_FAILURE() << "could not find the subrange for " << Query;
				213	std::abort();
				214	}
				215	// Check that the match is unique.
				216	if (findSubrange(llvm::makeArrayRef(QueryTokens),
				217	llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches)
				218	.begin() != Tokens.end()) {
				219	ADD_FAILURE() << "match is not unique for " << Query;
				220	std::abort();
				221	}
				222	return Found;
				223	};
				224
				225	// Specialized versions of findTokenRange for expanded and spelled tokens.
				226	llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
				227	return findTokenRange(Query, Buffer.expandedTokens());
				228	}
				229	llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
				230	FileID File = FileID()) {
				231	if (!File.isValid())
				232	File = SourceMgr->getMainFileID();
				233	return findTokenRange(Query, Buffer.spelledTokens(File));
				234	}
				235
				236	// Data fields.
				237	llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
				238	new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
				239	IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
				240	new llvm::vfs::InMemoryFileSystem;
				241	llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
				242	new FileManager(FileSystemOptions(), FS);
				243	llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
				244	new SourceManager(Diags, FileMgr);
				245	/// Contains last result of calling recordTokens().
				246	TokenBuffer Buffer = TokenBuffer(*SourceMgr);
				247	};
				248
				249	TEST_F(TokenCollectorTest, RawMode) {
				250	EXPECT_THAT(tokenize("int main() {}"),
				251	ElementsAre(Kind(tok::kw_int),
				252	AllOf(HasText("main"), Kind(tok::identifier)),
				253	Kind(tok::l_paren), Kind(tok::r_paren),
				254	Kind(tok::l_brace), Kind(tok::r_brace)));
				255	// Comments are ignored for now.
				256	EXPECT_THAT(tokenize("/* foo */int a; // more comments"),
				257	ElementsAre(Kind(tok::kw_int),
				258	AllOf(HasText("a"), Kind(tok::identifier)),
				259	Kind(tok::semi)));
				260	}
				261
				262	TEST_F(TokenCollectorTest, Basic) {
				263	std::pair</Input/ std::string, /Expected/ std::string> TestCases[] = {
				264	{"int main() {}",
				265	R"(expanded tokens:
				266	int main ( ) { }
				267	file './input.cpp'
				268	spelled tokens:
				269	int main ( ) { }
				270	no mappings.
				271	)"},
				272	// All kinds of whitespace are ignored.
				273	{"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n",
				274	R"(expanded tokens:
				275	int main ( ) { }
				276	file './input.cpp'
				277	spelled tokens:
				278	int main ( ) { }
				279	no mappings.
				280	)"},
				281	// Annotation tokens are ignored.
				282	{R"cpp(
				283	#pragma GCC visibility push (public)
				284	#pragma GCC visibility pop
				285	)cpp",
				286	R"(expanded tokens:
				287	<empty>
				288	file './input.cpp'
				289	spelled tokens:
				290	# pragma GCC visibility push ( public ) # pragma GCC visibility pop
				291	mappings:
				292	['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
Ilya Biryukov	26c066d	2019-06-19 13:56:36 +0000	[diff] [blame]	293	)"},
				294	// Empty files should not crash.
				295	{R"cpp()cpp", R"(expanded tokens:
				296	<empty>
				297	file './input.cpp'
				298	spelled tokens:
				299	<empty>
				300	no mappings.
Ilya Biryukov	e7230ea	2019-05-22 14:44:45 +0000	[diff] [blame]	301	)"}};
				302	for (auto &Test : TestCases)
				303	EXPECT_EQ(collectAndDump(Test.first), Test.second)
				304	<< collectAndDump(Test.first);
				305	}
				306
				307	TEST_F(TokenCollectorTest, Locations) {
				308	// Check locations of the tokens.
				309	llvm::Annotations Code(R"cpp(
				310	$r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
				311	)cpp");
				312	recordTokens(Code.code());
				313	// Check expanded tokens.
				314	EXPECT_THAT(
				315	Buffer.expandedTokens(),
				316	ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
				317	AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
				318	AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
				319	AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
				320	AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))),
				321	Kind(tok::eof)));
				322	// Check spelled tokens.
				323	EXPECT_THAT(
				324	Buffer.spelledTokens(SourceMgr->getMainFileID()),
				325	ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
				326	AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
				327	AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
				328	AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
				329	AllOf(Kind(tok::semi), RangeIs(Code.range("r5")))));
				330	}
				331
				332	TEST_F(TokenCollectorTest, MacroDirectives) {
				333	// Macro directives are not stored anywhere at the moment.
				334	std::string Code = R"cpp(
				335	#define FOO a
				336	#include "unresolved_file.h"
				337	#undef FOO
				338	#ifdef X
				339	#else
				340	#endif
				341	#ifndef Y
				342	#endif
				343	#if 1
				344	#elif 2
				345	#else
				346	#endif
				347	#pragma once
				348	#pragma something lalala
				349
				350	int a;
				351	)cpp";
				352	std::string Expected =
				353	"expanded tokens:\n"
				354	" int a ;\n"
				355	"file './input.cpp'\n"
				356	" spelled tokens:\n"
				357	" # define FOO a # include \"unresolved_file.h\" # undef FOO "
				358	"# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
				359	"# endif # pragma once # pragma something lalala int a ;\n"
				360	" mappings:\n"
				361	" ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
				362	EXPECT_EQ(collectAndDump(Code), Expected);
				363	}
				364
				365	TEST_F(TokenCollectorTest, MacroReplacements) {
				366	std::pair</Input/ std::string, /Expected/ std::string> TestCases[] = {
				367	// A simple object-like macro.
				368	{R"cpp(
				369	#define INT int const
				370	INT a;
				371	)cpp",
				372	R"(expanded tokens:
				373	int const a ;
				374	file './input.cpp'
				375	spelled tokens:
				376	# define INT int const INT a ;
				377	mappings:
				378	['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
				379	['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
				380	)"},
				381	// A simple function-like macro.
				382	{R"cpp(
				383	#define INT(a) const int
				384	INT(10+10) a;
				385	)cpp",
				386	R"(expanded tokens:
				387	const int a ;
				388	file './input.cpp'
				389	spelled tokens:
				390	# define INT ( a ) const int INT ( 10 + 10 ) a ;
				391	mappings:
				392	['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
				393	['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
				394	)"},
				395	// Recursive macro replacements.
				396	{R"cpp(
				397	#define ID(X) X
				398	#define INT int const
				399	ID(ID(INT)) a;
				400	)cpp",
				401	R"(expanded tokens:
				402	int const a ;
				403	file './input.cpp'
				404	spelled tokens:
				405	# define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
				406	mappings:
				407	['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
				408	['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
				409	)"},
				410	// A little more complicated recursive macro replacements.
				411	{R"cpp(
				412	#define ADD(X, Y) X+Y
				413	#define MULT(X, Y) X*Y
				414
				415	int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
				416	)cpp",
				417	"expanded tokens:\n"
				418	" int a = 1 * 2 + 3 * 4 + 5 ;\n"
				419	"file './input.cpp'\n"
				420	" spelled tokens:\n"
				421	" # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
				422	"a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
				423	" mappings:\n"
				424	" ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
				425	" ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
				426	// Empty macro replacement.
Ilya Biryukov	5e69f27	2019-06-24 21:39:51 +0000	[diff] [blame]	427	// FIXME: the #define directives should not be glued together.
Ilya Biryukov	e7230ea	2019-05-22 14:44:45 +0000	[diff] [blame]	428	{R"cpp(
				429	#define EMPTY
				430	#define EMPTY_FUNC(X)
				431	EMPTY
				432	EMPTY_FUNC(1+2+3)
				433	)cpp",
				434	R"(expanded tokens:
				435	<empty>
				436	file './input.cpp'
				437	spelled tokens:
				438	# define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
				439	mappings:
Ilya Biryukov	5e69f27	2019-06-24 21:39:51 +0000	[diff] [blame]	440	['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0)
				441	['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0)
				442	['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
Ilya Biryukov	e7230ea	2019-05-22 14:44:45 +0000	[diff] [blame]	443	)"},
				444	// File ends with a macro replacement.
				445	{R"cpp(
				446	#define FOO 10+10;
				447	int a = FOO
				448	)cpp",
				449	R"(expanded tokens:
				450	int a = 10 + 10 ;
				451	file './input.cpp'
				452	spelled tokens:
				453	# define FOO 10 + 10 ; int a = FOO
				454	mappings:
				455	['#'_0, 'int'_7) => ['int'_0, 'int'_0)
				456	['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
				457	)"}};
				458
				459	for (auto &Test : TestCases)
				460	EXPECT_EQ(Test.second, collectAndDump(Test.first))
				461	<< collectAndDump(Test.first);
				462	}
				463
				464	TEST_F(TokenCollectorTest, SpecialTokens) {
				465	// Tokens coming from concatenations.
				466	recordTokens(R"cpp(
				467	#define CONCAT(a, b) a ## b
				468	int a = CONCAT(1, 2);
				469	)cpp");
				470	EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
				471	Contains(HasText("12")));
				472	// Multi-line tokens with slashes at the end.
				473	recordTokens("i\\\nn\\\nt");
				474	EXPECT_THAT(Buffer.expandedTokens(),
				475	ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")),
				476	Kind(tok::eof)));
				477	// FIXME: test tokens with digraphs and UCN identifiers.
				478	}
				479
				480	TEST_F(TokenCollectorTest, LateBoundTokens) {
				481	// The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
				482	// but we choose to record them as a single token (for now).
				483	llvm::Annotations Code(R"cpp(
				484	template <class T>
				485	struct foo { int a; };
				486	int bar = foo<foo<int$br[[>>]]().a;
				487	int baz = 10 $op[[>>]] 2;
				488	)cpp");
				489	recordTokens(Code.code());
				490	EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
				491	AllOf(Contains(AllOf(Kind(tok::greatergreater),
				492	RangeIs(Code.range("br")))),
				493	Contains(AllOf(Kind(tok::greatergreater),
				494	RangeIs(Code.range("op"))))));
				495	}
				496
				497	TEST_F(TokenCollectorTest, DelayedParsing) {
				498	llvm::StringLiteral Code = R"cpp(
				499	struct Foo {
				500	int method() {
				501	// Parser will visit method bodies and initializers multiple times, but
				502	// TokenBuffer should only record the first walk over the tokens;
				503	return 100;
				504	}
				505	int a = 10;
				506
				507	struct Subclass {
				508	void foo() {
				509	Foo().method();
				510	}
				511	};
				512	};
				513	)cpp";
				514	std::string ExpectedTokens =
				515	"expanded tokens:\n"
				516	" struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
				517	"Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
				518	EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
				519	}
				520
				521	TEST_F(TokenCollectorTest, MultiFile) {
				522	addFile("./foo.h", R"cpp(
				523	#define ADD(X, Y) X+Y
				524	int a = 100;
				525	#include "bar.h"
				526	)cpp");
				527	addFile("./bar.h", R"cpp(
				528	int b = ADD(1, 2);
				529	#define MULT(X, Y) X*Y
				530	)cpp");
				531	llvm::StringLiteral Code = R"cpp(
				532	#include "foo.h"
				533	int c = ADD(1, MULT(2,3));
				534	)cpp";
				535
				536	std::string Expected = R"(expanded tokens:
				537	int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
				538	file './input.cpp'
				539	spelled tokens:
				540	# include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
				541	mappings:
				542	['#'_0, 'int'_3) => ['int'_12, 'int'_12)
				543	['ADD'_6, ';'_17) => ['1'_15, ';'_20)
				544	file './foo.h'
				545	spelled tokens:
				546	# define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
				547	mappings:
				548	['#'_0, 'int'_11) => ['int'_0, 'int'_0)
				549	['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
				550	file './bar.h'
				551	spelled tokens:
				552	int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
				553	mappings:
				554	['ADD'_3, ';'_9) => ['1'_8, ';'_11)
				555	['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
				556	)";
				557
				558	EXPECT_EQ(Expected, collectAndDump(Code))
				559	<< "input: " << Code << "\nresults: " << collectAndDump(Code);
				560	}
				561
				562	class TokenBufferTest : public TokenCollectorTest {};
				563
				564	TEST_F(TokenBufferTest, SpelledByExpanded) {
				565	recordTokens(R"cpp(
				566	a1 a2 a3 b1 b2
				567	)cpp");
				568
				569	// Sanity check: expanded and spelled tokens are stored separately.
				570	EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
				571	// Searching for subranges of expanded tokens should give the corresponding
				572	// spelled ones.
				573	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
				574	ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
				575	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
				576	ValueIs(SameRange(findSpelled("a1 a2 a3"))));
				577	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
				578	ValueIs(SameRange(findSpelled("b1 b2"))));
				579
				580	// Test search on simple macro expansions.
				581	recordTokens(R"cpp(
				582	#define A a1 a2 a3
				583	#define B b1 b2
				584
				585	A split B
				586	)cpp");
				587	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
				588	ValueIs(SameRange(findSpelled("A split B"))));
				589	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
				590	ValueIs(SameRange(findSpelled("A split").drop_back())));
				591	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
				592	ValueIs(SameRange(findSpelled("split B").drop_front())));
				593	// Ranges not fully covering macro invocations should fail.
				594	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
				595	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None);
				596	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
				597	llvm::None);
				598
				599	// Recursive macro invocations.
				600	recordTokens(R"cpp(
				601	#define ID(x) x
				602	#define B b1 b2
				603
				604	ID(ID(ID(a1) a2 a3)) split ID(B)
				605	)cpp");
				606
				607	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
				608	ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )"))));
				609	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
				610	ValueIs(SameRange(findSpelled("ID ( B )"))));
				611	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
				612	ValueIs(SameRange(findSpelled(
				613	"ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
				614	// Ranges crossing macro call boundaries.
				615	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1")),
				616	llvm::None);
				617	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1")),
				618	llvm::None);
				619	// FIXME: next two examples should map to macro arguments, but currently they
				620	// fail.
				621	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2")), llvm::None);
				622	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
				623
				624	// Empty macro expansions.
				625	recordTokens(R"cpp(
				626	#define EMPTY
				627	#define ID(X) X
				628
				629	EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
				630	EMPTY EMPTY ID(4 5 6) split2
				631	ID(7 8 9) EMPTY EMPTY
				632	)cpp");
				633	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")),
				634	ValueIs(SameRange(findSpelled("ID ( 1 2 3 )"))));
				635	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")),
				636	ValueIs(SameRange(findSpelled("ID ( 4 5 6 )"))));
				637	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")),
				638	ValueIs(SameRange(findSpelled("ID ( 7 8 9 )"))));
				639
				640	// Empty mappings coming from various directives.
				641	recordTokens(R"cpp(
				642	#define ID(X) X
				643	ID(1)
				644	#pragma lalala
				645	not_mapped
				646	)cpp");
				647	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")),
				648	ValueIs(SameRange(findSpelled("not_mapped"))));
				649	}
				650
Ilya Biryukov	5aed309	2019-06-18 16:27:27 +0000	[diff] [blame]	651	TEST_F(TokenBufferTest, ExpansionStartingAt) {
				652	// Object-like macro expansions.
				653	recordTokens(R"cpp(
				654	#define FOO 3+4
				655	int a = FOO 1;
				656	int b = FOO 2;
				657	)cpp");
				658
				659	llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1").drop_back();
				660	EXPECT_THAT(
				661	Buffer.expansionStartingAt(Foo1.data()),
				662	ValueIs(IsExpansion(SameRange(Foo1),
				663	SameRange(findExpanded("3 + 4 1").drop_back()))));
				664
				665	llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2").drop_back();
				666	EXPECT_THAT(
				667	Buffer.expansionStartingAt(Foo2.data()),
				668	ValueIs(IsExpansion(SameRange(Foo2),
				669	SameRange(findExpanded("3 + 4 2").drop_back()))));
				670
				671	// Function-like macro expansions.
				672	recordTokens(R"cpp(
				673	#define ID(X) X
				674	int a = ID(1+2+3);
				675	int b = ID(ID(2+3+4));
				676	)cpp");
				677
				678	llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )");
				679	EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()),
				680	ValueIs(IsExpansion(SameRange(ID1),
				681	SameRange(findExpanded("1 + 2 + 3")))));
				682	// Only the first spelled token should be found.
				683	for (const auto &T : ID1.drop_front())
				684	EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
				685
				686	llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )");
				687	EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()),
				688	ValueIs(IsExpansion(SameRange(ID2),
				689	SameRange(findExpanded("2 + 3 + 4")))));
				690	// Only the first spelled token should be found.
				691	for (const auto &T : ID2.drop_front())
				692	EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
				693
				694	// PP directives.
				695	recordTokens(R"cpp(
				696	#define FOO 1
				697	int a = FOO;
				698	#pragma once
				699	int b = 1;
				700	)cpp");
				701
				702	llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1");
				703	EXPECT_THAT(
				704	Buffer.expansionStartingAt(&DefineFoo.front()),
				705	ValueIs(IsExpansion(SameRange(DefineFoo),
				706	SameRange(findExpanded("int a").take_front(0)))));
				707	// Only the first spelled token should be found.
				708	for (const auto &T : DefineFoo.drop_front())
				709	EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
				710
				711	llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once");
				712	EXPECT_THAT(
				713	Buffer.expansionStartingAt(&PragmaOnce.front()),
				714	ValueIs(IsExpansion(SameRange(PragmaOnce),
				715	SameRange(findExpanded("int b").take_front(0)))));
				716	// Only the first spelled token should be found.
				717	for (const auto &T : PragmaOnce.drop_front())
				718	EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
				719	}
				720
Ilya Biryukov	e7230ea	2019-05-22 14:44:45 +0000	[diff] [blame]	721	TEST_F(TokenBufferTest, TokensToFileRange) {
				722	addFile("./foo.h", "token_from_header");
				723	llvm::Annotations Code(R"cpp(
				724	#define FOO token_from_expansion
				725	#include "./foo.h"
				726	$all[[$i[[int]] a = FOO;]]
				727	)cpp");
				728	recordTokens(Code.code());
				729
				730	auto &SM = *SourceMgr;
				731
				732	// Two simple examples.
				733	auto Int = findExpanded("int").front();
				734	auto Semi = findExpanded(";").front();
				735	EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin,
				736	Code.range("i").End));
				737	EXPECT_EQ(syntax::Token::range(SM, Int, Semi),
				738	FileRange(SM.getMainFileID(), Code.range("all").Begin,
				739	Code.range("all").End));
				740	// We don't test assertion failures because death tests are slow.
				741	}
				742
Duncan P. N. Exon Smith	d4a9cae	2019-05-25 22:38:02 +0000	[diff] [blame]	743	} // namespace