Blame - clang/unittests/Tooling/Syntax/TokensTest.cpp - toolchain/llvm-project

blob: 1d931faa704885fb7f7f26405d5f9ed302fd565e [file] [log] [blame]

Ilya Biryukov	e7230ea	2019-05-22 14:44:45 +0000	[diff] [blame]	1	//===- TokensTest.cpp -----------------------------------------------------===//
				2	//
				3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				6	//
				7	//===----------------------------------------------------------------------===//
				8
				9	#include "clang/Tooling/Syntax/Tokens.h"
				10	#include "clang/AST/ASTConsumer.h"
				11	#include "clang/AST/Expr.h"
				12	#include "clang/Basic/Diagnostic.h"
				13	#include "clang/Basic/DiagnosticIDs.h"
				14	#include "clang/Basic/DiagnosticOptions.h"
				15	#include "clang/Basic/FileManager.h"
				16	#include "clang/Basic/FileSystemOptions.h"
				17	#include "clang/Basic/LLVM.h"
				18	#include "clang/Basic/LangOptions.h"
				19	#include "clang/Basic/SourceLocation.h"
				20	#include "clang/Basic/SourceManager.h"
				21	#include "clang/Basic/TokenKinds.def"
				22	#include "clang/Basic/TokenKinds.h"
				23	#include "clang/Frontend/CompilerInstance.h"
				24	#include "clang/Frontend/FrontendAction.h"
				25	#include "clang/Frontend/Utils.h"
				26	#include "clang/Lex/Lexer.h"
				27	#include "clang/Lex/PreprocessorOptions.h"
				28	#include "clang/Lex/Token.h"
				29	#include "clang/Tooling/Tooling.h"
				30	#include "llvm/ADT/ArrayRef.h"
				31	#include "llvm/ADT/IntrusiveRefCntPtr.h"
				32	#include "llvm/ADT/None.h"
				33	#include "llvm/ADT/Optional.h"
				34	#include "llvm/ADT/STLExtras.h"
				35	#include "llvm/ADT/StringRef.h"
				36	#include "llvm/Support/FormatVariadic.h"
				37	#include "llvm/Support/MemoryBuffer.h"
				38	#include "llvm/Support/VirtualFileSystem.h"
				39	#include "llvm/Support/raw_os_ostream.h"
				40	#include "llvm/Support/raw_ostream.h"
				41	#include "llvm/Testing/Support/Annotations.h"
				42	#include "llvm/Testing/Support/SupportHelpers.h"
				43	#include <cassert>
				44	#include <cstdlib>
				45	#include <gmock/gmock.h>
				46	#include <gtest/gtest.h>
				47	#include <memory>
				48	#include <ostream>
				49	#include <string>
				50
				51	using namespace clang;
				52	using namespace clang::syntax;
				53
				54	using llvm::ValueIs;
				55	using ::testing::AllOf;
				56	using ::testing::Contains;
				57	using ::testing::ElementsAre;
				58	using ::testing::Matcher;
				59	using ::testing::Not;
				60	using ::testing::StartsWith;
				61
				62	namespace {
				63	// Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
				64	// argument.
				65	MATCHER_P(SameRange, A, "") {
				66	return A.begin() == arg.begin() && A.end() == arg.end();
				67	}
				68	// Matchers for syntax::Token.
				69	MATCHER_P(Kind, K, "") { return arg.kind() == K; }
				70	MATCHER_P2(HasText, Text, SourceMgr, "") {
				71	return arg.text(*SourceMgr) == Text;
				72	}
				73	/// Checks the start and end location of a token are equal to SourceRng.
				74	MATCHER_P(RangeIs, SourceRng, "") {
				75	return arg.location() == SourceRng.first &&
				76	arg.endLocation() == SourceRng.second;
				77	}
				78
				79	class TokenCollectorTest : public ::testing::Test {
				80	public:
				81	/// Run the clang frontend, collect the preprocessed tokens from the frontend
				82	/// invocation and store them in this->Buffer.
				83	/// This also clears SourceManager before running the compiler.
				84	void recordTokens(llvm::StringRef Code) {
				85	class RecordTokens : public ASTFrontendAction {
				86	public:
				87	explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
				88
				89	bool BeginSourceFileAction(CompilerInstance &CI) override {
				90	assert(!Collector && "expected only a single call to BeginSourceFile");
				91	Collector.emplace(CI.getPreprocessor());
				92	return true;
				93	}
				94	void EndSourceFileAction() override {
				95	assert(Collector && "BeginSourceFileAction was never called");
				96	Result = std::move(*Collector).consume();
				97	}
				98
				99	std::unique_ptr<ASTConsumer>
				100	CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
				101	return llvm::make_unique<ASTConsumer>();
				102	}
				103
				104	private:
				105	TokenBuffer &Result;
				106	llvm::Optional<TokenCollector> Collector;
				107	};
				108
				109	constexpr const char *FileName = "./input.cpp";
				110	FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
				111	// Prepare to run a compiler.
				112	if (!Diags->getClient())
				113	Diags->setClient(new IgnoringDiagConsumer);
				114	std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
				115	FileName};
				116	auto CI = createInvocationFromCommandLine(Args, Diags, FS);
				117	assert(CI);
				118	CI->getFrontendOpts().DisableFree = false;
				119	CI->getPreprocessorOpts().addRemappedFile(
				120	FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
				121	CompilerInstance Compiler;
				122	Compiler.setInvocation(std::move(CI));
				123	Compiler.setDiagnostics(Diags.get());
				124	Compiler.setFileManager(FileMgr.get());
				125	Compiler.setSourceManager(SourceMgr.get());
				126
				127	this->Buffer = TokenBuffer(*SourceMgr);
				128	RecordTokens Recorder(this->Buffer);
				129	ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
				130	<< "failed to run the frontend";
				131	}
				132
				133	/// Record the tokens and return a test dump of the resulting buffer.
				134	std::string collectAndDump(llvm::StringRef Code) {
				135	recordTokens(Code);
				136	return Buffer.dumpForTests();
				137	}
				138
				139	// Adds a file to the test VFS.
				140	void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
				141	if (!FS->addFile(Path, time_t(),
				142	llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
				143	ADD_FAILURE() << "could not add a file to VFS: " << Path;
				144	}
				145	}
				146
				147	/// Add a new file, run syntax::tokenize() on it and return the results.
				148	std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
				149	// FIXME: pass proper LangOptions.
				150	return syntax::tokenize(
				151	SourceMgr->createFileID(llvm::MemoryBuffer::getMemBufferCopy(Text)),
				152	*SourceMgr, LangOptions());
				153	}
				154
				155	// Specialized versions of matchers that hide the SourceManager from clients.
				156	Matcher<syntax::Token> HasText(std::string Text) const {
				157	return ::HasText(Text, SourceMgr.get());
				158	}
				159	Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
				160	std::pair<SourceLocation, SourceLocation> Ls;
				161	Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
				162	.getLocWithOffset(R.Begin);
				163	Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
				164	.getLocWithOffset(R.End);
				165	return ::RangeIs(Ls);
				166	}
				167
				168	/// Finds a subrange in O(n * m).
				169	template <class T, class U, class Eq>
				170	llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
				171	llvm::ArrayRef<T> Range, Eq F) {
				172	for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) {
				173	auto It = Begin;
				174	for (auto ItSub = Subrange.begin();
				175	ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) {
				176	if (!F(ItSub, It))
				177	goto continue_outer;
				178	}
				179	return llvm::makeArrayRef(Begin, It);
				180	continue_outer:;
				181	}
				182	return llvm::makeArrayRef(Range.end(), Range.end());
				183	}
				184
				185	/// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
				186	/// The match should be unique. \p Query is a whitespace-separated list of
				187	/// tokens to search for.
				188	llvm::ArrayRef<syntax::Token>
				189	findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
				190	llvm::SmallVector<llvm::StringRef, 8> QueryTokens;
				191	Query.split(QueryTokens, ' ', /MaxSplit=/-1, /KeepEmpty=/false);
				192	if (QueryTokens.empty()) {
				193	ADD_FAILURE() << "will not look for an empty list of tokens";
				194	std::abort();
				195	}
				196	// An equality test for search.
				197	auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
				198	return Q == T.text(*SourceMgr);
				199	};
				200	// Find a match.
				201	auto Found =
				202	findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches);
				203	if (Found.begin() == Tokens.end()) {
				204	ADD_FAILURE() << "could not find the subrange for " << Query;
				205	std::abort();
				206	}
				207	// Check that the match is unique.
				208	if (findSubrange(llvm::makeArrayRef(QueryTokens),
				209	llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches)
				210	.begin() != Tokens.end()) {
				211	ADD_FAILURE() << "match is not unique for " << Query;
				212	std::abort();
				213	}
				214	return Found;
				215	};
				216
				217	// Specialized versions of findTokenRange for expanded and spelled tokens.
				218	llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
				219	return findTokenRange(Query, Buffer.expandedTokens());
				220	}
				221	llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
				222	FileID File = FileID()) {
				223	if (!File.isValid())
				224	File = SourceMgr->getMainFileID();
				225	return findTokenRange(Query, Buffer.spelledTokens(File));
				226	}
				227
				228	// Data fields.
				229	llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
				230	new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
				231	IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
				232	new llvm::vfs::InMemoryFileSystem;
				233	llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
				234	new FileManager(FileSystemOptions(), FS);
				235	llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
				236	new SourceManager(Diags, FileMgr);
				237	/// Contains last result of calling recordTokens().
				238	TokenBuffer Buffer = TokenBuffer(*SourceMgr);
				239	};
				240
				241	TEST_F(TokenCollectorTest, RawMode) {
				242	EXPECT_THAT(tokenize("int main() {}"),
				243	ElementsAre(Kind(tok::kw_int),
				244	AllOf(HasText("main"), Kind(tok::identifier)),
				245	Kind(tok::l_paren), Kind(tok::r_paren),
				246	Kind(tok::l_brace), Kind(tok::r_brace)));
				247	// Comments are ignored for now.
				248	EXPECT_THAT(tokenize("/* foo */int a; // more comments"),
				249	ElementsAre(Kind(tok::kw_int),
				250	AllOf(HasText("a"), Kind(tok::identifier)),
				251	Kind(tok::semi)));
				252	}
				253
				254	TEST_F(TokenCollectorTest, Basic) {
				255	std::pair</Input/ std::string, /Expected/ std::string> TestCases[] = {
				256	{"int main() {}",
				257	R"(expanded tokens:
				258	int main ( ) { }
				259	file './input.cpp'
				260	spelled tokens:
				261	int main ( ) { }
				262	no mappings.
				263	)"},
				264	// All kinds of whitespace are ignored.
				265	{"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n",
				266	R"(expanded tokens:
				267	int main ( ) { }
				268	file './input.cpp'
				269	spelled tokens:
				270	int main ( ) { }
				271	no mappings.
				272	)"},
				273	// Annotation tokens are ignored.
				274	{R"cpp(
				275	#pragma GCC visibility push (public)
				276	#pragma GCC visibility pop
				277	)cpp",
				278	R"(expanded tokens:
				279	<empty>
				280	file './input.cpp'
				281	spelled tokens:
				282	# pragma GCC visibility push ( public ) # pragma GCC visibility pop
				283	mappings:
				284	['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
				285	)"}};
				286	for (auto &Test : TestCases)
				287	EXPECT_EQ(collectAndDump(Test.first), Test.second)
				288	<< collectAndDump(Test.first);
				289	}
				290
				291	TEST_F(TokenCollectorTest, Locations) {
				292	// Check locations of the tokens.
				293	llvm::Annotations Code(R"cpp(
				294	$r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
				295	)cpp");
				296	recordTokens(Code.code());
				297	// Check expanded tokens.
				298	EXPECT_THAT(
				299	Buffer.expandedTokens(),
				300	ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
				301	AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
				302	AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
				303	AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
				304	AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))),
				305	Kind(tok::eof)));
				306	// Check spelled tokens.
				307	EXPECT_THAT(
				308	Buffer.spelledTokens(SourceMgr->getMainFileID()),
				309	ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
				310	AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
				311	AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
				312	AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
				313	AllOf(Kind(tok::semi), RangeIs(Code.range("r5")))));
				314	}
				315
				316	TEST_F(TokenCollectorTest, MacroDirectives) {
				317	// Macro directives are not stored anywhere at the moment.
				318	std::string Code = R"cpp(
				319	#define FOO a
				320	#include "unresolved_file.h"
				321	#undef FOO
				322	#ifdef X
				323	#else
				324	#endif
				325	#ifndef Y
				326	#endif
				327	#if 1
				328	#elif 2
				329	#else
				330	#endif
				331	#pragma once
				332	#pragma something lalala
				333
				334	int a;
				335	)cpp";
				336	std::string Expected =
				337	"expanded tokens:\n"
				338	" int a ;\n"
				339	"file './input.cpp'\n"
				340	" spelled tokens:\n"
				341	" # define FOO a # include \"unresolved_file.h\" # undef FOO "
				342	"# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
				343	"# endif # pragma once # pragma something lalala int a ;\n"
				344	" mappings:\n"
				345	" ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
				346	EXPECT_EQ(collectAndDump(Code), Expected);
				347	}
				348
				349	TEST_F(TokenCollectorTest, MacroReplacements) {
				350	std::pair</Input/ std::string, /Expected/ std::string> TestCases[] = {
				351	// A simple object-like macro.
				352	{R"cpp(
				353	#define INT int const
				354	INT a;
				355	)cpp",
				356	R"(expanded tokens:
				357	int const a ;
				358	file './input.cpp'
				359	spelled tokens:
				360	# define INT int const INT a ;
				361	mappings:
				362	['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
				363	['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
				364	)"},
				365	// A simple function-like macro.
				366	{R"cpp(
				367	#define INT(a) const int
				368	INT(10+10) a;
				369	)cpp",
				370	R"(expanded tokens:
				371	const int a ;
				372	file './input.cpp'
				373	spelled tokens:
				374	# define INT ( a ) const int INT ( 10 + 10 ) a ;
				375	mappings:
				376	['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
				377	['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
				378	)"},
				379	// Recursive macro replacements.
				380	{R"cpp(
				381	#define ID(X) X
				382	#define INT int const
				383	ID(ID(INT)) a;
				384	)cpp",
				385	R"(expanded tokens:
				386	int const a ;
				387	file './input.cpp'
				388	spelled tokens:
				389	# define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
				390	mappings:
				391	['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
				392	['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
				393	)"},
				394	// A little more complicated recursive macro replacements.
				395	{R"cpp(
				396	#define ADD(X, Y) X+Y
				397	#define MULT(X, Y) X*Y
				398
				399	int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
				400	)cpp",
				401	"expanded tokens:\n"
				402	" int a = 1 * 2 + 3 * 4 + 5 ;\n"
				403	"file './input.cpp'\n"
				404	" spelled tokens:\n"
				405	" # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
				406	"a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
				407	" mappings:\n"
				408	" ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
				409	" ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
				410	// Empty macro replacement.
				411	{R"cpp(
				412	#define EMPTY
				413	#define EMPTY_FUNC(X)
				414	EMPTY
				415	EMPTY_FUNC(1+2+3)
				416	)cpp",
				417	R"(expanded tokens:
				418	<empty>
				419	file './input.cpp'
				420	spelled tokens:
				421	# define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
				422	mappings:
				423	['#'_0, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
				424	)"},
				425	// File ends with a macro replacement.
				426	{R"cpp(
				427	#define FOO 10+10;
				428	int a = FOO
				429	)cpp",
				430	R"(expanded tokens:
				431	int a = 10 + 10 ;
				432	file './input.cpp'
				433	spelled tokens:
				434	# define FOO 10 + 10 ; int a = FOO
				435	mappings:
				436	['#'_0, 'int'_7) => ['int'_0, 'int'_0)
				437	['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
				438	)"}};
				439
				440	for (auto &Test : TestCases)
				441	EXPECT_EQ(Test.second, collectAndDump(Test.first))
				442	<< collectAndDump(Test.first);
				443	}
				444
				445	TEST_F(TokenCollectorTest, SpecialTokens) {
				446	// Tokens coming from concatenations.
				447	recordTokens(R"cpp(
				448	#define CONCAT(a, b) a ## b
				449	int a = CONCAT(1, 2);
				450	)cpp");
				451	EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
				452	Contains(HasText("12")));
				453	// Multi-line tokens with slashes at the end.
				454	recordTokens("i\\\nn\\\nt");
				455	EXPECT_THAT(Buffer.expandedTokens(),
				456	ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")),
				457	Kind(tok::eof)));
				458	// FIXME: test tokens with digraphs and UCN identifiers.
				459	}
				460
				461	TEST_F(TokenCollectorTest, LateBoundTokens) {
				462	// The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
				463	// but we choose to record them as a single token (for now).
				464	llvm::Annotations Code(R"cpp(
				465	template <class T>
				466	struct foo { int a; };
				467	int bar = foo<foo<int$br[[>>]]().a;
				468	int baz = 10 $op[[>>]] 2;
				469	)cpp");
				470	recordTokens(Code.code());
				471	EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
				472	AllOf(Contains(AllOf(Kind(tok::greatergreater),
				473	RangeIs(Code.range("br")))),
				474	Contains(AllOf(Kind(tok::greatergreater),
				475	RangeIs(Code.range("op"))))));
				476	}
				477
				478	TEST_F(TokenCollectorTest, DelayedParsing) {
				479	llvm::StringLiteral Code = R"cpp(
				480	struct Foo {
				481	int method() {
				482	// Parser will visit method bodies and initializers multiple times, but
				483	// TokenBuffer should only record the first walk over the tokens;
				484	return 100;
				485	}
				486	int a = 10;
				487
				488	struct Subclass {
				489	void foo() {
				490	Foo().method();
				491	}
				492	};
				493	};
				494	)cpp";
				495	std::string ExpectedTokens =
				496	"expanded tokens:\n"
				497	" struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
				498	"Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
				499	EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
				500	}
				501
				502	TEST_F(TokenCollectorTest, MultiFile) {
				503	addFile("./foo.h", R"cpp(
				504	#define ADD(X, Y) X+Y
				505	int a = 100;
				506	#include "bar.h"
				507	)cpp");
				508	addFile("./bar.h", R"cpp(
				509	int b = ADD(1, 2);
				510	#define MULT(X, Y) X*Y
				511	)cpp");
				512	llvm::StringLiteral Code = R"cpp(
				513	#include "foo.h"
				514	int c = ADD(1, MULT(2,3));
				515	)cpp";
				516
				517	std::string Expected = R"(expanded tokens:
				518	int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
				519	file './input.cpp'
				520	spelled tokens:
				521	# include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
				522	mappings:
				523	['#'_0, 'int'_3) => ['int'_12, 'int'_12)
				524	['ADD'_6, ';'_17) => ['1'_15, ';'_20)
				525	file './foo.h'
				526	spelled tokens:
				527	# define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
				528	mappings:
				529	['#'_0, 'int'_11) => ['int'_0, 'int'_0)
				530	['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
				531	file './bar.h'
				532	spelled tokens:
				533	int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
				534	mappings:
				535	['ADD'_3, ';'_9) => ['1'_8, ';'_11)
				536	['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
				537	)";
				538
				539	EXPECT_EQ(Expected, collectAndDump(Code))
				540	<< "input: " << Code << "\nresults: " << collectAndDump(Code);
				541	}
				542
				543	class TokenBufferTest : public TokenCollectorTest {};
				544
				545	TEST_F(TokenBufferTest, SpelledByExpanded) {
				546	recordTokens(R"cpp(
				547	a1 a2 a3 b1 b2
				548	)cpp");
				549
				550	// Sanity check: expanded and spelled tokens are stored separately.
				551	EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
				552	// Searching for subranges of expanded tokens should give the corresponding
				553	// spelled ones.
				554	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
				555	ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
				556	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
				557	ValueIs(SameRange(findSpelled("a1 a2 a3"))));
				558	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
				559	ValueIs(SameRange(findSpelled("b1 b2"))));
				560
				561	// Test search on simple macro expansions.
				562	recordTokens(R"cpp(
				563	#define A a1 a2 a3
				564	#define B b1 b2
				565
				566	A split B
				567	)cpp");
				568	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
				569	ValueIs(SameRange(findSpelled("A split B"))));
				570	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
				571	ValueIs(SameRange(findSpelled("A split").drop_back())));
				572	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
				573	ValueIs(SameRange(findSpelled("split B").drop_front())));
				574	// Ranges not fully covering macro invocations should fail.
				575	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
				576	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None);
				577	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
				578	llvm::None);
				579
				580	// Recursive macro invocations.
				581	recordTokens(R"cpp(
				582	#define ID(x) x
				583	#define B b1 b2
				584
				585	ID(ID(ID(a1) a2 a3)) split ID(B)
				586	)cpp");
				587
				588	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
				589	ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )"))));
				590	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
				591	ValueIs(SameRange(findSpelled("ID ( B )"))));
				592	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
				593	ValueIs(SameRange(findSpelled(
				594	"ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
				595	// Ranges crossing macro call boundaries.
				596	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1")),
				597	llvm::None);
				598	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1")),
				599	llvm::None);
				600	// FIXME: next two examples should map to macro arguments, but currently they
				601	// fail.
				602	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2")), llvm::None);
				603	EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
				604
				605	// Empty macro expansions.
				606	recordTokens(R"cpp(
				607	#define EMPTY
				608	#define ID(X) X
				609
				610	EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
				611	EMPTY EMPTY ID(4 5 6) split2
				612	ID(7 8 9) EMPTY EMPTY
				613	)cpp");
				614	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")),
				615	ValueIs(SameRange(findSpelled("ID ( 1 2 3 )"))));
				616	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")),
				617	ValueIs(SameRange(findSpelled("ID ( 4 5 6 )"))));
				618	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")),
				619	ValueIs(SameRange(findSpelled("ID ( 7 8 9 )"))));
				620
				621	// Empty mappings coming from various directives.
				622	recordTokens(R"cpp(
				623	#define ID(X) X
				624	ID(1)
				625	#pragma lalala
				626	not_mapped
				627	)cpp");
				628	EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")),
				629	ValueIs(SameRange(findSpelled("not_mapped"))));
				630	}
				631
				632	TEST_F(TokenBufferTest, TokensToFileRange) {
				633	addFile("./foo.h", "token_from_header");
				634	llvm::Annotations Code(R"cpp(
				635	#define FOO token_from_expansion
				636	#include "./foo.h"
				637	$all[[$i[[int]] a = FOO;]]
				638	)cpp");
				639	recordTokens(Code.code());
				640
				641	auto &SM = *SourceMgr;
				642
				643	// Two simple examples.
				644	auto Int = findExpanded("int").front();
				645	auto Semi = findExpanded(";").front();
				646	EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin,
				647	Code.range("i").End));
				648	EXPECT_EQ(syntax::Token::range(SM, Int, Semi),
				649	FileRange(SM.getMainFileID(), Code.range("all").Begin,
				650	Code.range("all").End));
				651	// We don't test assertion failures because death tests are slow.
				652	}
				653
Duncan P. N. Exon Smith	d4a9cae	2019-05-25 22:38:02 +0000	[diff] [blame^]	654	} // namespace