Blame - clang-tools-extra/clangd/SourceCode.cpp - toolchain/llvm-project

blob: dd4c863cb96abdc7f6779a2e94b9389bb5500662 [file] [log] [blame]

Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	1	//===--- SourceCode.cpp - Manipulating source code as strings ---*- C++ -*-===//
				2	//
Chandler Carruth	2946cd7	2019-01-19 08:50:56 +0000	[diff] [blame]	3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	6	//
				7	//===----------------------------------------------------------------------===//
				8	#include "SourceCode.h"
				9
Sam McCall	a69698f	2019-03-27 17:47:49 +0000	[diff] [blame]	10	#include "Context.h"
Sam McCall	9fb22b2	2019-05-06 10:25:10 +0000	[diff] [blame]	11	#include "FuzzyMatch.h"
Marc-Andre Laperle	1be6970	2018-07-05 19:35:01 +0000	[diff] [blame]	12	#include "Logger.h"
Sam McCall	a69698f	2019-03-27 17:47:49 +0000	[diff] [blame]	13	#include "Protocol.h"
Kadir Cetinkaya	5b27093	2019-09-09 12:28:44 +0000	[diff] [blame]	14	#include "refactor/Tweak.h"
Marc-Andre Laperle	1be6970	2018-07-05 19:35:01 +0000	[diff] [blame]	15	#include "clang/AST/ASTContext.h"
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	16	#include "clang/Basic/LangOptions.h"
				17	#include "clang/Basic/SourceLocation.h"
Marc-Andre Laperle	63a1098	2018-02-21 02:39:08 +0000	[diff] [blame]	18	#include "clang/Basic/SourceManager.h"
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	19	#include "clang/Basic/TokenKinds.h"
Haojian Wu	509efe5	2019-11-13 16:30:07 +0100	[diff] [blame]	20	#include "clang/Driver/Types.h"
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	21	#include "clang/Format/Format.h"
Marc-Andre Laperle	1be6970	2018-07-05 19:35:01 +0000	[diff] [blame]	22	#include "clang/Lex/Lexer.h"
Haojian Wu	9d34f45	2019-07-01 09:26:48 +0000	[diff] [blame]	23	#include "clang/Lex/Preprocessor.h"
Kadir Cetinkaya	d62e3ed	2019-09-25 11:35:38 +0200	[diff] [blame]	24	#include "clang/Lex/Token.h"
Kadir Cetinkaya	5b27093	2019-09-09 12:28:44 +0000	[diff] [blame]	25	#include "clang/Tooling/Core/Replacement.h"
Kadir Cetinkaya	98bb094	2020-02-27 15:10:54 +0100	[diff] [blame]	26	#include "clang/Tooling/Syntax/Tokens.h"
Kadir Cetinkaya	5b27093	2019-09-09 12:28:44 +0000	[diff] [blame]	27	#include "llvm/ADT/ArrayRef.h"
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	28	#include "llvm/ADT/None.h"
Kadir Cetinkaya	d62e3ed	2019-09-25 11:35:38 +0200	[diff] [blame]	29	#include "llvm/ADT/STLExtras.h"
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	30	#include "llvm/ADT/StringExtras.h"
Kadir Cetinkaya	5b27093	2019-09-09 12:28:44 +0000	[diff] [blame]	31	#include "llvm/ADT/StringMap.h"
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	32	#include "llvm/ADT/StringRef.h"
Sam McCall	9fb22b2	2019-05-06 10:25:10 +0000	[diff] [blame]	33	#include "llvm/Support/Compiler.h"
Simon Marchi	766338a	2018-03-21 14:36:46 +0000	[diff] [blame]	34	#include "llvm/Support/Errc.h"
				35	#include "llvm/Support/Error.h"
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	36	#include "llvm/Support/ErrorHandling.h"
Kadir Cetinkaya	5b27093	2019-09-09 12:28:44 +0000	[diff] [blame]	37	#include "llvm/Support/LineIterator.h"
				38	#include "llvm/Support/MemoryBuffer.h"
Marc-Andre Laperle	1be6970	2018-07-05 19:35:01 +0000	[diff] [blame]	39	#include "llvm/Support/Path.h"
Kadir Cetinkaya	5b27093	2019-09-09 12:28:44 +0000	[diff] [blame]	40	#include "llvm/Support/SHA1.h"
				41	#include "llvm/Support/VirtualFileSystem.h"
Sam McCall	674d8a9	2019-07-08 11:33:17 +0000	[diff] [blame]	42	#include "llvm/Support/xxhash.h"
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	43	#include <algorithm>
Kadir Cetinkaya	d62e3ed	2019-09-25 11:35:38 +0200	[diff] [blame]	44	#include <cstddef>
				45	#include <string>
				46	#include <vector>
Marc-Andre Laperle	63a1098	2018-02-21 02:39:08 +0000	[diff] [blame]	47
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	48	namespace clang {
				49	namespace clangd {
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	50
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	51	// Here be dragons. LSP positions use columns measured in UTF-16 code units!
				52	// Clangd uses UTF-8 and byte-offsets internally, so conversion is nontrivial.
				53
				54	// Iterates over unicode codepoints in the (UTF-8) string. For each,
				55	// invokes CB(UTF-8 length, UTF-16 length), and breaks if it returns true.
				56	// Returns true if CB returned true, false if we hit the end of string.
				57	template <typename Callback>
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	58	static bool iterateCodepoints(llvm::StringRef U8, const Callback &CB) {
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	59	// A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
				60	// Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx.
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	61	for (size_t I = 0; I < U8.size();) {
				62	unsigned char C = static_cast<unsigned char>(U8[I]);
				63	if (LLVM_LIKELY(!(C & 0x80))) { // ASCII character.
				64	if (CB(1, 1))
				65	return true;
				66	++I;
				67	continue;
				68	}
				69	// This convenient property of UTF-8 holds for all non-ASCII characters.
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	70	size_t UTF8Length = llvm::countLeadingOnes(C);
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	71	// 0xxx is ASCII, handled above. 10xxx is a trailing byte, invalid here.
				72	// 11111xxx is not valid UTF-8 at all. Assert because it's probably our bug.
				73	assert((UTF8Length >= 2 && UTF8Length <= 4) &&
				74	"Invalid UTF-8, or transcoding bug?");
				75	I += UTF8Length; // Skip over all trailing bytes.
				76	// A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
				77	// Astral codepoints are encoded as 4 bytes in UTF-8 (11110xxx ...)
				78	if (CB(UTF8Length, UTF8Length == 4 ? 2 : 1))
				79	return true;
				80	}
				81	return false;
				82	}
				83
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	84	// Returns the byte offset into the string that is an offset of \p Units in
				85	// the specified encoding.
				86	// Conceptually, this converts to the encoding, truncates to CodeUnits,
				87	// converts back to UTF-8, and returns the length in bytes.
				88	static size_t measureUnits(llvm::StringRef U8, int Units, OffsetEncoding Enc,
				89	bool &Valid) {
				90	Valid = Units >= 0;
				91	if (Units <= 0)
				92	return 0;
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	93	size_t Result = 0;
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	94	switch (Enc) {
				95	case OffsetEncoding::UTF8:
				96	Result = Units;
				97	break;
				98	case OffsetEncoding::UTF16:
				99	Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) {
				100	Result += U8Len;
				101	Units -= U16Len;
				102	return Units <= 0;
				103	});
				104	if (Units < 0) // Offset in the middle of a surrogate pair.
				105	Valid = false;
				106	break;
				107	case OffsetEncoding::UTF32:
				108	Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) {
				109	Result += U8Len;
				110	Units--;
				111	return Units <= 0;
				112	});
				113	break;
				114	case OffsetEncoding::UnsupportedEncoding:
				115	llvm_unreachable("unsupported encoding");
				116	}
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	117	// Don't return an out-of-range index if we overran.
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	118	if (Result > U8.size()) {
				119	Valid = false;
				120	return U8.size();
				121	}
				122	return Result;
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	123	}
				124
Sam McCall	a69698f	2019-03-27 17:47:49 +0000	[diff] [blame]	125	Key<OffsetEncoding> kCurrentOffsetEncoding;
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	126	static OffsetEncoding lspEncoding() {
Sam McCall	a69698f	2019-03-27 17:47:49 +0000	[diff] [blame]	127	auto *Enc = Context::current().get(kCurrentOffsetEncoding);
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	128	return Enc ? *Enc : OffsetEncoding::UTF16;
Sam McCall	a69698f	2019-03-27 17:47:49 +0000	[diff] [blame]	129	}
				130
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	131	// Like most strings in clangd, the input is UTF-8 encoded.
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	132	size_t lspLength(llvm::StringRef Code) {
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	133	size_t Count = 0;
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	134	switch (lspEncoding()) {
				135	case OffsetEncoding::UTF8:
				136	Count = Code.size();
				137	break;
				138	case OffsetEncoding::UTF16:
				139	iterateCodepoints(Code, [&](int U8Len, int U16Len) {
				140	Count += U16Len;
				141	return false;
				142	});
				143	break;
				144	case OffsetEncoding::UTF32:
				145	iterateCodepoints(Code, [&](int U8Len, int U16Len) {
				146	++Count;
				147	return false;
				148	});
				149	break;
				150	case OffsetEncoding::UnsupportedEncoding:
				151	llvm_unreachable("unsupported encoding");
				152	}
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	153	return Count;
				154	}
				155
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	156	llvm::Expected<size_t> positionToOffset(llvm::StringRef Code, Position P,
				157	bool AllowColumnsBeyondLineLength) {
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	158	if (P.line < 0)
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	159	return llvm::make_error<llvm::StringError>(
				160	llvm::formatv("Line value can't be negative ({0})", P.line),
				161	llvm::errc::invalid_argument);
Simon Marchi	766338a	2018-03-21 14:36:46 +0000	[diff] [blame]	162	if (P.character < 0)
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	163	return llvm::make_error<llvm::StringError>(
				164	llvm::formatv("Character value can't be negative ({0})", P.character),
				165	llvm::errc::invalid_argument);
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	166	size_t StartOfLine = 0;
				167	for (int I = 0; I != P.line; ++I) {
				168	size_t NextNL = Code.find('\n', StartOfLine);
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	169	if (NextNL == llvm::StringRef::npos)
				170	return llvm::make_error<llvm::StringError>(
				171	llvm::formatv("Line value is out of range ({0})", P.line),
				172	llvm::errc::invalid_argument);
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	173	StartOfLine = NextNL + 1;
				174	}
Sam McCall	a69698f	2019-03-27 17:47:49 +0000	[diff] [blame]	175	StringRef Line =
				176	Code.substr(StartOfLine).take_until([](char C) { return C == '\n'; });
Simon Marchi	766338a	2018-03-21 14:36:46 +0000	[diff] [blame]	177
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	178	// P.character may be in UTF-16, transcode if necessary.
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	179	bool Valid;
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	180	size_t ByteInLine = measureUnits(Line, P.character, lspEncoding(), Valid);
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	181	if (!Valid && !AllowColumnsBeyondLineLength)
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	182	return llvm::make_error<llvm::StringError>(
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	183	llvm::formatv("{0} offset {1} is invalid for line {2}", lspEncoding(),
				184	P.character, P.line),
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	185	llvm::errc::invalid_argument);
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	186	return StartOfLine + ByteInLine;
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	187	}
				188
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	189	Position offsetToPosition(llvm::StringRef Code, size_t Offset) {
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	190	Offset = std::min(Code.size(), Offset);
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	191	llvm::StringRef Before = Code.substr(0, Offset);
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	192	int Lines = Before.count('\n');
				193	size_t PrevNL = Before.rfind('\n');
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	194	size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1);
Ilya Biryukov	7beea3a	2018-02-14 10:52:04 +0000	[diff] [blame]	195	Position Pos;
				196	Pos.line = Lines;
Sam McCall	7189112	2018-10-23 11:51:53 +0000	[diff] [blame]	197	Pos.character = lspLength(Before.substr(StartOfLine));
Ilya Biryukov	7beea3a	2018-02-14 10:52:04 +0000	[diff] [blame]	198	return Pos;
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	199	}
				200
Marc-Andre Laperle	63a1098	2018-02-21 02:39:08 +0000	[diff] [blame]	201	Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc) {
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	202	// We use the SourceManager's line tables, but its column number is in bytes.
				203	FileID FID;
				204	unsigned Offset;
				205	std::tie(FID, Offset) = SM.getDecomposedSpellingLoc(Loc);
Marc-Andre Laperle	63a1098	2018-02-21 02:39:08 +0000	[diff] [blame]	206	Position P;
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	207	P.line = static_cast<int>(SM.getLineNumber(FID, Offset)) - 1;
				208	bool Invalid = false;
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	209	llvm::StringRef Code = SM.getBufferData(FID, &Invalid);
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	210	if (!Invalid) {
				211	auto ColumnInBytes = SM.getColumnNumber(FID, Offset) - 1;
				212	auto LineSoFar = Code.substr(Offset - ColumnInBytes, ColumnInBytes);
Sam McCall	7189112	2018-10-23 11:51:53 +0000	[diff] [blame]	213	P.character = lspLength(LineSoFar);
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	214	}
Marc-Andre Laperle	63a1098	2018-02-21 02:39:08 +0000	[diff] [blame]	215	return P;
				216	}
				217
Sam McCall	9573807	2019-08-06 20:25:59 +0000	[diff] [blame]	218	bool isSpelledInSource(SourceLocation Loc, const SourceManager &SM) {
				219	if (Loc.isMacroID()) {
				220	std::string PrintLoc = SM.getSpellingLoc(Loc).printToString(SM);
				221	if (llvm::StringRef(PrintLoc).startswith("<scratch") \|\|
				222	llvm::StringRef(PrintLoc).startswith("<command line>"))
				223	return false;
				224	}
				225	return true;
				226	}
				227
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	228	bool isValidFileRange(const SourceManager &Mgr, SourceRange R) {
				229	if (!R.getBegin().isValid() \|\| !R.getEnd().isValid())
				230	return false;
				231
				232	FileID BeginFID;
				233	size_t BeginOffset = 0;
				234	std::tie(BeginFID, BeginOffset) = Mgr.getDecomposedLoc(R.getBegin());
				235
				236	FileID EndFID;
				237	size_t EndOffset = 0;
				238	std::tie(EndFID, EndOffset) = Mgr.getDecomposedLoc(R.getEnd());
				239
				240	return BeginFID.isValid() && BeginFID == EndFID && BeginOffset <= EndOffset;
				241	}
				242
Sam McCall	c791d85	2019-08-27 08:44:06 +0000	[diff] [blame]	243	SourceLocation includeHashLoc(FileID IncludedFile, const SourceManager &SM) {
				244	assert(SM.getLocForEndOfFile(IncludedFile).isFileID());
				245	FileID IncludingFile;
				246	unsigned Offset;
				247	std::tie(IncludingFile, Offset) =
				248	SM.getDecomposedExpansionLoc(SM.getIncludeLoc(IncludedFile));
				249	bool Invalid = false;
				250	llvm::StringRef Buf = SM.getBufferData(IncludingFile, &Invalid);
				251	if (Invalid)
				252	return SourceLocation();
				253	// Now buf is "...\n#include <foo>\n..."
				254	// and Offset points here: ^
				255	// Rewind to the preceding # on the line.
				256	assert(Offset < Buf.size());
				257	for (;; --Offset) {
				258	if (Buf[Offset] == '#')
				259	return SM.getComposedLoc(IncludingFile, Offset);
				260	if (Buf[Offset] == '\n' \|\| Offset == 0) // no hash, what's going on?
				261	return SourceLocation();
				262	}
				263	}
				264
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	265	static unsigned getTokenLengthAtLoc(SourceLocation Loc, const SourceManager &SM,
				266	const LangOptions &LangOpts) {
				267	Token TheTok;
				268	if (Lexer::getRawToken(Loc, TheTok, SM, LangOpts))
				269	return 0;
				270	// FIXME: Here we check whether the token at the location is a greatergreater
				271	// (>>) token and consider it as a single greater (>). This is to get it
				272	// working for templates but it isn't correct for the right shift operator. We
				273	// can avoid this by using half open char ranges in getFileRange() but getting
				274	// token ending is not well supported in macroIDs.
				275	if (TheTok.is(tok::greatergreater))
				276	return 1;
				277	return TheTok.getLength();
				278	}
				279
				280	// Returns location of the last character of the token at a given loc
				281	static SourceLocation getLocForTokenEnd(SourceLocation BeginLoc,
				282	const SourceManager &SM,
				283	const LangOptions &LangOpts) {
				284	unsigned Len = getTokenLengthAtLoc(BeginLoc, SM, LangOpts);
				285	return BeginLoc.getLocWithOffset(Len ? Len - 1 : 0);
				286	}
				287
				288	// Returns location of the starting of the token at a given EndLoc
				289	static SourceLocation getLocForTokenBegin(SourceLocation EndLoc,
				290	const SourceManager &SM,
				291	const LangOptions &LangOpts) {
				292	return EndLoc.getLocWithOffset(
				293	-(signed)getTokenLengthAtLoc(EndLoc, SM, LangOpts));
				294	}
				295
				296	// Converts a char source range to a token range.
				297	static SourceRange toTokenRange(CharSourceRange Range, const SourceManager &SM,
				298	const LangOptions &LangOpts) {
				299	if (!Range.isTokenRange())
				300	Range.setEnd(getLocForTokenBegin(Range.getEnd(), SM, LangOpts));
				301	return Range.getAsRange();
				302	}
				303	// Returns the union of two token ranges.
				304	// To find the maximum of the Ends of the ranges, we compare the location of the
				305	// last character of the token.
				306	static SourceRange unionTokenRange(SourceRange R1, SourceRange R2,
				307	const SourceManager &SM,
				308	const LangOptions &LangOpts) {
Sam McCall	c791d85	2019-08-27 08:44:06 +0000	[diff] [blame]	309	SourceLocation Begin =
				310	SM.isBeforeInTranslationUnit(R1.getBegin(), R2.getBegin())
				311	? R1.getBegin()
				312	: R2.getBegin();
				313	SourceLocation End =
				314	SM.isBeforeInTranslationUnit(getLocForTokenEnd(R1.getEnd(), SM, LangOpts),
				315	getLocForTokenEnd(R2.getEnd(), SM, LangOpts))
				316	? R2.getEnd()
				317	: R1.getEnd();
				318	return SourceRange(Begin, End);
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	319	}
				320
Sam McCall	c791d85	2019-08-27 08:44:06 +0000	[diff] [blame]	321	// Given a range whose endpoints may be in different expansions or files,
				322	// tries to find a range within a common file by following up the expansion and
				323	// include location in each.
				324	static SourceRange rangeInCommonFile(SourceRange R, const SourceManager &SM,
				325	const LangOptions &LangOpts) {
				326	// Fast path for most common cases.
				327	if (SM.isWrittenInSameFile(R.getBegin(), R.getEnd()))
				328	return R;
				329	// Record the stack of expansion locations for the beginning, keyed by FileID.
				330	llvm::DenseMap<FileID, SourceLocation> BeginExpansions;
				331	for (SourceLocation Begin = R.getBegin(); Begin.isValid();
				332	Begin = Begin.isFileID()
				333	? includeHashLoc(SM.getFileID(Begin), SM)
				334	: SM.getImmediateExpansionRange(Begin).getBegin()) {
				335	BeginExpansions[SM.getFileID(Begin)] = Begin;
				336	}
				337	// Move up the stack of expansion locations for the end until we find the
				338	// location in BeginExpansions with that has the same file id.
				339	for (SourceLocation End = R.getEnd(); End.isValid();
				340	End = End.isFileID() ? includeHashLoc(SM.getFileID(End), SM)
				341	: toTokenRange(SM.getImmediateExpansionRange(End),
				342	SM, LangOpts)
				343	.getEnd()) {
				344	auto It = BeginExpansions.find(SM.getFileID(End));
				345	if (It != BeginExpansions.end()) {
				346	if (SM.getFileOffset(It->second) > SM.getFileOffset(End))
				347	return SourceLocation();
				348	return {It->second, End};
				349	}
				350	}
				351	return SourceRange();
Shaurya Gupta	8fbb6ce	2019-08-06 17:01:12 +0000	[diff] [blame]	352	}
				353
				354	// Find an expansion range (not necessarily immediate) the ends of which are in
				355	// the same file id.
				356	static SourceRange
				357	getExpansionTokenRangeInSameFile(SourceLocation Loc, const SourceManager &SM,
				358	const LangOptions &LangOpts) {
Sam McCall	c791d85	2019-08-27 08:44:06 +0000	[diff] [blame]	359	return rangeInCommonFile(
				360	toTokenRange(SM.getImmediateExpansionRange(Loc), SM, LangOpts), SM,
				361	LangOpts);
Shaurya Gupta	8fbb6ce	2019-08-06 17:01:12 +0000	[diff] [blame]	362	}
Sam McCall	c791d85	2019-08-27 08:44:06 +0000	[diff] [blame]	363
Shaurya Gupta	8fbb6ce	2019-08-06 17:01:12 +0000	[diff] [blame]	364	// Returns the file range for a given Location as a Token Range
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	365	// This is quite similar to getFileLoc in SourceManager as both use
				366	// getImmediateExpansionRange and getImmediateSpellingLoc (for macro IDs).
				367	// However:
				368	// - We want to maintain the full range information as we move from one file to
				369	// the next. getFileLoc only uses the BeginLoc of getImmediateExpansionRange.
Shaurya Gupta	8fbb6ce	2019-08-06 17:01:12 +0000	[diff] [blame]	370	// - We want to split '>>' tokens as the lexer parses the '>>' in nested
				371	// template instantiations as a '>>' instead of two '>'s.
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	372	// There is also getExpansionRange but it simply calls
				373	// getImmediateExpansionRange on the begin and ends separately which is wrong.
				374	static SourceRange getTokenFileRange(SourceLocation Loc,
				375	const SourceManager &SM,
				376	const LangOptions &LangOpts) {
				377	SourceRange FileRange = Loc;
				378	while (!FileRange.getBegin().isFileID()) {
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	379	if (SM.isMacroArgExpansion(FileRange.getBegin())) {
Shaurya Gupta	8fbb6ce	2019-08-06 17:01:12 +0000	[diff] [blame]	380	FileRange = unionTokenRange(
				381	SM.getImmediateSpellingLoc(FileRange.getBegin()),
				382	SM.getImmediateSpellingLoc(FileRange.getEnd()), SM, LangOpts);
Sam McCall	c791d85	2019-08-27 08:44:06 +0000	[diff] [blame]	383	assert(SM.isWrittenInSameFile(FileRange.getBegin(), FileRange.getEnd()));
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	384	} else {
Shaurya Gupta	8fbb6ce	2019-08-06 17:01:12 +0000	[diff] [blame]	385	SourceRange ExpansionRangeForBegin =
				386	getExpansionTokenRangeInSameFile(FileRange.getBegin(), SM, LangOpts);
				387	SourceRange ExpansionRangeForEnd =
				388	getExpansionTokenRangeInSameFile(FileRange.getEnd(), SM, LangOpts);
Sam McCall	c791d85	2019-08-27 08:44:06 +0000	[diff] [blame]	389	if (ExpansionRangeForBegin.isInvalid() \|\|
				390	ExpansionRangeForEnd.isInvalid())
				391	return SourceRange();
				392	assert(SM.isWrittenInSameFile(ExpansionRangeForBegin.getBegin(),
				393	ExpansionRangeForEnd.getBegin()) &&
Shaurya Gupta	8fbb6ce	2019-08-06 17:01:12 +0000	[diff] [blame]	394	"Both Expansion ranges should be in same file.");
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	395	FileRange = unionTokenRange(ExpansionRangeForBegin, ExpansionRangeForEnd,
				396	SM, LangOpts);
				397	}
				398	}
				399	return FileRange;
				400	}
				401
Haojian Wu	6ae86ea	2019-07-19 08:33:39 +0000	[diff] [blame]	402	bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM) {
Sam McCall	2be4569	2020-03-02 16:54:56 +0100	[diff] [blame]	403	if (!Loc.isValid())
				404	return false;
				405	FileID FID = SM.getFileID(SM.getExpansionLoc(Loc));
				406	return FID == SM.getMainFileID() \|\| FID == SM.getPreambleFileID();
Haojian Wu	6ae86ea	2019-07-19 08:33:39 +0000	[diff] [blame]	407	}
				408
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	409	llvm::Optional<SourceRange> toHalfOpenFileRange(const SourceManager &SM,
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	410	const LangOptions &LangOpts,
				411	SourceRange R) {
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	412	SourceRange R1 = getTokenFileRange(R.getBegin(), SM, LangOpts);
				413	if (!isValidFileRange(SM, R1))
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	414	return llvm::None;
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	415
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	416	SourceRange R2 = getTokenFileRange(R.getEnd(), SM, LangOpts);
				417	if (!isValidFileRange(SM, R2))
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	418	return llvm::None;
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	419
Sam McCall	c791d85	2019-08-27 08:44:06 +0000	[diff] [blame]	420	SourceRange Result =
				421	rangeInCommonFile(unionTokenRange(R1, R2, SM, LangOpts), SM, LangOpts);
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	422	unsigned TokLen = getTokenLengthAtLoc(Result.getEnd(), SM, LangOpts);
				423	// Convert from closed token range to half-open (char) range
				424	Result.setEnd(Result.getEnd().getLocWithOffset(TokLen));
				425	if (!isValidFileRange(SM, Result))
				426	return llvm::None;
				427
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	428	return Result;
				429	}
				430
				431	llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R) {
				432	assert(isValidFileRange(SM, R));
				433	bool Invalid = false;
				434	auto *Buf = SM.getBuffer(SM.getFileID(R.getBegin()), &Invalid);
				435	assert(!Invalid);
				436
				437	size_t BeginOffset = SM.getFileOffset(R.getBegin());
				438	size_t EndOffset = SM.getFileOffset(R.getEnd());
				439	return Buf->getBuffer().substr(BeginOffset, EndOffset - BeginOffset);
				440	}
				441
Ilya Biryukov	cce67a3	2019-01-29 14:17:36 +0000	[diff] [blame]	442	llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM,
				443	Position P) {
				444	llvm::StringRef Code = SM.getBuffer(SM.getMainFileID())->getBuffer();
				445	auto Offset =
				446	positionToOffset(Code, P, /AllowColumnBeyondLineLength=/false);
				447	if (!Offset)
				448	return Offset.takeError();
				449	return SM.getLocForStartOfFile(SM.getMainFileID()).getLocWithOffset(*Offset);
				450	}
				451
Ilya Biryukov	71028b8	2018-03-12 15:28:22 +0000	[diff] [blame]	452	Range halfOpenToRange(const SourceManager &SM, CharSourceRange R) {
				453	// Clang is 1-based, LSP uses 0-based indexes.
				454	Position Begin = sourceLocToPosition(SM, R.getBegin());
				455	Position End = sourceLocToPosition(SM, R.getEnd());
				456
				457	return {Begin, End};
				458	}
				459
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	460	std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code,
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	461	size_t Offset) {
				462	Offset = std::min(Code.size(), Offset);
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	463	llvm::StringRef Before = Code.substr(0, Offset);
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	464	int Lines = Before.count('\n');
				465	size_t PrevNL = Before.rfind('\n');
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	466	size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1);
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	467	return {Lines + 1, Offset - StartOfLine + 1};
				468	}
				469
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	470	std::pair<StringRef, StringRef> splitQualifiedName(StringRef QName) {
Marc-Andre Laperle	b387b6e	2018-04-23 20:00:52 +0000	[diff] [blame]	471	size_t Pos = QName.rfind("::");
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	472	if (Pos == llvm::StringRef::npos)
				473	return {llvm::StringRef(), QName};
Marc-Andre Laperle	b387b6e	2018-04-23 20:00:52 +0000	[diff] [blame]	474	return {QName.substr(0, Pos + 2), QName.substr(Pos + 2)};
				475	}
				476
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	477	TextEdit replacementToEdit(llvm::StringRef Code,
				478	const tooling::Replacement &R) {
Eric Liu	9133ecd	2018-05-11 12:12:08 +0000	[diff] [blame]	479	Range ReplacementRange = {
				480	offsetToPosition(Code, R.getOffset()),
				481	offsetToPosition(Code, R.getOffset() + R.getLength())};
Benjamin Kramer	adcd026	2020-01-28 20:23:46 +0100	[diff] [blame]	482	return {ReplacementRange, std::string(R.getReplacementText())};
Eric Liu	9133ecd	2018-05-11 12:12:08 +0000	[diff] [blame]	483	}
				484
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	485	std::vector<TextEdit> replacementsToEdits(llvm::StringRef Code,
Eric Liu	9133ecd	2018-05-11 12:12:08 +0000	[diff] [blame]	486	const tooling::Replacements &Repls) {
				487	std::vector<TextEdit> Edits;
				488	for (const auto &R : Repls)
				489	Edits.push_back(replacementToEdit(Code, R));
				490	return Edits;
				491	}
				492
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	493	llvm::Optional<std::string> getCanonicalPath(const FileEntry *F,
				494	const SourceManager &SourceMgr) {
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	495	if (!F)
				496	return None;
Simon Marchi	25f1f73	2018-08-10 22:27:53 +0000	[diff] [blame]	497
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	498	llvm::SmallString<128> FilePath = F->getName();
				499	if (!llvm::sys::path::is_absolute(FilePath)) {
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	500	if (auto EC =
Duncan P. N. Exon Smith	db8a742	2019-03-26 22:32:06 +0000	[diff] [blame]	501	SourceMgr.getFileManager().getVirtualFileSystem().makeAbsolute(
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	502	FilePath)) {
				503	elog("Could not turn relative path '{0}' to absolute: {1}", FilePath,
				504	EC.message());
Sam McCall	c008af6	2018-10-20 15:30:37 +0000	[diff] [blame]	505	return None;
Marc-Andre Laperle	1be6970	2018-07-05 19:35:01 +0000	[diff] [blame]	506	}
				507	}
Simon Marchi	25f1f73	2018-08-10 22:27:53 +0000	[diff] [blame]	508
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	509	// Handle the symbolic link path case where the current working directory
Kadir Cetinkaya	5b27093	2019-09-09 12:28:44 +0000	[diff] [blame]	510	// (getCurrentWorkingDirectory) is a symlink. We always want to the real
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	511	// file path (instead of the symlink path) for the C++ symbols.
				512	//
				513	// Consider the following example:
				514	//
				515	// src dir: /project/src/foo.h
				516	// current working directory (symlink): /tmp/build -> /project/src/
				517	//
				518	// The file path of Symbol is "/project/src/foo.h" instead of
				519	// "/tmp/build/foo.h"
Harlan Haskins	a02f857	2019-08-01 21:32:01 +0000	[diff] [blame]	520	if (auto Dir = SourceMgr.getFileManager().getDirectory(
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	521	llvm::sys::path::parent_path(FilePath))) {
				522	llvm::SmallString<128> RealPath;
Harlan Haskins	a02f857	2019-08-01 21:32:01 +0000	[diff] [blame]	523	llvm::StringRef DirName = SourceMgr.getFileManager().getCanonicalName(*Dir);
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	524	llvm::sys::path::append(RealPath, DirName,
				525	llvm::sys::path::filename(FilePath));
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	526	return RealPath.str().str();
Simon Marchi	25f1f73	2018-08-10 22:27:53 +0000	[diff] [blame]	527	}
				528
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	529	return FilePath.str().str();
Marc-Andre Laperle	1be6970	2018-07-05 19:35:01 +0000	[diff] [blame]	530	}
				531
Kadir Cetinkaya	2f84d91	2018-08-08 08:59:29 +0000	[diff] [blame]	532	TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M,
				533	const LangOptions &L) {
				534	TextEdit Result;
				535	Result.range =
				536	halfOpenToRange(M, Lexer::makeFileCharRange(FixIt.RemoveRange, M, L));
				537	Result.newText = FixIt.CodeToInsert;
				538	return Result;
				539	}
				540
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	541	FileDigest digest(llvm::StringRef Content) {
Sam McCall	674d8a9	2019-07-08 11:33:17 +0000	[diff] [blame]	542	uint64_t Hash{llvm::xxHash64(Content)};
				543	FileDigest Result;
				544	for (unsigned I = 0; I < Result.size(); ++I) {
				545	Result[I] = uint8_t(Hash);
				546	Hash >>= 8;
				547	}
				548	return Result;
Kadir Cetinkaya	d08eab4	2018-11-27 16:08:53 +0000	[diff] [blame]	549	}
				550
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	551	llvm::Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {
Kadir Cetinkaya	d08eab4	2018-11-27 16:08:53 +0000	[diff] [blame]	552	bool Invalid = false;
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	553	llvm::StringRef Content = SM.getBufferData(FID, &Invalid);
Kadir Cetinkaya	d08eab4	2018-11-27 16:08:53 +0000	[diff] [blame]	554	if (Invalid)
				555	return None;
				556	return digest(Content);
				557	}
				558
Eric Liu	dd66277	2019-01-28 14:01:55 +0000	[diff] [blame]	559	format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
				560	llvm::StringRef Content,
				561	llvm::vfs::FileSystem *FS) {
				562	auto Style = format::getStyle(format::DefaultFormatStyle, File,
				563	format::DefaultFallbackStyle, Content, FS);
				564	if (!Style) {
				565	log("getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File,
				566	Style.takeError());
				567	Style = format::getLLVMStyle();
				568	}
				569	return *Style;
				570	}
				571
Haojian Wu	12e194c	2019-02-06 15:24:50 +0000	[diff] [blame]	572	llvm::Expected<tooling::Replacements>
				573	cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces,
				574	const format::FormatStyle &Style) {
				575	auto CleanReplaces = cleanupAroundReplacements(Code, Replaces, Style);
				576	if (!CleanReplaces)
				577	return CleanReplaces;
				578	return formatReplacements(Code, std::move(*CleanReplaces), Style);
				579	}
				580
Haojian Wu	c5e4cf4	2019-11-07 10:53:19 +0100	[diff] [blame]	581	static void
				582	lex(llvm::StringRef Code, const LangOptions &LangOpts,
Kadir Cetinkaya	98bb094	2020-02-27 15:10:54 +0100	[diff] [blame]	583	llvm::function_ref<void(const syntax::Token &, const SourceManager &SM)>
Haojian Wu	c5e4cf4	2019-11-07 10:53:19 +0100	[diff] [blame]	584	Action) {
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	585	// FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated!
				586	std::string NullTerminatedCode = Code.str();
				587	SourceManagerForFile FileSM("dummy.cpp", NullTerminatedCode);
Eric Liu	00d99bd	2019-04-11 09:36:36 +0000	[diff] [blame]	588	auto &SM = FileSM.get();
Kadir Cetinkaya	98bb094	2020-02-27 15:10:54 +0100	[diff] [blame]	589	for (const auto &Tok : syntax::tokenize(SM.getMainFileID(), SM, LangOpts))
Haojian Wu	7ea4c6f	2019-10-30 13:21:47 +0100	[diff] [blame]	590	Action(Tok, SM);
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	591	}
				592
				593	llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
				594	const format::FormatStyle &Style) {
Eric Liu	00d99bd	2019-04-11 09:36:36 +0000	[diff] [blame]	595	llvm::StringMap<unsigned> Identifiers;
Haojian Wu	7ea4c6f	2019-10-30 13:21:47 +0100	[diff] [blame]	596	auto LangOpt = format::getFormattingLangOpts(Style);
Kadir Cetinkaya	98bb094	2020-02-27 15:10:54 +0100	[diff] [blame]	597	lex(Content, LangOpt, [&](const syntax::Token &Tok, const SourceManager &SM) {
				598	if (Tok.kind() == tok::identifier)
				599	++Identifiers[Tok.text(SM)];
				600	// FIXME: Should this function really return keywords too ?
				601	else if (const auto *Keyword = tok::getKeywordSpelling(Tok.kind()))
				602	++Identifiers[Keyword];
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	603	});
Eric Liu	00d99bd	2019-04-11 09:36:36 +0000	[diff] [blame]	604	return Identifiers;
				605	}
				606
Haojian Wu	7ea4c6f	2019-10-30 13:21:47 +0100	[diff] [blame]	607	std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier,
				608	llvm::StringRef Content,
				609	const LangOptions &LangOpts) {
				610	std::vector<Range> Ranges;
Kadir Cetinkaya	98bb094	2020-02-27 15:10:54 +0100	[diff] [blame]	611	lex(Content, LangOpts,
				612	[&](const syntax::Token &Tok, const SourceManager &SM) {
				613	if (Tok.kind() != tok::identifier \|\| Tok.text(SM) != Identifier)
				614	return;
Kadir Cetinkaya	3755039	2020-03-01 16:05:12 +0100	[diff] [blame]	615	Ranges.push_back(halfOpenToRange(SM, Tok.range(SM).toCharRange(SM)));
Kadir Cetinkaya	98bb094	2020-02-27 15:10:54 +0100	[diff] [blame]	616	});
Haojian Wu	7ea4c6f	2019-10-30 13:21:47 +0100	[diff] [blame]	617	return Ranges;
				618	}
				619
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	620	namespace {
Kadir Cetinkaya	d62e3ed	2019-09-25 11:35:38 +0200	[diff] [blame]	621	struct NamespaceEvent {
				622	enum {
				623	BeginNamespace, // namespace <ns> {. Payload is resolved <ns>.
				624	EndNamespace, // } // namespace <ns>. Payload is resolved outer
				625	// namespace.
				626	UsingDirective // using namespace <ns>. Payload is unresolved <ns>.
				627	} Trigger;
				628	std::string Payload;
				629	Position Pos;
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	630	};
				631	// Scans C++ source code for constructs that change the visible namespaces.
Nathan Ridge	445195b	2020-03-05 19:03:26 -0500	[diff] [blame]	632	void parseNamespaceEvents(llvm::StringRef Code, const LangOptions &LangOpts,
Kadir Cetinkaya	d62e3ed	2019-09-25 11:35:38 +0200	[diff] [blame]	633	llvm::function_ref<void(NamespaceEvent)> Callback) {
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	634
				635	// Stack of enclosing namespaces, e.g. {"clang", "clangd"}
				636	std::vector<std::string> Enclosing; // Contains e.g. "clang", "clangd"
				637	// Stack counts open braces. true if the brace opened a namespace.
				638	std::vector<bool> BraceStack;
				639
				640	enum {
				641	Default,
				642	Namespace, // just saw 'namespace'
				643	NamespaceName, // just saw 'namespace' NSName
				644	Using, // just saw 'using'
				645	UsingNamespace, // just saw 'using namespace'
				646	UsingNamespaceName, // just saw 'using namespace' NSName
				647	} State = Default;
				648	std::string NSName;
				649
Kadir Cetinkaya	d62e3ed	2019-09-25 11:35:38 +0200	[diff] [blame]	650	NamespaceEvent Event;
Nathan Ridge	445195b	2020-03-05 19:03:26 -0500	[diff] [blame]	651	lex(Code, LangOpts, [&](const syntax::Token &Tok, const SourceManager &SM) {
				652	Event.Pos = sourceLocToPosition(SM, Tok.location());
				653	switch (Tok.kind()) {
				654	case tok::kw_using:
				655	State = State == Default ? Using : Default;
				656	break;
				657	case tok::kw_namespace:
				658	switch (State) {
				659	case Using:
				660	State = UsingNamespace;
				661	break;
				662	case Default:
				663	State = Namespace;
				664	break;
				665	default:
				666	State = Default;
				667	break;
				668	}
				669	break;
				670	case tok::identifier:
				671	switch (State) {
				672	case UsingNamespace:
				673	NSName.clear();
				674	LLVM_FALLTHROUGH;
				675	case UsingNamespaceName:
				676	NSName.append(Tok.text(SM).str());
				677	State = UsingNamespaceName;
				678	break;
				679	case Namespace:
				680	NSName.clear();
				681	LLVM_FALLTHROUGH;
				682	case NamespaceName:
				683	NSName.append(Tok.text(SM).str());
				684	State = NamespaceName;
				685	break;
				686	case Using:
				687	case Default:
				688	State = Default;
				689	break;
				690	}
				691	break;
				692	case tok::coloncolon:
				693	// This can come at the beginning or in the middle of a namespace
				694	// name.
				695	switch (State) {
				696	case UsingNamespace:
				697	NSName.clear();
				698	LLVM_FALLTHROUGH;
				699	case UsingNamespaceName:
				700	NSName.append("::");
				701	State = UsingNamespaceName;
				702	break;
				703	case NamespaceName:
				704	NSName.append("::");
				705	State = NamespaceName;
				706	break;
				707	case Namespace: // Not legal here.
				708	case Using:
				709	case Default:
				710	State = Default;
				711	break;
				712	}
				713	break;
				714	case tok::l_brace:
				715	// Record which { started a namespace, so we know when } ends one.
				716	if (State == NamespaceName) {
				717	// Parsed: namespace <name> {
				718	BraceStack.push_back(true);
				719	Enclosing.push_back(NSName);
				720	Event.Trigger = NamespaceEvent::BeginNamespace;
				721	Event.Payload = llvm::join(Enclosing, "::");
				722	Callback(Event);
				723	} else {
				724	// This case includes anonymous namespaces (State = Namespace).
				725	// For our purposes, they're not namespaces and we ignore them.
				726	BraceStack.push_back(false);
				727	}
				728	State = Default;
				729	break;
				730	case tok::r_brace:
				731	// If braces are unmatched, we're going to be confused, but don't
				732	// crash.
				733	if (!BraceStack.empty()) {
				734	if (BraceStack.back()) {
				735	// Parsed: } // namespace
				736	Enclosing.pop_back();
				737	Event.Trigger = NamespaceEvent::EndNamespace;
				738	Event.Payload = llvm::join(Enclosing, "::");
				739	Callback(Event);
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	740	}
Nathan Ridge	445195b	2020-03-05 19:03:26 -0500	[diff] [blame]	741	BraceStack.pop_back();
				742	}
				743	break;
				744	case tok::semi:
				745	if (State == UsingNamespaceName) {
				746	// Parsed: using namespace <name> ;
				747	Event.Trigger = NamespaceEvent::UsingDirective;
				748	Event.Payload = std::move(NSName);
				749	Callback(Event);
				750	}
				751	State = Default;
				752	break;
				753	default:
				754	State = Default;
				755	break;
				756	}
				757	});
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	758	}
				759
				760	// Returns the prefix namespaces of NS: {"" ... NS}.
				761	llvm::SmallVector<llvm::StringRef, 8> ancestorNamespaces(llvm::StringRef NS) {
				762	llvm::SmallVector<llvm::StringRef, 8> Results;
				763	Results.push_back(NS.take_front(0));
				764	NS.split(Results, "::", /MaxSplit=/-1, /KeepEmpty=/false);
				765	for (llvm::StringRef &R : Results)
				766	R = NS.take_front(R.end() - NS.begin());
				767	return Results;
				768	}
				769
				770	} // namespace
				771
				772	std::vector<std::string> visibleNamespaces(llvm::StringRef Code,
Nathan Ridge	445195b	2020-03-05 19:03:26 -0500	[diff] [blame]	773	const LangOptions &LangOpts) {
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	774	std::string Current;
				775	// Map from namespace to (resolved) namespaces introduced via using directive.
				776	llvm::StringMap<llvm::StringSet<>> UsingDirectives;
				777
Nathan Ridge	445195b	2020-03-05 19:03:26 -0500	[diff] [blame]	778	parseNamespaceEvents(Code, LangOpts, [&](NamespaceEvent Event) {
Kadir Cetinkaya	d62e3ed	2019-09-25 11:35:38 +0200	[diff] [blame]	779	llvm::StringRef NS = Event.Payload;
				780	switch (Event.Trigger) {
				781	case NamespaceEvent::BeginNamespace:
				782	case NamespaceEvent::EndNamespace:
				783	Current = std::move(Event.Payload);
				784	break;
				785	case NamespaceEvent::UsingDirective:
				786	if (NS.consume_front("::"))
				787	UsingDirectives[Current].insert(NS);
				788	else {
				789	for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) {
				790	if (Enclosing.empty())
				791	UsingDirectives[Current].insert(NS);
				792	else
				793	UsingDirectives[Current].insert((Enclosing + "::" + NS).str());
				794	}
				795	}
				796	break;
				797	}
				798	});
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	799
				800	std::vector<std::string> Found;
				801	for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) {
Benjamin Kramer	adcd026	2020-01-28 20:23:46 +0100	[diff] [blame]	802	Found.push_back(std::string(Enclosing));
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	803	auto It = UsingDirectives.find(Enclosing);
				804	if (It != UsingDirectives.end())
Kadir Cetinkaya	d62e3ed	2019-09-25 11:35:38 +0200	[diff] [blame]	805	for (const auto &Used : It->second)
Benjamin Kramer	adcd026	2020-01-28 20:23:46 +0100	[diff] [blame]	806	Found.push_back(std::string(Used.getKey()));
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	807	}
				808
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	809	llvm::sort(Found, [&](const std::string &LHS, const std::string &RHS) {
				810	if (Current == RHS)
				811	return false;
				812	if (Current == LHS)
				813	return true;
				814	return LHS < RHS;
				815	});
				816	Found.erase(std::unique(Found.begin(), Found.end()), Found.end());
				817	return Found;
				818	}
				819
Sam McCall	9fb22b2	2019-05-06 10:25:10 +0000	[diff] [blame]	820	llvm::StringSet<> collectWords(llvm::StringRef Content) {
				821	// We assume short words are not significant.
				822	// We may want to consider other stopwords, e.g. language keywords.
				823	// (A very naive implementation showed no benefit, but lexing might do better)
				824	static constexpr int MinWordLength = 4;
				825
				826	std::vector<CharRole> Roles(Content.size());
				827	calculateRoles(Content, Roles);
				828
				829	llvm::StringSet<> Result;
				830	llvm::SmallString<256> Word;
				831	auto Flush = [&] {
				832	if (Word.size() >= MinWordLength) {
				833	for (char &C : Word)
				834	C = llvm::toLower(C);
				835	Result.insert(Word);
				836	}
				837	Word.clear();
				838	};
				839	for (unsigned I = 0; I < Content.size(); ++I) {
				840	switch (Roles[I]) {
				841	case Head:
				842	Flush();
				843	LLVM_FALLTHROUGH;
				844	case Tail:
				845	Word.push_back(Content[I]);
				846	break;
				847	case Unknown:
				848	case Separator:
				849	Flush();
				850	break;
				851	}
				852	}
				853	Flush();
				854
				855	return Result;
				856	}
				857
Sam McCall	3f1c2bf	2020-03-02 22:45:25 +0100	[diff] [blame^]	858	static bool isLikelyIdentifier(llvm::StringRef Word, llvm::StringRef Before,
				859	llvm::StringRef After) {
				860	// `foo` is an identifier.
				861	if (Before.endswith("`") && After.startswith("`"))
				862	return true;
				863	// In foo::bar, both foo and bar are identifiers.
				864	if (Before.endswith("::") \|\| After.startswith("::"))
				865	return true;
				866	// Doxygen tags like \c foo indicate identifiers.
				867	// Don't search too far back.
				868	// This duplicates clang's doxygen parser, revisit if it gets complicated.
				869	Before = Before.take_back(100); // Don't search too far back.
				870	auto Pos = Before.find_last_of("\\@");
				871	if (Pos != llvm::StringRef::npos) {
				872	llvm::StringRef Tag = Before.substr(Pos + 1).rtrim(' ');
				873	if (Tag == "p" \|\| Tag == "c" \|\| Tag == "class" \|\| Tag == "tparam" \|\|
				874	Tag == "param" \|\| Tag == "param[in]" \|\| Tag == "param[out]" \|\|
				875	Tag == "param[in,out]" \|\| Tag == "retval" \|\| Tag == "throw" \|\|
				876	Tag == "throws" \|\| Tag == "link")
				877	return true;
				878	}
				879
				880	// Word contains underscore.
				881	// This handles things like snake_case and MACRO_CASE.
				882	if (Word.contains('_')) {
				883	return true;
				884	}
				885	// Word contains capital letter other than at beginning.
				886	// This handles things like lowerCamel and UpperCamel.
				887	// The check for also containing a lowercase letter is to rule out
				888	// initialisms like "HTTP".
				889	bool HasLower = Word.find_if(clang::isLowercase) != StringRef::npos;
				890	bool HasUpper = Word.substr(1).find_if(clang::isUppercase) != StringRef::npos;
				891	if (HasLower && HasUpper) {
				892	return true;
				893	}
				894	// FIXME: consider mid-sentence Capitalization?
				895	return false;
				896	}
				897
				898	llvm::Optional<SpelledWord> SpelledWord::touching(SourceLocation SpelledLoc,
				899	const syntax::TokenBuffer &TB,
				900	const LangOptions &LangOpts) {
				901	const auto &SM = TB.sourceManager();
				902	auto Touching = syntax::spelledTokensTouching(SpelledLoc, TB);
				903	for (const auto &T : Touching) {
				904	// If the token is an identifier or a keyword, don't use any heuristics.
				905	if (tok::isAnyIdentifier(T.kind()) \|\| tok::getKeywordSpelling(T.kind())) {
				906	SpelledWord Result;
				907	Result.Location = T.location();
				908	Result.Text = T.text(SM);
				909	Result.LikelyIdentifier = tok::isAnyIdentifier(T.kind());
				910	Result.PartOfSpelledToken = &T;
				911	Result.SpelledToken = &T;
				912	auto Expanded =
				913	TB.expandedTokens(SM.getMacroArgExpandedLocation(T.location()));
				914	if (Expanded.size() == 1 && Expanded.front().text(SM) == Result.Text)
				915	Result.ExpandedToken = &Expanded.front();
				916	return Result;
				917	}
				918	}
				919	FileID File;
				920	unsigned Offset;
				921	std::tie(File, Offset) = SM.getDecomposedLoc(SpelledLoc);
				922	bool Invalid = false;
				923	llvm::StringRef Code = SM.getBufferData(File, &Invalid);
				924	if (Invalid)
				925	return llvm::None;
				926	unsigned B = Offset, E = Offset;
				927	while (B > 0 && isIdentifierBody(Code[B - 1]))
				928	--B;
				929	while (E < Code.size() && isIdentifierBody(Code[E]))
				930	++E;
				931	if (B == E)
				932	return llvm::None;
				933
				934	SpelledWord Result;
				935	Result.Location = SM.getComposedLoc(File, B);
				936	Result.Text = Code.slice(B, E);
				937	Result.LikelyIdentifier =
				938	isLikelyIdentifier(Result.Text, Code.substr(0, B), Code.substr(E)) &&
				939	// should not be a keyword
				940	tok::isAnyIdentifier(
				941	IdentifierTable(LangOpts).get(Result.Text).getTokenID());
				942	for (const auto &T : Touching)
				943	if (T.location() <= Result.Location)
				944	Result.PartOfSpelledToken = &T;
				945	return Result;
				946	}
				947
Kadir Cetinkaya	3ae2fc7	2020-02-28 09:25:40 +0100	[diff] [blame]	948	llvm::Optional<DefinedMacro> locateMacroAt(const syntax::Token &SpelledTok,
Haojian Wu	9d34f45	2019-07-01 09:26:48 +0000	[diff] [blame]	949	Preprocessor &PP) {
Kadir Cetinkaya	3ae2fc7	2020-02-28 09:25:40 +0100	[diff] [blame]	950	SourceLocation Loc = SpelledTok.location();
Kadir Cetinkaya	c24c89d	2020-02-27 16:02:44 +0100	[diff] [blame]	951	assert(Loc.isFileID());
Haojian Wu	9d34f45	2019-07-01 09:26:48 +0000	[diff] [blame]	952	const auto &SM = PP.getSourceManager();
Kadir Cetinkaya	3ae2fc7	2020-02-28 09:25:40 +0100	[diff] [blame]	953	IdentifierInfo *IdentifierInfo = PP.getIdentifierInfo(SpelledTok.text(SM));
Haojian Wu	9d34f45	2019-07-01 09:26:48 +0000	[diff] [blame]	954	if (!IdentifierInfo \|\| !IdentifierInfo->hadMacroDefinition())
				955	return None;
				956
Haojian Wu	9d34f45	2019-07-01 09:26:48 +0000	[diff] [blame]	957	// Get the definition just before the searched location so that a macro
Kadir Cetinkaya	c24c89d	2020-02-27 16:02:44 +0100	[diff] [blame]	958	// referenced in a '#undef MACRO' can still be found. Note that we only do
				959	// that if Loc is not pointing at start of file.
				960	if (SM.getLocForStartOfFile(SM.getFileID(Loc)) != Loc)
				961	Loc = Loc.getLocWithOffset(-1);
				962	MacroDefinition MacroDef = PP.getMacroDefinitionAtLoc(IdentifierInfo, Loc);
Haojian Wu	9d34f45	2019-07-01 09:26:48 +0000	[diff] [blame]	963	if (auto *MI = MacroDef.getMacroInfo())
				964	return DefinedMacro{IdentifierInfo->getName(), MI};
				965	return None;
				966	}
				967
Kadir Cetinkaya	5b27093	2019-09-09 12:28:44 +0000	[diff] [blame]	968	llvm::Expected<std::string> Edit::apply() const {
				969	return tooling::applyAllReplacements(InitialCode, Replacements);
				970	}
				971
				972	std::vector<TextEdit> Edit::asTextEdits() const {
				973	return replacementsToEdits(InitialCode, Replacements);
				974	}
				975
				976	bool Edit::canApplyTo(llvm::StringRef Code) const {
				977	// Create line iterators, since line numbers are important while applying our
				978	// edit we cannot skip blank lines.
				979	auto LHS = llvm::MemoryBuffer::getMemBuffer(Code);
				980	llvm::line_iterator LHSIt(LHS, /SkipBlanks=*/false);
				981
				982	auto RHS = llvm::MemoryBuffer::getMemBuffer(InitialCode);
				983	llvm::line_iterator RHSIt(RHS, /SkipBlanks=*/false);
				984
				985	// Compare the InitialCode we prepared the edit for with the Code we received
				986	// line by line to make sure there are no differences.
				987	// FIXME: This check is too conservative now, it should be enough to only
				988	// check lines around the replacements contained inside the Edit.
				989	while (!LHSIt.is_at_eof() && !RHSIt.is_at_eof()) {
				990	if (LHSIt != RHSIt)
				991	return false;
				992	++LHSIt;
				993	++RHSIt;
				994	}
				995
				996	// After we reach EOF for any of the files we make sure the other one doesn't
				997	// contain any additional content except empty lines, they should not
				998	// interfere with the edit we produced.
				999	while (!LHSIt.is_at_eof()) {
				1000	if (!LHSIt->empty())
				1001	return false;
				1002	++LHSIt;
				1003	}
				1004	while (!RHSIt.is_at_eof()) {
				1005	if (!RHSIt->empty())
				1006	return false;
				1007	++RHSIt;
				1008	}
				1009	return true;
				1010	}
				1011
				1012	llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style) {
				1013	if (auto NewEdits = cleanupAndFormat(E.InitialCode, E.Replacements, Style))
				1014	E.Replacements = std::move(*NewEdits);
				1015	else
				1016	return NewEdits.takeError();
				1017	return llvm::Error::success();
				1018	}
				1019
Kadir Cetinkaya	d62e3ed	2019-09-25 11:35:38 +0200	[diff] [blame]	1020	EligibleRegion getEligiblePoints(llvm::StringRef Code,
				1021	llvm::StringRef FullyQualifiedName,
Nathan Ridge	445195b	2020-03-05 19:03:26 -0500	[diff] [blame]	1022	const LangOptions &LangOpts) {
Kadir Cetinkaya	d62e3ed	2019-09-25 11:35:38 +0200	[diff] [blame]	1023	EligibleRegion ER;
				1024	// Start with global namespace.
				1025	std::vector<std::string> Enclosing = {""};
				1026	// FIXME: In addition to namespaces try to generate events for function
				1027	// definitions as well. One might use a closing parantheses(")" followed by an
				1028	// opening brace "{" to trigger the start.
Nathan Ridge	445195b	2020-03-05 19:03:26 -0500	[diff] [blame]	1029	parseNamespaceEvents(Code, LangOpts, [&](NamespaceEvent Event) {
Kadir Cetinkaya	d62e3ed	2019-09-25 11:35:38 +0200	[diff] [blame]	1030	// Using Directives only introduces declarations to current scope, they do
				1031	// not change the current namespace, so skip them.
				1032	if (Event.Trigger == NamespaceEvent::UsingDirective)
				1033	return;
				1034	// Do not qualify the global namespace.
				1035	if (!Event.Payload.empty())
				1036	Event.Payload.append("::");
				1037
				1038	std::string CurrentNamespace;
				1039	if (Event.Trigger == NamespaceEvent::BeginNamespace) {
				1040	Enclosing.emplace_back(std::move(Event.Payload));
				1041	CurrentNamespace = Enclosing.back();
				1042	// parseNameSpaceEvents reports the beginning position of a token; we want
				1043	// to insert after '{', so increment by one.
				1044	++Event.Pos.character;
				1045	} else {
				1046	// Event.Payload points to outer namespace when exiting a scope, so use
				1047	// the namespace we've last entered instead.
				1048	CurrentNamespace = std::move(Enclosing.back());
				1049	Enclosing.pop_back();
				1050	assert(Enclosing.back() == Event.Payload);
				1051	}
				1052
				1053	// Ignore namespaces that are not a prefix of the target.
				1054	if (!FullyQualifiedName.startswith(CurrentNamespace))
				1055	return;
				1056
				1057	// Prefer the namespace that shares the longest prefix with target.
				1058	if (CurrentNamespace.size() > ER.EnclosingNamespace.size()) {
				1059	ER.EligiblePoints.clear();
				1060	ER.EnclosingNamespace = CurrentNamespace;
				1061	}
				1062	if (CurrentNamespace.size() == ER.EnclosingNamespace.size())
				1063	ER.EligiblePoints.emplace_back(std::move(Event.Pos));
				1064	});
				1065	// If there were no shared namespaces just return EOF.
				1066	if (ER.EligiblePoints.empty()) {
				1067	assert(ER.EnclosingNamespace.empty());
				1068	ER.EligiblePoints.emplace_back(offsetToPosition(Code, Code.size()));
				1069	}
				1070	return ER;
				1071	}
				1072
Haojian Wu	509efe5	2019-11-13 16:30:07 +0100	[diff] [blame]	1073	bool isHeaderFile(llvm::StringRef FileName,
				1074	llvm::Optional<LangOptions> LangOpts) {
				1075	// Respect the langOpts, for non-file-extension cases, e.g. standard library
				1076	// files.
				1077	if (LangOpts && LangOpts->IsHeaderFile)
				1078	return true;
				1079	namespace types = clang::driver::types;
				1080	auto Lang = types::lookupTypeForExtension(
				1081	llvm::sys::path::extension(FileName).substr(1));
				1082	return Lang != types::TY_INVALID && types::onlyPrecompileType(Lang);
				1083	}
				1084
Haojian Wu	f8865c0	2020-02-05 12:03:29 +0100	[diff] [blame]	1085	bool isProtoFile(SourceLocation Loc, const SourceManager &SM) {
				1086	auto FileName = SM.getFilename(Loc);
				1087	if (!FileName.endswith(".proto.h") && !FileName.endswith(".pb.h"))
				1088	return false;
				1089	auto FID = SM.getFileID(Loc);
				1090	// All proto generated headers should start with this line.
				1091	static const char *PROTO_HEADER_COMMENT =
				1092	"// Generated by the protocol buffer compiler. DO NOT EDIT!";
				1093	// Double check that this is an actual protobuf header.
				1094	return SM.getBufferData(FID).startswith(PROTO_HEADER_COMMENT);
				1095	}
				1096
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	1097	} // namespace clangd
				1098	} // namespace clang