Blame - clang-tools-extra/clangd/SourceCode.cpp - toolchain/llvm-project

blob: 05ca7aaaa1e86981bf2f83f2e98e4d39a5f0faac [file] [log] [blame]

//===--- SourceCode.cpp - Manipulating source code as strings ---*- C++ -*-===//
				2	//
Chandler Carruth	2946cd7	2019-01-19 08:50:56 +0000	[diff] [blame]	3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	6	//
				7	//===----------------------------------------------------------------------===//
				8	#include "SourceCode.h"
				9
Sam McCall	a69698f	2019-03-27 17:47:49 +0000	[diff] [blame]	10	#include "Context.h"
Sam McCall	9fb22b2	2019-05-06 10:25:10 +0000	[diff] [blame]	11	#include "FuzzyMatch.h"
Marc-Andre Laperle	1be6970	2018-07-05 19:35:01 +0000	[diff] [blame]	12	#include "Logger.h"
Sam McCall	a69698f	2019-03-27 17:47:49 +0000	[diff] [blame]	13	#include "Protocol.h"
Kadir Cetinkaya	5b27093	2019-09-09 12:28:44 +0000	[diff] [blame]	14	#include "refactor/Tweak.h"
Marc-Andre Laperle	1be6970	2018-07-05 19:35:01 +0000	[diff] [blame]	15	#include "clang/AST/ASTContext.h"
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	16	#include "clang/Basic/LangOptions.h"
				17	#include "clang/Basic/SourceLocation.h"
Marc-Andre Laperle	63a1098	2018-02-21 02:39:08 +0000	[diff] [blame]	18	#include "clang/Basic/SourceManager.h"
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	19	#include "clang/Basic/TokenKinds.h"
				20	#include "clang/Format/Format.h"
Marc-Andre Laperle	1be6970	2018-07-05 19:35:01 +0000	[diff] [blame]	21	#include "clang/Lex/Lexer.h"
Haojian Wu	9d34f45	2019-07-01 09:26:48 +0000	[diff] [blame]	22	#include "clang/Lex/Preprocessor.h"
Kadir Cetinkaya	5b27093	2019-09-09 12:28:44 +0000	[diff] [blame]	23	#include "clang/Tooling/Core/Replacement.h"
				24	#include "llvm/ADT/ArrayRef.h"
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	25	#include "llvm/ADT/None.h"
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	26	#include "llvm/ADT/StringExtras.h"
Kadir Cetinkaya	5b27093	2019-09-09 12:28:44 +0000	[diff] [blame]	27	#include "llvm/ADT/StringMap.h"
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	28	#include "llvm/ADT/StringRef.h"
Sam McCall	9fb22b2	2019-05-06 10:25:10 +0000	[diff] [blame]	29	#include "llvm/Support/Compiler.h"
Simon Marchi	766338a	2018-03-21 14:36:46 +0000	[diff] [blame]	30	#include "llvm/Support/Errc.h"
				31	#include "llvm/Support/Error.h"
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	32	#include "llvm/Support/ErrorHandling.h"
Kadir Cetinkaya	5b27093	2019-09-09 12:28:44 +0000	[diff] [blame]	33	#include "llvm/Support/LineIterator.h"
				34	#include "llvm/Support/MemoryBuffer.h"
Marc-Andre Laperle	1be6970	2018-07-05 19:35:01 +0000	[diff] [blame]	35	#include "llvm/Support/Path.h"
Kadir Cetinkaya	5b27093	2019-09-09 12:28:44 +0000	[diff] [blame]	36	#include "llvm/Support/SHA1.h"
				37	#include "llvm/Support/VirtualFileSystem.h"
Sam McCall	674d8a9	2019-07-08 11:33:17 +0000	[diff] [blame]	38	#include "llvm/Support/xxhash.h"
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	39	#include <algorithm>
Marc-Andre Laperle	63a1098	2018-02-21 02:39:08 +0000	[diff] [blame]	40
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	41	namespace clang {
				42	namespace clangd {
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	43
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	44	// Here be dragons. LSP positions use columns measured in UTF-16 code units!
				45	// Clangd uses UTF-8 and byte-offsets internally, so conversion is nontrivial.
				46
				47	// Iterates over unicode codepoints in the (UTF-8) string. For each,
				48	// invokes CB(UTF-8 length, UTF-16 length), and breaks if it returns true.
				49	// Returns true if CB returned true, false if we hit the end of string.
				50	template <typename Callback>
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	51	static bool iterateCodepoints(llvm::StringRef U8, const Callback &CB) {
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	52	// A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
				53	// Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx.
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	54	for (size_t I = 0; I < U8.size();) {
				55	unsigned char C = static_cast<unsigned char>(U8[I]);
				56	if (LLVM_LIKELY(!(C & 0x80))) { // ASCII character.
				57	if (CB(1, 1))
				58	return true;
				59	++I;
				60	continue;
				61	}
				62	// This convenient property of UTF-8 holds for all non-ASCII characters.
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	63	size_t UTF8Length = llvm::countLeadingOnes(C);
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	64	// 0xxx is ASCII, handled above. 10xxx is a trailing byte, invalid here.
				65	// 11111xxx is not valid UTF-8 at all. Assert because it's probably our bug.
				66	assert((UTF8Length >= 2 && UTF8Length <= 4) &&
				67	"Invalid UTF-8, or transcoding bug?");
				68	I += UTF8Length; // Skip over all trailing bytes.
				69	// A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
				70	// Astral codepoints are encoded as 4 bytes in UTF-8 (11110xxx ...)
				71	if (CB(UTF8Length, UTF8Length == 4 ? 2 : 1))
				72	return true;
				73	}
				74	return false;
				75	}
				76
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	77	// Returns the byte offset into the string that is an offset of \p Units in
				78	// the specified encoding.
				79	// Conceptually, this converts to the encoding, truncates to CodeUnits,
				80	// converts back to UTF-8, and returns the length in bytes.
				81	static size_t measureUnits(llvm::StringRef U8, int Units, OffsetEncoding Enc,
				82	bool &Valid) {
				83	Valid = Units >= 0;
				84	if (Units <= 0)
				85	return 0;
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	86	size_t Result = 0;
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	87	switch (Enc) {
				88	case OffsetEncoding::UTF8:
				89	Result = Units;
				90	break;
				91	case OffsetEncoding::UTF16:
				92	Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) {
				93	Result += U8Len;
				94	Units -= U16Len;
				95	return Units <= 0;
				96	});
				97	if (Units < 0) // Offset in the middle of a surrogate pair.
				98	Valid = false;
				99	break;
				100	case OffsetEncoding::UTF32:
				101	Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) {
				102	Result += U8Len;
				103	Units--;
				104	return Units <= 0;
				105	});
				106	break;
				107	case OffsetEncoding::UnsupportedEncoding:
				108	llvm_unreachable("unsupported encoding");
				109	}
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	110	// Don't return an out-of-range index if we overran.
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	111	if (Result > U8.size()) {
				112	Valid = false;
				113	return U8.size();
				114	}
				115	return Result;
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	116	}
				117
Sam McCall	a69698f	2019-03-27 17:47:49 +0000	[diff] [blame]	118	Key<OffsetEncoding> kCurrentOffsetEncoding;
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	119	static OffsetEncoding lspEncoding() {
Sam McCall	a69698f	2019-03-27 17:47:49 +0000	[diff] [blame]	120	auto *Enc = Context::current().get(kCurrentOffsetEncoding);
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	121	return Enc ? *Enc : OffsetEncoding::UTF16;
Sam McCall	a69698f	2019-03-27 17:47:49 +0000	[diff] [blame]	122	}
				123
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	124	// Like most strings in clangd, the input is UTF-8 encoded.
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	125	size_t lspLength(llvm::StringRef Code) {
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	126	size_t Count = 0;
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	127	switch (lspEncoding()) {
				128	case OffsetEncoding::UTF8:
				129	Count = Code.size();
				130	break;
				131	case OffsetEncoding::UTF16:
				132	iterateCodepoints(Code, [&](int U8Len, int U16Len) {
				133	Count += U16Len;
				134	return false;
				135	});
				136	break;
				137	case OffsetEncoding::UTF32:
				138	iterateCodepoints(Code, [&](int U8Len, int U16Len) {
				139	++Count;
				140	return false;
				141	});
				142	break;
				143	case OffsetEncoding::UnsupportedEncoding:
				144	llvm_unreachable("unsupported encoding");
				145	}
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	146	return Count;
				147	}
				148
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	149	llvm::Expected<size_t> positionToOffset(llvm::StringRef Code, Position P,
				150	bool AllowColumnsBeyondLineLength) {
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	151	if (P.line < 0)
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	152	return llvm::make_error<llvm::StringError>(
				153	llvm::formatv("Line value can't be negative ({0})", P.line),
				154	llvm::errc::invalid_argument);
Simon Marchi	766338a	2018-03-21 14:36:46 +0000	[diff] [blame]	155	if (P.character < 0)
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	156	return llvm::make_error<llvm::StringError>(
				157	llvm::formatv("Character value can't be negative ({0})", P.character),
				158	llvm::errc::invalid_argument);
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	159	size_t StartOfLine = 0;
				160	for (int I = 0; I != P.line; ++I) {
				161	size_t NextNL = Code.find('\n', StartOfLine);
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	162	if (NextNL == llvm::StringRef::npos)
				163	return llvm::make_error<llvm::StringError>(
				164	llvm::formatv("Line value is out of range ({0})", P.line),
				165	llvm::errc::invalid_argument);
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	166	StartOfLine = NextNL + 1;
				167	}
Sam McCall	a69698f	2019-03-27 17:47:49 +0000	[diff] [blame]	168	StringRef Line =
				169	Code.substr(StartOfLine).take_until([](char C) { return C == '\n'; });
Simon Marchi	766338a	2018-03-21 14:36:46 +0000	[diff] [blame]	170
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	171	// P.character may be in UTF-16, transcode if necessary.
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	172	bool Valid;
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	173	size_t ByteInLine = measureUnits(Line, P.character, lspEncoding(), Valid);
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	174	if (!Valid && !AllowColumnsBeyondLineLength)
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	175	return llvm::make_error<llvm::StringError>(
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	176	llvm::formatv("{0} offset {1} is invalid for line {2}", lspEncoding(),
				177	P.character, P.line),
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	178	llvm::errc::invalid_argument);
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	179	return StartOfLine + ByteInLine;
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	180	}
				181
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	182	Position offsetToPosition(llvm::StringRef Code, size_t Offset) {
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	183	Offset = std::min(Code.size(), Offset);
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	184	llvm::StringRef Before = Code.substr(0, Offset);
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	185	int Lines = Before.count('\n');
				186	size_t PrevNL = Before.rfind('\n');
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	187	size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1);
Ilya Biryukov	7beea3a	2018-02-14 10:52:04 +0000	[diff] [blame]	188	Position Pos;
				189	Pos.line = Lines;
Sam McCall	7189112	2018-10-23 11:51:53 +0000	[diff] [blame]	190	Pos.character = lspLength(Before.substr(StartOfLine));
Ilya Biryukov	7beea3a	2018-02-14 10:52:04 +0000	[diff] [blame]	191	return Pos;
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	192	}
				193
Marc-Andre Laperle	63a1098	2018-02-21 02:39:08 +0000	[diff] [blame]	194	Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc) {
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	195	// We use the SourceManager's line tables, but its column number is in bytes.
				196	FileID FID;
				197	unsigned Offset;
				198	std::tie(FID, Offset) = SM.getDecomposedSpellingLoc(Loc);
Marc-Andre Laperle	63a1098	2018-02-21 02:39:08 +0000	[diff] [blame]	199	Position P;
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	200	P.line = static_cast<int>(SM.getLineNumber(FID, Offset)) - 1;
				201	bool Invalid = false;
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	202	llvm::StringRef Code = SM.getBufferData(FID, &Invalid);
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	203	if (!Invalid) {
				204	auto ColumnInBytes = SM.getColumnNumber(FID, Offset) - 1;
				205	auto LineSoFar = Code.substr(Offset - ColumnInBytes, ColumnInBytes);
Sam McCall	7189112	2018-10-23 11:51:53 +0000	[diff] [blame]	206	P.character = lspLength(LineSoFar);
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	207	}
Marc-Andre Laperle	63a1098	2018-02-21 02:39:08 +0000	[diff] [blame]	208	return P;
				209	}
				210
Sam McCall	9573807	2019-08-06 20:25:59 +0000	[diff] [blame]	211	bool isSpelledInSource(SourceLocation Loc, const SourceManager &SM) {
				212	if (Loc.isMacroID()) {
				213	std::string PrintLoc = SM.getSpellingLoc(Loc).printToString(SM);
				214	if (llvm::StringRef(PrintLoc).startswith("<scratch") \|\|
				215	llvm::StringRef(PrintLoc).startswith("<command line>"))
				216	return false;
				217	}
				218	return true;
				219	}
				220
				221	SourceLocation spellingLocIfSpelled(SourceLocation Loc,
				222	const SourceManager &SM) {
				223	if (!isSpelledInSource(Loc, SM))
				224	// Use the expansion location as spelling location is not interesting.
				225	return SM.getExpansionRange(Loc).getBegin();
				226	return SM.getSpellingLoc(Loc);
				227	}
				228
Haojian Wu	92c3257	2019-06-25 08:01:46 +0000	[diff] [blame]	229	llvm::Optional<Range> getTokenRange(const SourceManager &SM,
				230	const LangOptions &LangOpts,
				231	SourceLocation TokLoc) {
				232	if (!TokLoc.isValid())
				233	return llvm::None;
				234	SourceLocation End = Lexer::getLocForEndOfToken(TokLoc, 0, SM, LangOpts);
				235	if (!End.isValid())
				236	return llvm::None;
				237	return halfOpenToRange(SM, CharSourceRange::getCharRange(TokLoc, End));
				238	}
				239
Haojian Wu	9f2bf66	2019-10-01 11:03:56 +0000	[diff] [blame]	240	namespace {
				241
				242	enum TokenFlavor { Identifier, Operator, Whitespace, Other };
				243
				244	bool isOverloadedOperator(const Token &Tok) {
				245	switch (Tok.getKind()) {
				246	#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemOnly) \
				247	case tok::Token:
				248	#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemOnly)
				249	#include "clang/Basic/OperatorKinds.def"
				250	return true;
				251
				252	default:
				253	break;
				254	}
				255	return false;
				256	}
				257
				258	TokenFlavor getTokenFlavor(SourceLocation Loc, const SourceManager &SM,
				259	const LangOptions &LangOpts) {
				260	Token Tok;
				261	Tok.setKind(tok::NUM_TOKENS);
				262	if (Lexer::getRawToken(Loc, Tok, SM, LangOpts,
				263	/IgnoreWhiteSpace/ false))
				264	return Other;
				265
				266	// getRawToken will return false without setting Tok when the token is
				267	// whitespace, so if the flag is not set, we are sure this is a whitespace.
				268	if (Tok.is(tok::TokenKind::NUM_TOKENS))
				269	return Whitespace;
				270	if (Tok.is(tok::TokenKind::raw_identifier))
				271	return Identifier;
				272	if (isOverloadedOperator(Tok))
				273	return Operator;
				274	return Other;
				275	}
				276
				277	} // namespace
				278
Sam McCall	19cefc2	2019-09-03 15:34:47 +0000	[diff] [blame]	279	SourceLocation getBeginningOfIdentifier(const Position &Pos,
				280	const SourceManager &SM,
				281	const LangOptions &LangOpts) {
				282	FileID FID = SM.getMainFileID();
				283	auto Offset = positionToOffset(SM.getBufferData(FID), Pos);
				284	if (!Offset) {
				285	log("getBeginningOfIdentifier: {0}", Offset.takeError());
				286	return SourceLocation();
				287	}
				288
Haojian Wu	9f2bf66	2019-10-01 11:03:56 +0000	[diff] [blame]	289	// GetBeginningOfToken(InputLoc) is almost what we want, but does the wrong
				290	// thing if the cursor is at the end of the token (identifier or operator).
				291	// The cases are:
				292	// 1) at the beginning of the token
				293	// 2) at the middle of the token
				294	// 3) at the end of the token
				295	// 4) anywhere outside the identifier or operator
				296	// To distinguish all cases, we lex both at the
				297	// GetBeginningOfToken(InputLoc-1) and GetBeginningOfToken(InputLoc), for
				298	// cases 1 and 4, we just return the original location.
Sam McCall	19cefc2	2019-09-03 15:34:47 +0000	[diff] [blame]	299	SourceLocation InputLoc = SM.getComposedLoc(FID, *Offset);
Haojian Wu	9f2bf66	2019-10-01 11:03:56 +0000	[diff] [blame]	300	if (*Offset == 0) // Case 1 or 4.
Sam McCall	b2a984c0	2019-09-04 10:15:27 +0000	[diff] [blame]	301	return InputLoc;
Sam McCall	19cefc2	2019-09-03 15:34:47 +0000	[diff] [blame]	302	SourceLocation Before = SM.getComposedLoc(FID, *Offset - 1);
Haojian Wu	9f2bf66	2019-10-01 11:03:56 +0000	[diff] [blame]	303	SourceLocation BeforeTokBeginning =
				304	Lexer::GetBeginningOfToken(Before, SM, LangOpts);
				305	TokenFlavor BeforeKind = getTokenFlavor(BeforeTokBeginning, SM, LangOpts);
Sam McCall	19cefc2	2019-09-03 15:34:47 +0000	[diff] [blame]	306
Haojian Wu	9f2bf66	2019-10-01 11:03:56 +0000	[diff] [blame]	307	SourceLocation CurrentTokBeginning =
				308	Lexer::GetBeginningOfToken(InputLoc, SM, LangOpts);
				309	TokenFlavor CurrentKind = getTokenFlavor(CurrentTokBeginning, SM, LangOpts);
				310
				311	// At the middle of the token.
				312	if (BeforeTokBeginning == CurrentTokBeginning) {
				313	// For interesting token, we return the beginning of the token.
				314	if (CurrentKind == Identifier \|\| CurrentKind == Operator)
				315	return CurrentTokBeginning;
				316	// otherwise, we return the original loc.
				317	return InputLoc;
				318	}
				319
				320	// Whitespace is not interesting.
				321	if (BeforeKind == Whitespace)
				322	return CurrentTokBeginning;
				323	if (CurrentKind == Whitespace)
				324	return BeforeTokBeginning;
				325
				326	// The cursor is at the token boundary, e.g. "Before^Current", we prefer
				327	// identifiers to other tokens.
				328	if (CurrentKind == Identifier)
				329	return CurrentTokBeginning;
				330	if (BeforeKind == Identifier)
				331	return BeforeTokBeginning;
				332	// Then prefer overloaded operators to other tokens.
				333	if (CurrentKind == Operator)
				334	return CurrentTokBeginning;
				335	if (BeforeKind == Operator)
				336	return BeforeTokBeginning;
				337
				338	// Non-interesting case, we just return the original location.
				339	return InputLoc;
Sam McCall	19cefc2	2019-09-03 15:34:47 +0000	[diff] [blame]	340	}
				341
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	342	bool isValidFileRange(const SourceManager &Mgr, SourceRange R) {
				343	if (!R.getBegin().isValid() \|\| !R.getEnd().isValid())
				344	return false;
				345
				346	FileID BeginFID;
				347	size_t BeginOffset = 0;
				348	std::tie(BeginFID, BeginOffset) = Mgr.getDecomposedLoc(R.getBegin());
				349
				350	FileID EndFID;
				351	size_t EndOffset = 0;
				352	std::tie(EndFID, EndOffset) = Mgr.getDecomposedLoc(R.getEnd());
				353
				354	return BeginFID.isValid() && BeginFID == EndFID && BeginOffset <= EndOffset;
				355	}
				356
				357	bool halfOpenRangeContains(const SourceManager &Mgr, SourceRange R,
				358	SourceLocation L) {
				359	assert(isValidFileRange(Mgr, R));
				360
				361	FileID BeginFID;
				362	size_t BeginOffset = 0;
				363	std::tie(BeginFID, BeginOffset) = Mgr.getDecomposedLoc(R.getBegin());
				364	size_t EndOffset = Mgr.getFileOffset(R.getEnd());
				365
				366	FileID LFid;
				367	size_t LOffset;
				368	std::tie(LFid, LOffset) = Mgr.getDecomposedLoc(L);
				369	return BeginFID == LFid && BeginOffset <= LOffset && LOffset < EndOffset;
				370	}
				371
				372	bool halfOpenRangeTouches(const SourceManager &Mgr, SourceRange R,
				373	SourceLocation L) {
				374	return L == R.getEnd() \|\| halfOpenRangeContains(Mgr, R, L);
				375	}
				376
Sam McCall	c791d85	2019-08-27 08:44:06 +0000	[diff] [blame]	377	SourceLocation includeHashLoc(FileID IncludedFile, const SourceManager &SM) {
				378	assert(SM.getLocForEndOfFile(IncludedFile).isFileID());
				379	FileID IncludingFile;
				380	unsigned Offset;
				381	std::tie(IncludingFile, Offset) =
				382	SM.getDecomposedExpansionLoc(SM.getIncludeLoc(IncludedFile));
				383	bool Invalid = false;
				384	llvm::StringRef Buf = SM.getBufferData(IncludingFile, &Invalid);
				385	if (Invalid)
				386	return SourceLocation();
				387	// Now buf is "...\n#include <foo>\n..."
				388	// and Offset points here: ^
				389	// Rewind to the preceding # on the line.
				390	assert(Offset < Buf.size());
				391	for (;; --Offset) {
				392	if (Buf[Offset] == '#')
				393	return SM.getComposedLoc(IncludingFile, Offset);
				394	if (Buf[Offset] == '\n' \|\| Offset == 0) // no hash, what's going on?
				395	return SourceLocation();
				396	}
				397	}
				398
				399
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	400	static unsigned getTokenLengthAtLoc(SourceLocation Loc, const SourceManager &SM,
				401	const LangOptions &LangOpts) {
				402	Token TheTok;
				403	if (Lexer::getRawToken(Loc, TheTok, SM, LangOpts))
				404	return 0;
				405	// FIXME: Here we check whether the token at the location is a greatergreater
				406	// (>>) token and consider it as a single greater (>). This is to get it
				407	// working for templates but it isn't correct for the right shift operator. We
				408	// can avoid this by using half open char ranges in getFileRange() but getting
				409	// token ending is not well supported in macroIDs.
				410	if (TheTok.is(tok::greatergreater))
				411	return 1;
				412	return TheTok.getLength();
				413	}
				414
				415	// Returns location of the last character of the token at a given loc
				416	static SourceLocation getLocForTokenEnd(SourceLocation BeginLoc,
				417	const SourceManager &SM,
				418	const LangOptions &LangOpts) {
				419	unsigned Len = getTokenLengthAtLoc(BeginLoc, SM, LangOpts);
				420	return BeginLoc.getLocWithOffset(Len ? Len - 1 : 0);
				421	}
				422
				423	// Returns location of the starting of the token at a given EndLoc
				424	static SourceLocation getLocForTokenBegin(SourceLocation EndLoc,
				425	const SourceManager &SM,
				426	const LangOptions &LangOpts) {
				427	return EndLoc.getLocWithOffset(
				428	-(signed)getTokenLengthAtLoc(EndLoc, SM, LangOpts));
				429	}
				430
				431	// Converts a char source range to a token range.
				432	static SourceRange toTokenRange(CharSourceRange Range, const SourceManager &SM,
				433	const LangOptions &LangOpts) {
				434	if (!Range.isTokenRange())
				435	Range.setEnd(getLocForTokenBegin(Range.getEnd(), SM, LangOpts));
				436	return Range.getAsRange();
				437	}
				438	// Returns the union of two token ranges.
				439	// To find the maximum of the Ends of the ranges, we compare the location of the
				440	// last character of the token.
				441	static SourceRange unionTokenRange(SourceRange R1, SourceRange R2,
				442	const SourceManager &SM,
				443	const LangOptions &LangOpts) {
Sam McCall	c791d85	2019-08-27 08:44:06 +0000	[diff] [blame]	444	SourceLocation Begin =
				445	SM.isBeforeInTranslationUnit(R1.getBegin(), R2.getBegin())
				446	? R1.getBegin()
				447	: R2.getBegin();
				448	SourceLocation End =
				449	SM.isBeforeInTranslationUnit(getLocForTokenEnd(R1.getEnd(), SM, LangOpts),
				450	getLocForTokenEnd(R2.getEnd(), SM, LangOpts))
				451	? R2.getEnd()
				452	: R1.getEnd();
				453	return SourceRange(Begin, End);
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	454	}
				455
Sam McCall	c791d85	2019-08-27 08:44:06 +0000	[diff] [blame]	456	// Given a range whose endpoints may be in different expansions or files,
				457	// tries to find a range within a common file by following up the expansion and
				458	// include location in each.
				459	static SourceRange rangeInCommonFile(SourceRange R, const SourceManager &SM,
				460	const LangOptions &LangOpts) {
				461	// Fast path for most common cases.
				462	if (SM.isWrittenInSameFile(R.getBegin(), R.getEnd()))
				463	return R;
				464	// Record the stack of expansion locations for the beginning, keyed by FileID.
				465	llvm::DenseMap<FileID, SourceLocation> BeginExpansions;
				466	for (SourceLocation Begin = R.getBegin(); Begin.isValid();
				467	Begin = Begin.isFileID()
				468	? includeHashLoc(SM.getFileID(Begin), SM)
				469	: SM.getImmediateExpansionRange(Begin).getBegin()) {
				470	BeginExpansions[SM.getFileID(Begin)] = Begin;
				471	}
				472	// Move up the stack of expansion locations for the end until we find the
				473	// location in BeginExpansions with that has the same file id.
				474	for (SourceLocation End = R.getEnd(); End.isValid();
				475	End = End.isFileID() ? includeHashLoc(SM.getFileID(End), SM)
				476	: toTokenRange(SM.getImmediateExpansionRange(End),
				477	SM, LangOpts)
				478	.getEnd()) {
				479	auto It = BeginExpansions.find(SM.getFileID(End));
				480	if (It != BeginExpansions.end()) {
				481	if (SM.getFileOffset(It->second) > SM.getFileOffset(End))
				482	return SourceLocation();
				483	return {It->second, End};
				484	}
				485	}
				486	return SourceRange();
Shaurya Gupta	8fbb6ce	2019-08-06 17:01:12 +0000	[diff] [blame]	487	}
				488
				489	// Find an expansion range (not necessarily immediate) the ends of which are in
				490	// the same file id.
				491	static SourceRange
				492	getExpansionTokenRangeInSameFile(SourceLocation Loc, const SourceManager &SM,
				493	const LangOptions &LangOpts) {
Sam McCall	c791d85	2019-08-27 08:44:06 +0000	[diff] [blame]	494	return rangeInCommonFile(
				495	toTokenRange(SM.getImmediateExpansionRange(Loc), SM, LangOpts), SM,
				496	LangOpts);
Shaurya Gupta	8fbb6ce	2019-08-06 17:01:12 +0000	[diff] [blame]	497	}
Sam McCall	c791d85	2019-08-27 08:44:06 +0000	[diff] [blame]	498
Shaurya Gupta	8fbb6ce	2019-08-06 17:01:12 +0000	[diff] [blame]	499	// Returns the file range for a given Location as a Token Range
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	500	// This is quite similar to getFileLoc in SourceManager as both use
				501	// getImmediateExpansionRange and getImmediateSpellingLoc (for macro IDs).
				502	// However:
				503	// - We want to maintain the full range information as we move from one file to
				504	// the next. getFileLoc only uses the BeginLoc of getImmediateExpansionRange.
Shaurya Gupta	8fbb6ce	2019-08-06 17:01:12 +0000	[diff] [blame]	505	// - We want to split '>>' tokens as the lexer parses the '>>' in nested
				506	// template instantiations as a '>>' instead of two '>'s.
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	507	// There is also getExpansionRange but it simply calls
				508	// getImmediateExpansionRange on the begin and ends separately which is wrong.
				509	static SourceRange getTokenFileRange(SourceLocation Loc,
				510	const SourceManager &SM,
				511	const LangOptions &LangOpts) {
				512	SourceRange FileRange = Loc;
				513	while (!FileRange.getBegin().isFileID()) {
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	514	if (SM.isMacroArgExpansion(FileRange.getBegin())) {
Shaurya Gupta	8fbb6ce	2019-08-06 17:01:12 +0000	[diff] [blame]	515	FileRange = unionTokenRange(
				516	SM.getImmediateSpellingLoc(FileRange.getBegin()),
				517	SM.getImmediateSpellingLoc(FileRange.getEnd()), SM, LangOpts);
Sam McCall	c791d85	2019-08-27 08:44:06 +0000	[diff] [blame]	518	assert(SM.isWrittenInSameFile(FileRange.getBegin(), FileRange.getEnd()));
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	519	} else {
Shaurya Gupta	8fbb6ce	2019-08-06 17:01:12 +0000	[diff] [blame]	520	SourceRange ExpansionRangeForBegin =
				521	getExpansionTokenRangeInSameFile(FileRange.getBegin(), SM, LangOpts);
				522	SourceRange ExpansionRangeForEnd =
				523	getExpansionTokenRangeInSameFile(FileRange.getEnd(), SM, LangOpts);
Sam McCall	c791d85	2019-08-27 08:44:06 +0000	[diff] [blame]	524	if (ExpansionRangeForBegin.isInvalid() \|\|
				525	ExpansionRangeForEnd.isInvalid())
				526	return SourceRange();
				527	assert(SM.isWrittenInSameFile(ExpansionRangeForBegin.getBegin(),
				528	ExpansionRangeForEnd.getBegin()) &&
Shaurya Gupta	8fbb6ce	2019-08-06 17:01:12 +0000	[diff] [blame]	529	"Both Expansion ranges should be in same file.");
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	530	FileRange = unionTokenRange(ExpansionRangeForBegin, ExpansionRangeForEnd,
				531	SM, LangOpts);
				532	}
				533	}
				534	return FileRange;
				535	}
				536
Haojian Wu	6ae86ea	2019-07-19 08:33:39 +0000	[diff] [blame]	537	bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM) {
				538	return Loc.isValid() && SM.isWrittenInMainFile(SM.getExpansionLoc(Loc));
				539	}
				540
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	541	llvm::Optional<SourceRange> toHalfOpenFileRange(const SourceManager &SM,
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	542	const LangOptions &LangOpts,
				543	SourceRange R) {
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	544	SourceRange R1 = getTokenFileRange(R.getBegin(), SM, LangOpts);
				545	if (!isValidFileRange(SM, R1))
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	546	return llvm::None;
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	547
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	548	SourceRange R2 = getTokenFileRange(R.getEnd(), SM, LangOpts);
				549	if (!isValidFileRange(SM, R2))
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	550	return llvm::None;
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	551
Sam McCall	c791d85	2019-08-27 08:44:06 +0000	[diff] [blame]	552	SourceRange Result =
				553	rangeInCommonFile(unionTokenRange(R1, R2, SM, LangOpts), SM, LangOpts);
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	554	unsigned TokLen = getTokenLengthAtLoc(Result.getEnd(), SM, LangOpts);
				555	// Convert from closed token range to half-open (char) range
				556	Result.setEnd(Result.getEnd().getLocWithOffset(TokLen));
				557	if (!isValidFileRange(SM, Result))
				558	return llvm::None;
				559
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	560	return Result;
				561	}
				562
				563	llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R) {
				564	assert(isValidFileRange(SM, R));
				565	bool Invalid = false;
				566	auto *Buf = SM.getBuffer(SM.getFileID(R.getBegin()), &Invalid);
				567	assert(!Invalid);
				568
				569	size_t BeginOffset = SM.getFileOffset(R.getBegin());
				570	size_t EndOffset = SM.getFileOffset(R.getEnd());
				571	return Buf->getBuffer().substr(BeginOffset, EndOffset - BeginOffset);
				572	}
				573
Ilya Biryukov	cce67a3	2019-01-29 14:17:36 +0000	[diff] [blame]	574	llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM,
				575	Position P) {
				576	llvm::StringRef Code = SM.getBuffer(SM.getMainFileID())->getBuffer();
				577	auto Offset =
				578	positionToOffset(Code, P, /AllowColumnBeyondLineLength=/false);
				579	if (!Offset)
				580	return Offset.takeError();
				581	return SM.getLocForStartOfFile(SM.getMainFileID()).getLocWithOffset(*Offset);
				582	}
				583
Ilya Biryukov	71028b8	2018-03-12 15:28:22 +0000	[diff] [blame]	584	Range halfOpenToRange(const SourceManager &SM, CharSourceRange R) {
				585	// Clang is 1-based, LSP uses 0-based indexes.
				586	Position Begin = sourceLocToPosition(SM, R.getBegin());
				587	Position End = sourceLocToPosition(SM, R.getEnd());
				588
				589	return {Begin, End};
				590	}
				591
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	592	std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code,
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	593	size_t Offset) {
				594	Offset = std::min(Code.size(), Offset);
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	595	llvm::StringRef Before = Code.substr(0, Offset);
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	596	int Lines = Before.count('\n');
				597	size_t PrevNL = Before.rfind('\n');
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	598	size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1);
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	599	return {Lines + 1, Offset - StartOfLine + 1};
				600	}
				601
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	602	std::pair<StringRef, StringRef> splitQualifiedName(StringRef QName) {
Marc-Andre Laperle	b387b6e	2018-04-23 20:00:52 +0000	[diff] [blame]	603	size_t Pos = QName.rfind("::");
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	604	if (Pos == llvm::StringRef::npos)
				605	return {llvm::StringRef(), QName};
Marc-Andre Laperle	b387b6e	2018-04-23 20:00:52 +0000	[diff] [blame]	606	return {QName.substr(0, Pos + 2), QName.substr(Pos + 2)};
				607	}
				608
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	609	TextEdit replacementToEdit(llvm::StringRef Code,
				610	const tooling::Replacement &R) {
Eric Liu	9133ecd	2018-05-11 12:12:08 +0000	[diff] [blame]	611	Range ReplacementRange = {
				612	offsetToPosition(Code, R.getOffset()),
				613	offsetToPosition(Code, R.getOffset() + R.getLength())};
				614	return {ReplacementRange, R.getReplacementText()};
				615	}
				616
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	617	std::vector<TextEdit> replacementsToEdits(llvm::StringRef Code,
Eric Liu	9133ecd	2018-05-11 12:12:08 +0000	[diff] [blame]	618	const tooling::Replacements &Repls) {
				619	std::vector<TextEdit> Edits;
				620	for (const auto &R : Repls)
				621	Edits.push_back(replacementToEdit(Code, R));
				622	return Edits;
				623	}
				624
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	625	llvm::Optional<std::string> getCanonicalPath(const FileEntry *F,
				626	const SourceManager &SourceMgr) {
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	627	if (!F)
				628	return None;
Simon Marchi	25f1f73	2018-08-10 22:27:53 +0000	[diff] [blame]	629
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	630	llvm::SmallString<128> FilePath = F->getName();
				631	if (!llvm::sys::path::is_absolute(FilePath)) {
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	632	if (auto EC =
Duncan P. N. Exon Smith	db8a742	2019-03-26 22:32:06 +0000	[diff] [blame]	633	SourceMgr.getFileManager().getVirtualFileSystem().makeAbsolute(
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	634	FilePath)) {
				635	elog("Could not turn relative path '{0}' to absolute: {1}", FilePath,
				636	EC.message());
Sam McCall	c008af6	2018-10-20 15:30:37 +0000	[diff] [blame]	637	return None;
Marc-Andre Laperle	1be6970	2018-07-05 19:35:01 +0000	[diff] [blame]	638	}
				639	}
Simon Marchi	25f1f73	2018-08-10 22:27:53 +0000	[diff] [blame]	640
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	641	// Handle the symbolic link path case where the current working directory
Kadir Cetinkaya	5b27093	2019-09-09 12:28:44 +0000	[diff] [blame]	642	// (getCurrentWorkingDirectory) is a symlink. We always want to the real
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	643	// file path (instead of the symlink path) for the C++ symbols.
				644	//
				645	// Consider the following example:
				646	//
				647	// src dir: /project/src/foo.h
				648	// current working directory (symlink): /tmp/build -> /project/src/
				649	//
				650	// The file path of Symbol is "/project/src/foo.h" instead of
				651	// "/tmp/build/foo.h"
Harlan Haskins	a02f857	2019-08-01 21:32:01 +0000	[diff] [blame]	652	if (auto Dir = SourceMgr.getFileManager().getDirectory(
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	653	llvm::sys::path::parent_path(FilePath))) {
				654	llvm::SmallString<128> RealPath;
Harlan Haskins	a02f857	2019-08-01 21:32:01 +0000	[diff] [blame]	655	llvm::StringRef DirName = SourceMgr.getFileManager().getCanonicalName(*Dir);
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	656	llvm::sys::path::append(RealPath, DirName,
				657	llvm::sys::path::filename(FilePath));
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	658	return RealPath.str().str();
Simon Marchi	25f1f73	2018-08-10 22:27:53 +0000	[diff] [blame]	659	}
				660
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	661	return FilePath.str().str();
Marc-Andre Laperle	1be6970	2018-07-05 19:35:01 +0000	[diff] [blame]	662	}
				663
Kadir Cetinkaya	2f84d91	2018-08-08 08:59:29 +0000	[diff] [blame]	664	TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M,
				665	const LangOptions &L) {
				666	TextEdit Result;
				667	Result.range =
				668	halfOpenToRange(M, Lexer::makeFileCharRange(FixIt.RemoveRange, M, L));
				669	Result.newText = FixIt.CodeToInsert;
				670	return Result;
				671	}
				672
Haojian Wu	aa3ed5a	2019-01-25 15:14:03 +0000	[diff] [blame]	673	bool isRangeConsecutive(const Range &Left, const Range &Right) {
Kadir Cetinkaya	a9c9d00	2018-08-13 08:23:01 +0000	[diff] [blame]	674	return Left.end.line == Right.start.line &&
				675	Left.end.character == Right.start.character;
				676	}
				677
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	678	FileDigest digest(llvm::StringRef Content) {
Sam McCall	674d8a9	2019-07-08 11:33:17 +0000	[diff] [blame]	679	uint64_t Hash{llvm::xxHash64(Content)};
				680	FileDigest Result;
				681	for (unsigned I = 0; I < Result.size(); ++I) {
				682	Result[I] = uint8_t(Hash);
				683	Hash >>= 8;
				684	}
				685	return Result;
Kadir Cetinkaya	d08eab4	2018-11-27 16:08:53 +0000	[diff] [blame]	686	}
				687
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	688	llvm::Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {
Kadir Cetinkaya	d08eab4	2018-11-27 16:08:53 +0000	[diff] [blame]	689	bool Invalid = false;
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	690	llvm::StringRef Content = SM.getBufferData(FID, &Invalid);
Kadir Cetinkaya	d08eab4	2018-11-27 16:08:53 +0000	[diff] [blame]	691	if (Invalid)
				692	return None;
				693	return digest(Content);
				694	}
				695
Eric Liu	dd66277	2019-01-28 14:01:55 +0000	[diff] [blame]	696	format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
				697	llvm::StringRef Content,
				698	llvm::vfs::FileSystem *FS) {
				699	auto Style = format::getStyle(format::DefaultFormatStyle, File,
				700	format::DefaultFallbackStyle, Content, FS);
				701	if (!Style) {
				702	log("getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File,
				703	Style.takeError());
				704	Style = format::getLLVMStyle();
				705	}
				706	return *Style;
				707	}
				708
Haojian Wu	12e194c	2019-02-06 15:24:50 +0000	[diff] [blame]	709	llvm::Expected<tooling::Replacements>
				710	cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces,
				711	const format::FormatStyle &Style) {
				712	auto CleanReplaces = cleanupAroundReplacements(Code, Replaces, Style);
				713	if (!CleanReplaces)
				714	return CleanReplaces;
				715	return formatReplacements(Code, std::move(*CleanReplaces), Style);
				716	}
				717
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	718	template <typename Action>
				719	static void lex(llvm::StringRef Code, const format::FormatStyle &Style,
				720	Action A) {
				721	// FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated!
				722	std::string NullTerminatedCode = Code.str();
				723	SourceManagerForFile FileSM("dummy.cpp", NullTerminatedCode);
Eric Liu	00d99bd	2019-04-11 09:36:36 +0000	[diff] [blame]	724	auto &SM = FileSM.get();
				725	auto FID = SM.getMainFileID();
				726	Lexer Lex(FID, SM.getBuffer(FID), SM, format::getFormattingLangOpts(Style));
				727	Token Tok;
				728
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	729	while (!Lex.LexFromRawLexer(Tok))
				730	A(Tok);
Kadir Cetinkaya	194117f	2019-09-25 14:12:05 +0000	[diff] [blame]	731	// LexFromRawLexer returns true after it lexes last token, so we still have
				732	// one more token to report.
				733	A(Tok);
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	734	}
				735
				736	llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
				737	const format::FormatStyle &Style) {
Eric Liu	00d99bd	2019-04-11 09:36:36 +0000	[diff] [blame]	738	llvm::StringMap<unsigned> Identifiers;
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	739	lex(Content, Style, [&](const clang::Token &Tok) {
Eric Liu	00d99bd	2019-04-11 09:36:36 +0000	[diff] [blame]	740	switch (Tok.getKind()) {
				741	case tok::identifier:
				742	++Identifiers[Tok.getIdentifierInfo()->getName()];
				743	break;
				744	case tok::raw_identifier:
				745	++Identifiers[Tok.getRawIdentifier()];
				746	break;
				747	default:
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	748	break;
Eric Liu	00d99bd	2019-04-11 09:36:36 +0000	[diff] [blame]	749	}
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	750	});
Eric Liu	00d99bd	2019-04-11 09:36:36 +0000	[diff] [blame]	751	return Identifiers;
				752	}
				753
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	754	namespace {
				755	enum NamespaceEvent {
				756	BeginNamespace, // namespace <ns> {. Payload is resolved <ns>.
				757	EndNamespace, // } // namespace <ns>. Payload is resolved outer namespace.
				758	UsingDirective // using namespace <ns>. Payload is unresolved <ns>.
				759	};
				760	// Scans C++ source code for constructs that change the visible namespaces.
				761	void parseNamespaceEvents(
				762	llvm::StringRef Code, const format::FormatStyle &Style,
				763	llvm::function_ref<void(NamespaceEvent, llvm::StringRef)> Callback) {
				764
				765	// Stack of enclosing namespaces, e.g. {"clang", "clangd"}
				766	std::vector<std::string> Enclosing; // Contains e.g. "clang", "clangd"
				767	// Stack counts open braces. true if the brace opened a namespace.
				768	std::vector<bool> BraceStack;
				769
				770	enum {
				771	Default,
				772	Namespace, // just saw 'namespace'
				773	NamespaceName, // just saw 'namespace' NSName
				774	Using, // just saw 'using'
				775	UsingNamespace, // just saw 'using namespace'
				776	UsingNamespaceName, // just saw 'using namespace' NSName
				777	} State = Default;
				778	std::string NSName;
				779
				780	lex(Code, Style, [&](const clang::Token &Tok) {
				781	switch(Tok.getKind()) {
				782	case tok::raw_identifier:
				783	// In raw mode, this could be a keyword or a name.
				784	switch (State) {
				785	case UsingNamespace:
				786	case UsingNamespaceName:
				787	NSName.append(Tok.getRawIdentifier());
				788	State = UsingNamespaceName;
				789	break;
				790	case Namespace:
				791	case NamespaceName:
				792	NSName.append(Tok.getRawIdentifier());
				793	State = NamespaceName;
				794	break;
				795	case Using:
				796	State =
				797	(Tok.getRawIdentifier() == "namespace") ? UsingNamespace : Default;
				798	break;
				799	case Default:
				800	NSName.clear();
				801	if (Tok.getRawIdentifier() == "namespace")
				802	State = Namespace;
				803	else if (Tok.getRawIdentifier() == "using")
				804	State = Using;
				805	break;
				806	}
				807	break;
				808	case tok::coloncolon:
				809	// This can come at the beginning or in the middle of a namespace name.
				810	switch (State) {
				811	case UsingNamespace:
				812	case UsingNamespaceName:
				813	NSName.append("::");
				814	State = UsingNamespaceName;
				815	break;
				816	case NamespaceName:
				817	NSName.append("::");
				818	State = NamespaceName;
				819	break;
				820	case Namespace: // Not legal here.
				821	case Using:
				822	case Default:
				823	State = Default;
				824	break;
				825	}
				826	break;
				827	case tok::l_brace:
				828	// Record which { started a namespace, so we know when } ends one.
				829	if (State == NamespaceName) {
				830	// Parsed: namespace <name> {
				831	BraceStack.push_back(true);
				832	Enclosing.push_back(NSName);
				833	Callback(BeginNamespace, llvm::join(Enclosing, "::"));
				834	} else {
				835	// This case includes anonymous namespaces (State = Namespace).
				836	// For our purposes, they're not namespaces and we ignore them.
				837	BraceStack.push_back(false);
				838	}
				839	State = Default;
				840	break;
				841	case tok::r_brace:
				842	// If braces are unmatched, we're going to be confused, but don't crash.
				843	if (!BraceStack.empty()) {
				844	if (BraceStack.back()) {
				845	// Parsed: } // namespace
				846	Enclosing.pop_back();
				847	Callback(EndNamespace, llvm::join(Enclosing, "::"));
				848	}
				849	BraceStack.pop_back();
				850	}
				851	break;
				852	case tok::semi:
				853	if (State == UsingNamespaceName)
				854	// Parsed: using namespace <name> ;
				855	Callback(UsingDirective, llvm::StringRef(NSName));
				856	State = Default;
				857	break;
				858	default:
				859	State = Default;
				860	break;
				861	}
				862	});
				863	}
				864
				865	// Returns the prefix namespaces of NS: {"" ... NS}.
				866	llvm::SmallVector<llvm::StringRef, 8> ancestorNamespaces(llvm::StringRef NS) {
				867	llvm::SmallVector<llvm::StringRef, 8> Results;
				868	Results.push_back(NS.take_front(0));
				869	NS.split(Results, "::", /MaxSplit=/-1, /KeepEmpty=/false);
				870	for (llvm::StringRef &R : Results)
				871	R = NS.take_front(R.end() - NS.begin());
				872	return Results;
				873	}
				874
				875	} // namespace
				876
				877	std::vector<std::string> visibleNamespaces(llvm::StringRef Code,
				878	const format::FormatStyle &Style) {
				879	std::string Current;
				880	// Map from namespace to (resolved) namespaces introduced via using directive.
				881	llvm::StringMap<llvm::StringSet<>> UsingDirectives;
				882
				883	parseNamespaceEvents(Code, Style,
				884	[&](NamespaceEvent Event, llvm::StringRef NS) {
				885	switch (Event) {
				886	case BeginNamespace:
				887	case EndNamespace:
				888	Current = NS;
				889	break;
				890	case UsingDirective:
				891	if (NS.consume_front("::"))
				892	UsingDirectives[Current].insert(NS);
				893	else {
				894	for (llvm::StringRef Enclosing :
				895	ancestorNamespaces(Current)) {
				896	if (Enclosing.empty())
				897	UsingDirectives[Current].insert(NS);
				898	else
				899	UsingDirectives[Current].insert(
				900	(Enclosing + "::" + NS).str());
				901	}
				902	}
				903	break;
				904	}
				905	});
				906
				907	std::vector<std::string> Found;
				908	for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) {
				909	Found.push_back(Enclosing);
				910	auto It = UsingDirectives.find(Enclosing);
				911	if (It != UsingDirectives.end())
				912	for (const auto& Used : It->second)
				913	Found.push_back(Used.getKey());
				914	}
				915
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	916	llvm::sort(Found, [&](const std::string &LHS, const std::string &RHS) {
				917	if (Current == RHS)
				918	return false;
				919	if (Current == LHS)
				920	return true;
				921	return LHS < RHS;
				922	});
				923	Found.erase(std::unique(Found.begin(), Found.end()), Found.end());
				924	return Found;
				925	}
				926
Sam McCall	9fb22b2	2019-05-06 10:25:10 +0000	[diff] [blame]	927	llvm::StringSet<> collectWords(llvm::StringRef Content) {
				928	// We assume short words are not significant.
				929	// We may want to consider other stopwords, e.g. language keywords.
				930	// (A very naive implementation showed no benefit, but lexing might do better)
				931	static constexpr int MinWordLength = 4;
				932
				933	std::vector<CharRole> Roles(Content.size());
				934	calculateRoles(Content, Roles);
				935
				936	llvm::StringSet<> Result;
				937	llvm::SmallString<256> Word;
				938	auto Flush = [&] {
				939	if (Word.size() >= MinWordLength) {
				940	for (char &C : Word)
				941	C = llvm::toLower(C);
				942	Result.insert(Word);
				943	}
				944	Word.clear();
				945	};
				946	for (unsigned I = 0; I < Content.size(); ++I) {
				947	switch (Roles[I]) {
				948	case Head:
				949	Flush();
				950	LLVM_FALLTHROUGH;
				951	case Tail:
				952	Word.push_back(Content[I]);
				953	break;
				954	case Unknown:
				955	case Separator:
				956	Flush();
				957	break;
				958	}
				959	}
				960	Flush();
				961
				962	return Result;
				963	}
				964
Haojian Wu	9d34f45	2019-07-01 09:26:48 +0000	[diff] [blame]	965	llvm::Optional<DefinedMacro> locateMacroAt(SourceLocation Loc,
				966	Preprocessor &PP) {
				967	const auto &SM = PP.getSourceManager();
				968	const auto &LangOpts = PP.getLangOpts();
				969	Token Result;
				970	if (Lexer::getRawToken(SM.getSpellingLoc(Loc), Result, SM, LangOpts, false))
				971	return None;
				972	if (Result.is(tok::raw_identifier))
				973	PP.LookUpIdentifierInfo(Result);
				974	IdentifierInfo *IdentifierInfo = Result.getIdentifierInfo();
				975	if (!IdentifierInfo \|\| !IdentifierInfo->hadMacroDefinition())
				976	return None;
				977
				978	std::pair<FileID, unsigned int> DecLoc = SM.getDecomposedExpansionLoc(Loc);
				979	// Get the definition just before the searched location so that a macro
				980	// referenced in a '#undef MACRO' can still be found.
				981	SourceLocation BeforeSearchedLocation =
				982	SM.getMacroArgExpandedLocation(SM.getLocForStartOfFile(DecLoc.first)
				983	.getLocWithOffset(DecLoc.second - 1));
				984	MacroDefinition MacroDef =
				985	PP.getMacroDefinitionAtLoc(IdentifierInfo, BeforeSearchedLocation);
				986	if (auto *MI = MacroDef.getMacroInfo())
				987	return DefinedMacro{IdentifierInfo->getName(), MI};
				988	return None;
				989	}
				990
Kadir Cetinkaya	5b27093	2019-09-09 12:28:44 +0000	[diff] [blame]	991	llvm::Expected<std::string> Edit::apply() const {
				992	return tooling::applyAllReplacements(InitialCode, Replacements);
				993	}
				994
				995	std::vector<TextEdit> Edit::asTextEdits() const {
				996	return replacementsToEdits(InitialCode, Replacements);
				997	}
				998
				999	bool Edit::canApplyTo(llvm::StringRef Code) const {
				1000	// Create line iterators, since line numbers are important while applying our
				1001	// edit we cannot skip blank lines.
				1002	auto LHS = llvm::MemoryBuffer::getMemBuffer(Code);
				1003	llvm::line_iterator LHSIt(LHS, /SkipBlanks=*/false);
				1004
				1005	auto RHS = llvm::MemoryBuffer::getMemBuffer(InitialCode);
				1006	llvm::line_iterator RHSIt(RHS, /SkipBlanks=*/false);
				1007
				1008	// Compare the InitialCode we prepared the edit for with the Code we received
				1009	// line by line to make sure there are no differences.
				1010	// FIXME: This check is too conservative now, it should be enough to only
				1011	// check lines around the replacements contained inside the Edit.
				1012	while (!LHSIt.is_at_eof() && !RHSIt.is_at_eof()) {
				1013	if (LHSIt != RHSIt)
				1014	return false;
				1015	++LHSIt;
				1016	++RHSIt;
				1017	}
				1018
				1019	// After we reach EOF for any of the files we make sure the other one doesn't
				1020	// contain any additional content except empty lines, they should not
				1021	// interfere with the edit we produced.
				1022	while (!LHSIt.is_at_eof()) {
				1023	if (!LHSIt->empty())
				1024	return false;
				1025	++LHSIt;
				1026	}
				1027	while (!RHSIt.is_at_eof()) {
				1028	if (!RHSIt->empty())
				1029	return false;
				1030	++RHSIt;
				1031	}
				1032	return true;
				1033	}
				1034
				1035	llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style) {
				1036	if (auto NewEdits = cleanupAndFormat(E.InitialCode, E.Replacements, Style))
				1037	E.Replacements = std::move(*NewEdits);
				1038	else
				1039	return NewEdits.takeError();
				1040	return llvm::Error::success();
				1041	}
				1042
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	1043	} // namespace clangd
				1044	} // namespace clang