Blame - clang-tools-extra/clangd/SourceCode.cpp - toolchain/llvm-project

blob: fb1183aef5115ccb6c76c7ff5160c8d8ae72c082 [file] [log] [blame]

Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	1	//===--- SourceCode.h - Manipulating source code as strings ------ C++ --===//
				2	//
Chandler Carruth	2946cd7	2019-01-19 08:50:56 +0000	[diff] [blame]	3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	6	//
				7	//===----------------------------------------------------------------------===//
				8	#include "SourceCode.h"
				9
Sam McCall	a69698f	2019-03-27 17:47:49 +0000	[diff] [blame]	10	#include "Context.h"
Sam McCall	9fb22b2	2019-05-06 10:25:10 +0000	[diff] [blame]	11	#include "FuzzyMatch.h"
Marc-Andre Laperle	1be6970	2018-07-05 19:35:01 +0000	[diff] [blame]	12	#include "Logger.h"
Sam McCall	a69698f	2019-03-27 17:47:49 +0000	[diff] [blame]	13	#include "Protocol.h"
Marc-Andre Laperle	1be6970	2018-07-05 19:35:01 +0000	[diff] [blame]	14	#include "clang/AST/ASTContext.h"
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	15	#include "clang/Basic/LangOptions.h"
				16	#include "clang/Basic/SourceLocation.h"
Marc-Andre Laperle	63a1098	2018-02-21 02:39:08 +0000	[diff] [blame]	17	#include "clang/Basic/SourceManager.h"
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	18	#include "clang/Basic/TokenKinds.h"
				19	#include "clang/Format/Format.h"
Marc-Andre Laperle	1be6970	2018-07-05 19:35:01 +0000	[diff] [blame]	20	#include "clang/Lex/Lexer.h"
Haojian Wu	9d34f45	2019-07-01 09:26:48 +0000	[diff] [blame]	21	#include "clang/Lex/Preprocessor.h"
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	22	#include "llvm/ADT/None.h"
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	23	#include "llvm/ADT/StringExtras.h"
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	24	#include "llvm/ADT/StringRef.h"
Sam McCall	9fb22b2	2019-05-06 10:25:10 +0000	[diff] [blame]	25	#include "llvm/Support/Compiler.h"
Simon Marchi	766338a	2018-03-21 14:36:46 +0000	[diff] [blame]	26	#include "llvm/Support/Errc.h"
				27	#include "llvm/Support/Error.h"
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	28	#include "llvm/Support/ErrorHandling.h"
Marc-Andre Laperle	1be6970	2018-07-05 19:35:01 +0000	[diff] [blame]	29	#include "llvm/Support/Path.h"
Sam McCall	674d8a9	2019-07-08 11:33:17 +0000	[diff] [blame]	30	#include "llvm/Support/xxhash.h"
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	31	#include <algorithm>
Marc-Andre Laperle	63a1098	2018-02-21 02:39:08 +0000	[diff] [blame]	32
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	33	namespace clang {
				34	namespace clangd {
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	35
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	36	// Here be dragons. LSP positions use columns measured in UTF-16 code units!
				37	// Clangd uses UTF-8 and byte-offsets internally, so conversion is nontrivial.
				38
				39	// Iterates over unicode codepoints in the (UTF-8) string. For each,
				40	// invokes CB(UTF-8 length, UTF-16 length), and breaks if it returns true.
				41	// Returns true if CB returned true, false if we hit the end of string.
				42	template <typename Callback>
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	43	static bool iterateCodepoints(llvm::StringRef U8, const Callback &CB) {
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	44	// A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
				45	// Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx.
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	46	for (size_t I = 0; I < U8.size();) {
				47	unsigned char C = static_cast<unsigned char>(U8[I]);
				48	if (LLVM_LIKELY(!(C & 0x80))) { // ASCII character.
				49	if (CB(1, 1))
				50	return true;
				51	++I;
				52	continue;
				53	}
				54	// This convenient property of UTF-8 holds for all non-ASCII characters.
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	55	size_t UTF8Length = llvm::countLeadingOnes(C);
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	56	// 0xxx is ASCII, handled above. 10xxx is a trailing byte, invalid here.
				57	// 11111xxx is not valid UTF-8 at all. Assert because it's probably our bug.
				58	assert((UTF8Length >= 2 && UTF8Length <= 4) &&
				59	"Invalid UTF-8, or transcoding bug?");
				60	I += UTF8Length; // Skip over all trailing bytes.
				61	// A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
				62	// Astral codepoints are encoded as 4 bytes in UTF-8 (11110xxx ...)
				63	if (CB(UTF8Length, UTF8Length == 4 ? 2 : 1))
				64	return true;
				65	}
				66	return false;
				67	}
				68
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	69	// Returns the byte offset into the string that is an offset of \p Units in
				70	// the specified encoding.
				71	// Conceptually, this converts to the encoding, truncates to CodeUnits,
				72	// converts back to UTF-8, and returns the length in bytes.
				73	static size_t measureUnits(llvm::StringRef U8, int Units, OffsetEncoding Enc,
				74	bool &Valid) {
				75	Valid = Units >= 0;
				76	if (Units <= 0)
				77	return 0;
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	78	size_t Result = 0;
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	79	switch (Enc) {
				80	case OffsetEncoding::UTF8:
				81	Result = Units;
				82	break;
				83	case OffsetEncoding::UTF16:
				84	Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) {
				85	Result += U8Len;
				86	Units -= U16Len;
				87	return Units <= 0;
				88	});
				89	if (Units < 0) // Offset in the middle of a surrogate pair.
				90	Valid = false;
				91	break;
				92	case OffsetEncoding::UTF32:
				93	Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) {
				94	Result += U8Len;
				95	Units--;
				96	return Units <= 0;
				97	});
				98	break;
				99	case OffsetEncoding::UnsupportedEncoding:
				100	llvm_unreachable("unsupported encoding");
				101	}
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	102	// Don't return an out-of-range index if we overran.
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	103	if (Result > U8.size()) {
				104	Valid = false;
				105	return U8.size();
				106	}
				107	return Result;
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	108	}
				109
Sam McCall	a69698f	2019-03-27 17:47:49 +0000	[diff] [blame]	110	Key<OffsetEncoding> kCurrentOffsetEncoding;
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	111	static OffsetEncoding lspEncoding() {
Sam McCall	a69698f	2019-03-27 17:47:49 +0000	[diff] [blame]	112	auto *Enc = Context::current().get(kCurrentOffsetEncoding);
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	113	return Enc ? *Enc : OffsetEncoding::UTF16;
Sam McCall	a69698f	2019-03-27 17:47:49 +0000	[diff] [blame]	114	}
				115
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	116	// Like most strings in clangd, the input is UTF-8 encoded.
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	117	size_t lspLength(llvm::StringRef Code) {
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	118	size_t Count = 0;
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	119	switch (lspEncoding()) {
				120	case OffsetEncoding::UTF8:
				121	Count = Code.size();
				122	break;
				123	case OffsetEncoding::UTF16:
				124	iterateCodepoints(Code, [&](int U8Len, int U16Len) {
				125	Count += U16Len;
				126	return false;
				127	});
				128	break;
				129	case OffsetEncoding::UTF32:
				130	iterateCodepoints(Code, [&](int U8Len, int U16Len) {
				131	++Count;
				132	return false;
				133	});
				134	break;
				135	case OffsetEncoding::UnsupportedEncoding:
				136	llvm_unreachable("unsupported encoding");
				137	}
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	138	return Count;
				139	}
				140
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	141	llvm::Expected<size_t> positionToOffset(llvm::StringRef Code, Position P,
				142	bool AllowColumnsBeyondLineLength) {
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	143	if (P.line < 0)
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	144	return llvm::make_error<llvm::StringError>(
				145	llvm::formatv("Line value can't be negative ({0})", P.line),
				146	llvm::errc::invalid_argument);
Simon Marchi	766338a	2018-03-21 14:36:46 +0000	[diff] [blame]	147	if (P.character < 0)
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	148	return llvm::make_error<llvm::StringError>(
				149	llvm::formatv("Character value can't be negative ({0})", P.character),
				150	llvm::errc::invalid_argument);
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	151	size_t StartOfLine = 0;
				152	for (int I = 0; I != P.line; ++I) {
				153	size_t NextNL = Code.find('\n', StartOfLine);
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	154	if (NextNL == llvm::StringRef::npos)
				155	return llvm::make_error<llvm::StringError>(
				156	llvm::formatv("Line value is out of range ({0})", P.line),
				157	llvm::errc::invalid_argument);
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	158	StartOfLine = NextNL + 1;
				159	}
Sam McCall	a69698f	2019-03-27 17:47:49 +0000	[diff] [blame]	160	StringRef Line =
				161	Code.substr(StartOfLine).take_until([](char C) { return C == '\n'; });
Simon Marchi	766338a	2018-03-21 14:36:46 +0000	[diff] [blame]	162
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	163	// P.character may be in UTF-16, transcode if necessary.
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	164	bool Valid;
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	165	size_t ByteInLine = measureUnits(Line, P.character, lspEncoding(), Valid);
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	166	if (!Valid && !AllowColumnsBeyondLineLength)
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	167	return llvm::make_error<llvm::StringError>(
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	168	llvm::formatv("{0} offset {1} is invalid for line {2}", lspEncoding(),
				169	P.character, P.line),
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	170	llvm::errc::invalid_argument);
Sam McCall	8b25d22	2019-03-28 14:37:51 +0000	[diff] [blame]	171	return StartOfLine + ByteInLine;
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	172	}
				173
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	174	Position offsetToPosition(llvm::StringRef Code, size_t Offset) {
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	175	Offset = std::min(Code.size(), Offset);
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	176	llvm::StringRef Before = Code.substr(0, Offset);
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	177	int Lines = Before.count('\n');
				178	size_t PrevNL = Before.rfind('\n');
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	179	size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1);
Ilya Biryukov	7beea3a	2018-02-14 10:52:04 +0000	[diff] [blame]	180	Position Pos;
				181	Pos.line = Lines;
Sam McCall	7189112	2018-10-23 11:51:53 +0000	[diff] [blame]	182	Pos.character = lspLength(Before.substr(StartOfLine));
Ilya Biryukov	7beea3a	2018-02-14 10:52:04 +0000	[diff] [blame]	183	return Pos;
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	184	}
				185
Marc-Andre Laperle	63a1098	2018-02-21 02:39:08 +0000	[diff] [blame]	186	Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc) {
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	187	// We use the SourceManager's line tables, but its column number is in bytes.
				188	FileID FID;
				189	unsigned Offset;
				190	std::tie(FID, Offset) = SM.getDecomposedSpellingLoc(Loc);
Marc-Andre Laperle	63a1098	2018-02-21 02:39:08 +0000	[diff] [blame]	191	Position P;
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	192	P.line = static_cast<int>(SM.getLineNumber(FID, Offset)) - 1;
				193	bool Invalid = false;
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	194	llvm::StringRef Code = SM.getBufferData(FID, &Invalid);
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	195	if (!Invalid) {
				196	auto ColumnInBytes = SM.getColumnNumber(FID, Offset) - 1;
				197	auto LineSoFar = Code.substr(Offset - ColumnInBytes, ColumnInBytes);
Sam McCall	7189112	2018-10-23 11:51:53 +0000	[diff] [blame]	198	P.character = lspLength(LineSoFar);
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	199	}
Marc-Andre Laperle	63a1098	2018-02-21 02:39:08 +0000	[diff] [blame]	200	return P;
				201	}
				202
Haojian Wu	92c3257	2019-06-25 08:01:46 +0000	[diff] [blame]	203	llvm::Optional<Range> getTokenRange(const SourceManager &SM,
				204	const LangOptions &LangOpts,
				205	SourceLocation TokLoc) {
				206	if (!TokLoc.isValid())
				207	return llvm::None;
				208	SourceLocation End = Lexer::getLocForEndOfToken(TokLoc, 0, SM, LangOpts);
				209	if (!End.isValid())
				210	return llvm::None;
				211	return halfOpenToRange(SM, CharSourceRange::getCharRange(TokLoc, End));
				212	}
				213
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	214	bool isValidFileRange(const SourceManager &Mgr, SourceRange R) {
				215	if (!R.getBegin().isValid() \|\| !R.getEnd().isValid())
				216	return false;
				217
				218	FileID BeginFID;
				219	size_t BeginOffset = 0;
				220	std::tie(BeginFID, BeginOffset) = Mgr.getDecomposedLoc(R.getBegin());
				221
				222	FileID EndFID;
				223	size_t EndOffset = 0;
				224	std::tie(EndFID, EndOffset) = Mgr.getDecomposedLoc(R.getEnd());
				225
				226	return BeginFID.isValid() && BeginFID == EndFID && BeginOffset <= EndOffset;
				227	}
				228
				229	bool halfOpenRangeContains(const SourceManager &Mgr, SourceRange R,
				230	SourceLocation L) {
				231	assert(isValidFileRange(Mgr, R));
				232
				233	FileID BeginFID;
				234	size_t BeginOffset = 0;
				235	std::tie(BeginFID, BeginOffset) = Mgr.getDecomposedLoc(R.getBegin());
				236	size_t EndOffset = Mgr.getFileOffset(R.getEnd());
				237
				238	FileID LFid;
				239	size_t LOffset;
				240	std::tie(LFid, LOffset) = Mgr.getDecomposedLoc(L);
				241	return BeginFID == LFid && BeginOffset <= LOffset && LOffset < EndOffset;
				242	}
				243
				244	bool halfOpenRangeTouches(const SourceManager &Mgr, SourceRange R,
				245	SourceLocation L) {
				246	return L == R.getEnd() \|\| halfOpenRangeContains(Mgr, R, L);
				247	}
				248
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	249	static unsigned getTokenLengthAtLoc(SourceLocation Loc, const SourceManager &SM,
				250	const LangOptions &LangOpts) {
				251	Token TheTok;
				252	if (Lexer::getRawToken(Loc, TheTok, SM, LangOpts))
				253	return 0;
				254	// FIXME: Here we check whether the token at the location is a greatergreater
				255	// (>>) token and consider it as a single greater (>). This is to get it
				256	// working for templates but it isn't correct for the right shift operator. We
				257	// can avoid this by using half open char ranges in getFileRange() but getting
				258	// token ending is not well supported in macroIDs.
				259	if (TheTok.is(tok::greatergreater))
				260	return 1;
				261	return TheTok.getLength();
				262	}
				263
				264	// Returns location of the last character of the token at a given loc
				265	static SourceLocation getLocForTokenEnd(SourceLocation BeginLoc,
				266	const SourceManager &SM,
				267	const LangOptions &LangOpts) {
				268	unsigned Len = getTokenLengthAtLoc(BeginLoc, SM, LangOpts);
				269	return BeginLoc.getLocWithOffset(Len ? Len - 1 : 0);
				270	}
				271
				272	// Returns location of the starting of the token at a given EndLoc
				273	static SourceLocation getLocForTokenBegin(SourceLocation EndLoc,
				274	const SourceManager &SM,
				275	const LangOptions &LangOpts) {
				276	return EndLoc.getLocWithOffset(
				277	-(signed)getTokenLengthAtLoc(EndLoc, SM, LangOpts));
				278	}
				279
				280	// Converts a char source range to a token range.
				281	static SourceRange toTokenRange(CharSourceRange Range, const SourceManager &SM,
				282	const LangOptions &LangOpts) {
				283	if (!Range.isTokenRange())
				284	Range.setEnd(getLocForTokenBegin(Range.getEnd(), SM, LangOpts));
				285	return Range.getAsRange();
				286	}
				287	// Returns the union of two token ranges.
				288	// To find the maximum of the Ends of the ranges, we compare the location of the
				289	// last character of the token.
				290	static SourceRange unionTokenRange(SourceRange R1, SourceRange R2,
				291	const SourceManager &SM,
				292	const LangOptions &LangOpts) {
				293	SourceLocation E1 = getLocForTokenEnd(R1.getEnd(), SM, LangOpts);
				294	SourceLocation E2 = getLocForTokenEnd(R2.getEnd(), SM, LangOpts);
				295	return SourceRange(std::min(R1.getBegin(), R2.getBegin()),
				296	E1 < E2 ? R2.getEnd() : R1.getEnd());
				297	}
				298
				299	// Returns the tokenFileRange for a given Location as a Token Range
				300	// This is quite similar to getFileLoc in SourceManager as both use
				301	// getImmediateExpansionRange and getImmediateSpellingLoc (for macro IDs).
				302	// However:
				303	// - We want to maintain the full range information as we move from one file to
				304	// the next. getFileLoc only uses the BeginLoc of getImmediateExpansionRange.
				305	// - We want to split '>>' tokens as the lexer parses the '>>' in template
				306	// instantiations as a '>>' instead of a '>'.
				307	// There is also getExpansionRange but it simply calls
				308	// getImmediateExpansionRange on the begin and ends separately which is wrong.
				309	static SourceRange getTokenFileRange(SourceLocation Loc,
				310	const SourceManager &SM,
				311	const LangOptions &LangOpts) {
				312	SourceRange FileRange = Loc;
				313	while (!FileRange.getBegin().isFileID()) {
				314	assert(!FileRange.getEnd().isFileID() &&
				315	"Both Begin and End should be MacroIDs.");
				316	if (SM.isMacroArgExpansion(FileRange.getBegin())) {
				317	FileRange.setBegin(SM.getImmediateSpellingLoc(FileRange.getBegin()));
				318	FileRange.setEnd(SM.getImmediateSpellingLoc(FileRange.getEnd()));
				319	} else {
				320	SourceRange ExpansionRangeForBegin = toTokenRange(
				321	SM.getImmediateExpansionRange(FileRange.getBegin()), SM, LangOpts);
				322	SourceRange ExpansionRangeForEnd = toTokenRange(
				323	SM.getImmediateExpansionRange(FileRange.getEnd()), SM, LangOpts);
				324	FileRange = unionTokenRange(ExpansionRangeForBegin, ExpansionRangeForEnd,
				325	SM, LangOpts);
				326	}
				327	}
				328	return FileRange;
				329	}
				330
				331	llvm::Optional<SourceRange> toHalfOpenFileRange(const SourceManager &SM,
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	332	const LangOptions &LangOpts,
				333	SourceRange R) {
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	334	SourceRange R1 = getTokenFileRange(R.getBegin(), SM, LangOpts);
				335	if (!isValidFileRange(SM, R1))
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	336	return llvm::None;
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	337
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	338	SourceRange R2 = getTokenFileRange(R.getEnd(), SM, LangOpts);
				339	if (!isValidFileRange(SM, R2))
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	340	return llvm::None;
Shaurya Gupta	0d26d6f	2019-07-12 11:42:31 +0000	[diff] [blame]	341
				342	SourceRange Result = unionTokenRange(R1, R2, SM, LangOpts);
				343	unsigned TokLen = getTokenLengthAtLoc(Result.getEnd(), SM, LangOpts);
				344	// Convert from closed token range to half-open (char) range
				345	Result.setEnd(Result.getEnd().getLocWithOffset(TokLen));
				346	if (!isValidFileRange(SM, Result))
				347	return llvm::None;
				348
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	349	return Result;
				350	}
				351
				352	llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R) {
				353	assert(isValidFileRange(SM, R));
				354	bool Invalid = false;
				355	auto *Buf = SM.getBuffer(SM.getFileID(R.getBegin()), &Invalid);
				356	assert(!Invalid);
				357
				358	size_t BeginOffset = SM.getFileOffset(R.getBegin());
				359	size_t EndOffset = SM.getFileOffset(R.getEnd());
				360	return Buf->getBuffer().substr(BeginOffset, EndOffset - BeginOffset);
				361	}
				362
Ilya Biryukov	cce67a3	2019-01-29 14:17:36 +0000	[diff] [blame]	363	llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM,
				364	Position P) {
				365	llvm::StringRef Code = SM.getBuffer(SM.getMainFileID())->getBuffer();
				366	auto Offset =
				367	positionToOffset(Code, P, /AllowColumnBeyondLineLength=/false);
				368	if (!Offset)
				369	return Offset.takeError();
				370	return SM.getLocForStartOfFile(SM.getMainFileID()).getLocWithOffset(*Offset);
				371	}
				372
Ilya Biryukov	71028b8	2018-03-12 15:28:22 +0000	[diff] [blame]	373	Range halfOpenToRange(const SourceManager &SM, CharSourceRange R) {
				374	// Clang is 1-based, LSP uses 0-based indexes.
				375	Position Begin = sourceLocToPosition(SM, R.getBegin());
				376	Position End = sourceLocToPosition(SM, R.getEnd());
				377
				378	return {Begin, End};
				379	}
				380
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	381	std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code,
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	382	size_t Offset) {
				383	Offset = std::min(Code.size(), Offset);
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	384	llvm::StringRef Before = Code.substr(0, Offset);
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	385	int Lines = Before.count('\n');
				386	size_t PrevNL = Before.rfind('\n');
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	387	size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1);
Sam McCall	a4962cc	2018-04-27 11:59:28 +0000	[diff] [blame]	388	return {Lines + 1, Offset - StartOfLine + 1};
				389	}
				390
Ilya Biryukov	4399878	2019-01-31 21:30:05 +0000	[diff] [blame]	391	std::pair<StringRef, StringRef> splitQualifiedName(StringRef QName) {
Marc-Andre Laperle	b387b6e	2018-04-23 20:00:52 +0000	[diff] [blame]	392	size_t Pos = QName.rfind("::");
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	393	if (Pos == llvm::StringRef::npos)
				394	return {llvm::StringRef(), QName};
Marc-Andre Laperle	b387b6e	2018-04-23 20:00:52 +0000	[diff] [blame]	395	return {QName.substr(0, Pos + 2), QName.substr(Pos + 2)};
				396	}
				397
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	398	TextEdit replacementToEdit(llvm::StringRef Code,
				399	const tooling::Replacement &R) {
Eric Liu	9133ecd	2018-05-11 12:12:08 +0000	[diff] [blame]	400	Range ReplacementRange = {
				401	offsetToPosition(Code, R.getOffset()),
				402	offsetToPosition(Code, R.getOffset() + R.getLength())};
				403	return {ReplacementRange, R.getReplacementText()};
				404	}
				405
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	406	std::vector<TextEdit> replacementsToEdits(llvm::StringRef Code,
Eric Liu	9133ecd	2018-05-11 12:12:08 +0000	[diff] [blame]	407	const tooling::Replacements &Repls) {
				408	std::vector<TextEdit> Edits;
				409	for (const auto &R : Repls)
				410	Edits.push_back(replacementToEdit(Code, R));
				411	return Edits;
				412	}
				413
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	414	llvm::Optional<std::string> getCanonicalPath(const FileEntry *F,
				415	const SourceManager &SourceMgr) {
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	416	if (!F)
				417	return None;
Simon Marchi	25f1f73	2018-08-10 22:27:53 +0000	[diff] [blame]	418
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	419	llvm::SmallString<128> FilePath = F->getName();
				420	if (!llvm::sys::path::is_absolute(FilePath)) {
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	421	if (auto EC =
Duncan P. N. Exon Smith	db8a742	2019-03-26 22:32:06 +0000	[diff] [blame]	422	SourceMgr.getFileManager().getVirtualFileSystem().makeAbsolute(
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	423	FilePath)) {
				424	elog("Could not turn relative path '{0}' to absolute: {1}", FilePath,
				425	EC.message());
Sam McCall	c008af6	2018-10-20 15:30:37 +0000	[diff] [blame]	426	return None;
Marc-Andre Laperle	1be6970	2018-07-05 19:35:01 +0000	[diff] [blame]	427	}
				428	}
Simon Marchi	25f1f73	2018-08-10 22:27:53 +0000	[diff] [blame]	429
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	430	// Handle the symbolic link path case where the current working directory
				431	// (getCurrentWorkingDirectory) is a symlink./ We always want to the real
				432	// file path (instead of the symlink path) for the C++ symbols.
				433	//
				434	// Consider the following example:
				435	//
				436	// src dir: /project/src/foo.h
				437	// current working directory (symlink): /tmp/build -> /project/src/
				438	//
				439	// The file path of Symbol is "/project/src/foo.h" instead of
				440	// "/tmp/build/foo.h"
				441	if (const DirectoryEntry *Dir = SourceMgr.getFileManager().getDirectory(
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	442	llvm::sys::path::parent_path(FilePath))) {
				443	llvm::SmallString<128> RealPath;
				444	llvm::StringRef DirName = SourceMgr.getFileManager().getCanonicalName(Dir);
				445	llvm::sys::path::append(RealPath, DirName,
				446	llvm::sys::path::filename(FilePath));
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	447	return RealPath.str().str();
Simon Marchi	25f1f73	2018-08-10 22:27:53 +0000	[diff] [blame]	448	}
				449
Kadir Cetinkaya	dd67793	2018-12-19 10:46:21 +0000	[diff] [blame]	450	return FilePath.str().str();
Marc-Andre Laperle	1be6970	2018-07-05 19:35:01 +0000	[diff] [blame]	451	}
				452
Kadir Cetinkaya	2f84d91	2018-08-08 08:59:29 +0000	[diff] [blame]	453	TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M,
				454	const LangOptions &L) {
				455	TextEdit Result;
				456	Result.range =
				457	halfOpenToRange(M, Lexer::makeFileCharRange(FixIt.RemoveRange, M, L));
				458	Result.newText = FixIt.CodeToInsert;
				459	return Result;
				460	}
				461
Haojian Wu	aa3ed5a	2019-01-25 15:14:03 +0000	[diff] [blame]	462	bool isRangeConsecutive(const Range &Left, const Range &Right) {
Kadir Cetinkaya	a9c9d00	2018-08-13 08:23:01 +0000	[diff] [blame]	463	return Left.end.line == Right.start.line &&
				464	Left.end.character == Right.start.character;
				465	}
				466
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	467	FileDigest digest(llvm::StringRef Content) {
Sam McCall	674d8a9	2019-07-08 11:33:17 +0000	[diff] [blame]	468	uint64_t Hash{llvm::xxHash64(Content)};
				469	FileDigest Result;
				470	for (unsigned I = 0; I < Result.size(); ++I) {
				471	Result[I] = uint8_t(Hash);
				472	Hash >>= 8;
				473	}
				474	return Result;
Kadir Cetinkaya	d08eab4	2018-11-27 16:08:53 +0000	[diff] [blame]	475	}
				476
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	477	llvm::Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {
Kadir Cetinkaya	d08eab4	2018-11-27 16:08:53 +0000	[diff] [blame]	478	bool Invalid = false;
Ilya Biryukov	f2001aa	2019-01-07 15:45:19 +0000	[diff] [blame]	479	llvm::StringRef Content = SM.getBufferData(FID, &Invalid);
Kadir Cetinkaya	d08eab4	2018-11-27 16:08:53 +0000	[diff] [blame]	480	if (Invalid)
				481	return None;
				482	return digest(Content);
				483	}
				484
Eric Liu	dd66277	2019-01-28 14:01:55 +0000	[diff] [blame]	485	format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
				486	llvm::StringRef Content,
				487	llvm::vfs::FileSystem *FS) {
				488	auto Style = format::getStyle(format::DefaultFormatStyle, File,
				489	format::DefaultFallbackStyle, Content, FS);
				490	if (!Style) {
				491	log("getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File,
				492	Style.takeError());
				493	Style = format::getLLVMStyle();
				494	}
				495	return *Style;
				496	}
				497
Haojian Wu	12e194c	2019-02-06 15:24:50 +0000	[diff] [blame]	498	llvm::Expected<tooling::Replacements>
				499	cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces,
				500	const format::FormatStyle &Style) {
				501	auto CleanReplaces = cleanupAroundReplacements(Code, Replaces, Style);
				502	if (!CleanReplaces)
				503	return CleanReplaces;
				504	return formatReplacements(Code, std::move(*CleanReplaces), Style);
				505	}
				506
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	507	template <typename Action>
				508	static void lex(llvm::StringRef Code, const format::FormatStyle &Style,
				509	Action A) {
				510	// FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated!
				511	std::string NullTerminatedCode = Code.str();
				512	SourceManagerForFile FileSM("dummy.cpp", NullTerminatedCode);
Eric Liu	00d99bd	2019-04-11 09:36:36 +0000	[diff] [blame]	513	auto &SM = FileSM.get();
				514	auto FID = SM.getMainFileID();
				515	Lexer Lex(FID, SM.getBuffer(FID), SM, format::getFormattingLangOpts(Style));
				516	Token Tok;
				517
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	518	while (!Lex.LexFromRawLexer(Tok))
				519	A(Tok);
				520	}
				521
				522	llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
				523	const format::FormatStyle &Style) {
Eric Liu	00d99bd	2019-04-11 09:36:36 +0000	[diff] [blame]	524	llvm::StringMap<unsigned> Identifiers;
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	525	lex(Content, Style, [&](const clang::Token &Tok) {
Eric Liu	00d99bd	2019-04-11 09:36:36 +0000	[diff] [blame]	526	switch (Tok.getKind()) {
				527	case tok::identifier:
				528	++Identifiers[Tok.getIdentifierInfo()->getName()];
				529	break;
				530	case tok::raw_identifier:
				531	++Identifiers[Tok.getRawIdentifier()];
				532	break;
				533	default:
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	534	break;
Eric Liu	00d99bd	2019-04-11 09:36:36 +0000	[diff] [blame]	535	}
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	536	});
Eric Liu	00d99bd	2019-04-11 09:36:36 +0000	[diff] [blame]	537	return Identifiers;
				538	}
				539
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	540	namespace {
				541	enum NamespaceEvent {
				542	BeginNamespace, // namespace <ns> {. Payload is resolved <ns>.
				543	EndNamespace, // } // namespace <ns>. Payload is resolved outer namespace.
				544	UsingDirective // using namespace <ns>. Payload is unresolved <ns>.
				545	};
				546	// Scans C++ source code for constructs that change the visible namespaces.
				547	void parseNamespaceEvents(
				548	llvm::StringRef Code, const format::FormatStyle &Style,
				549	llvm::function_ref<void(NamespaceEvent, llvm::StringRef)> Callback) {
				550
				551	// Stack of enclosing namespaces, e.g. {"clang", "clangd"}
				552	std::vector<std::string> Enclosing; // Contains e.g. "clang", "clangd"
				553	// Stack counts open braces. true if the brace opened a namespace.
				554	std::vector<bool> BraceStack;
				555
				556	enum {
				557	Default,
				558	Namespace, // just saw 'namespace'
				559	NamespaceName, // just saw 'namespace' NSName
				560	Using, // just saw 'using'
				561	UsingNamespace, // just saw 'using namespace'
				562	UsingNamespaceName, // just saw 'using namespace' NSName
				563	} State = Default;
				564	std::string NSName;
				565
				566	lex(Code, Style, [&](const clang::Token &Tok) {
				567	switch(Tok.getKind()) {
				568	case tok::raw_identifier:
				569	// In raw mode, this could be a keyword or a name.
				570	switch (State) {
				571	case UsingNamespace:
				572	case UsingNamespaceName:
				573	NSName.append(Tok.getRawIdentifier());
				574	State = UsingNamespaceName;
				575	break;
				576	case Namespace:
				577	case NamespaceName:
				578	NSName.append(Tok.getRawIdentifier());
				579	State = NamespaceName;
				580	break;
				581	case Using:
				582	State =
				583	(Tok.getRawIdentifier() == "namespace") ? UsingNamespace : Default;
				584	break;
				585	case Default:
				586	NSName.clear();
				587	if (Tok.getRawIdentifier() == "namespace")
				588	State = Namespace;
				589	else if (Tok.getRawIdentifier() == "using")
				590	State = Using;
				591	break;
				592	}
				593	break;
				594	case tok::coloncolon:
				595	// This can come at the beginning or in the middle of a namespace name.
				596	switch (State) {
				597	case UsingNamespace:
				598	case UsingNamespaceName:
				599	NSName.append("::");
				600	State = UsingNamespaceName;
				601	break;
				602	case NamespaceName:
				603	NSName.append("::");
				604	State = NamespaceName;
				605	break;
				606	case Namespace: // Not legal here.
				607	case Using:
				608	case Default:
				609	State = Default;
				610	break;
				611	}
				612	break;
				613	case tok::l_brace:
				614	// Record which { started a namespace, so we know when } ends one.
				615	if (State == NamespaceName) {
				616	// Parsed: namespace <name> {
				617	BraceStack.push_back(true);
				618	Enclosing.push_back(NSName);
				619	Callback(BeginNamespace, llvm::join(Enclosing, "::"));
				620	} else {
				621	// This case includes anonymous namespaces (State = Namespace).
				622	// For our purposes, they're not namespaces and we ignore them.
				623	BraceStack.push_back(false);
				624	}
				625	State = Default;
				626	break;
				627	case tok::r_brace:
				628	// If braces are unmatched, we're going to be confused, but don't crash.
				629	if (!BraceStack.empty()) {
				630	if (BraceStack.back()) {
				631	// Parsed: } // namespace
				632	Enclosing.pop_back();
				633	Callback(EndNamespace, llvm::join(Enclosing, "::"));
				634	}
				635	BraceStack.pop_back();
				636	}
				637	break;
				638	case tok::semi:
				639	if (State == UsingNamespaceName)
				640	// Parsed: using namespace <name> ;
				641	Callback(UsingDirective, llvm::StringRef(NSName));
				642	State = Default;
				643	break;
				644	default:
				645	State = Default;
				646	break;
				647	}
				648	});
				649	}
				650
				651	// Returns the prefix namespaces of NS: {"" ... NS}.
				652	llvm::SmallVector<llvm::StringRef, 8> ancestorNamespaces(llvm::StringRef NS) {
				653	llvm::SmallVector<llvm::StringRef, 8> Results;
				654	Results.push_back(NS.take_front(0));
				655	NS.split(Results, "::", /MaxSplit=/-1, /KeepEmpty=/false);
				656	for (llvm::StringRef &R : Results)
				657	R = NS.take_front(R.end() - NS.begin());
				658	return Results;
				659	}
				660
				661	} // namespace
				662
				663	std::vector<std::string> visibleNamespaces(llvm::StringRef Code,
				664	const format::FormatStyle &Style) {
				665	std::string Current;
				666	// Map from namespace to (resolved) namespaces introduced via using directive.
				667	llvm::StringMap<llvm::StringSet<>> UsingDirectives;
				668
				669	parseNamespaceEvents(Code, Style,
				670	[&](NamespaceEvent Event, llvm::StringRef NS) {
				671	switch (Event) {
				672	case BeginNamespace:
				673	case EndNamespace:
				674	Current = NS;
				675	break;
				676	case UsingDirective:
				677	if (NS.consume_front("::"))
				678	UsingDirectives[Current].insert(NS);
				679	else {
				680	for (llvm::StringRef Enclosing :
				681	ancestorNamespaces(Current)) {
				682	if (Enclosing.empty())
				683	UsingDirectives[Current].insert(NS);
				684	else
				685	UsingDirectives[Current].insert(
				686	(Enclosing + "::" + NS).str());
				687	}
				688	}
				689	break;
				690	}
				691	});
				692
				693	std::vector<std::string> Found;
				694	for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) {
				695	Found.push_back(Enclosing);
				696	auto It = UsingDirectives.find(Enclosing);
				697	if (It != UsingDirectives.end())
				698	for (const auto& Used : It->second)
				699	Found.push_back(Used.getKey());
				700	}
				701
Sam McCall	c316b22	2019-04-26 07:45:49 +0000	[diff] [blame]	702	llvm::sort(Found, [&](const std::string &LHS, const std::string &RHS) {
				703	if (Current == RHS)
				704	return false;
				705	if (Current == LHS)
				706	return true;
				707	return LHS < RHS;
				708	});
				709	Found.erase(std::unique(Found.begin(), Found.end()), Found.end());
				710	return Found;
				711	}
				712
Sam McCall	9fb22b2	2019-05-06 10:25:10 +0000	[diff] [blame]	713	llvm::StringSet<> collectWords(llvm::StringRef Content) {
				714	// We assume short words are not significant.
				715	// We may want to consider other stopwords, e.g. language keywords.
				716	// (A very naive implementation showed no benefit, but lexing might do better)
				717	static constexpr int MinWordLength = 4;
				718
				719	std::vector<CharRole> Roles(Content.size());
				720	calculateRoles(Content, Roles);
				721
				722	llvm::StringSet<> Result;
				723	llvm::SmallString<256> Word;
				724	auto Flush = [&] {
				725	if (Word.size() >= MinWordLength) {
				726	for (char &C : Word)
				727	C = llvm::toLower(C);
				728	Result.insert(Word);
				729	}
				730	Word.clear();
				731	};
				732	for (unsigned I = 0; I < Content.size(); ++I) {
				733	switch (Roles[I]) {
				734	case Head:
				735	Flush();
				736	LLVM_FALLTHROUGH;
				737	case Tail:
				738	Word.push_back(Content[I]);
				739	break;
				740	case Unknown:
				741	case Separator:
				742	Flush();
				743	break;
				744	}
				745	}
				746	Flush();
				747
				748	return Result;
				749	}
				750
Haojian Wu	9d34f45	2019-07-01 09:26:48 +0000	[diff] [blame]	751	llvm::Optional<DefinedMacro> locateMacroAt(SourceLocation Loc,
				752	Preprocessor &PP) {
				753	const auto &SM = PP.getSourceManager();
				754	const auto &LangOpts = PP.getLangOpts();
				755	Token Result;
				756	if (Lexer::getRawToken(SM.getSpellingLoc(Loc), Result, SM, LangOpts, false))
				757	return None;
				758	if (Result.is(tok::raw_identifier))
				759	PP.LookUpIdentifierInfo(Result);
				760	IdentifierInfo *IdentifierInfo = Result.getIdentifierInfo();
				761	if (!IdentifierInfo \|\| !IdentifierInfo->hadMacroDefinition())
				762	return None;
				763
				764	std::pair<FileID, unsigned int> DecLoc = SM.getDecomposedExpansionLoc(Loc);
				765	// Get the definition just before the searched location so that a macro
				766	// referenced in a '#undef MACRO' can still be found.
				767	SourceLocation BeforeSearchedLocation =
				768	SM.getMacroArgExpandedLocation(SM.getLocForStartOfFile(DecLoc.first)
				769	.getLocWithOffset(DecLoc.second - 1));
				770	MacroDefinition MacroDef =
				771	PP.getMacroDefinitionAtLoc(IdentifierInfo, BeforeSearchedLocation);
				772	if (auto *MI = MacroDef.getMacroInfo())
				773	return DefinedMacro{IdentifierInfo->getName(), MI};
				774	return None;
				775	}
				776
Sam McCall	b536a2a	2017-12-19 12:23:48 +0000	[diff] [blame]	777	} // namespace clangd
				778	} // namespace clang