[clangd] Fix toHalfOpenFileRange where start/end endpoints are in different files due to #include
Summary: https://github.com/clangd/clangd/issues/129
Reviewers: SureYeaah
Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D66590
llvm-svn: 370029
diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp
index 7a2f7c7..0c52dcd 100644
--- a/clang-tools-extra/clangd/SourceCode.cpp
+++ b/clang-tools-extra/clangd/SourceCode.cpp
@@ -264,6 +264,29 @@
return L == R.getEnd() || halfOpenRangeContains(Mgr, R, L);
}
+SourceLocation includeHashLoc(FileID IncludedFile, const SourceManager &SM) {
+ assert(SM.getLocForEndOfFile(IncludedFile).isFileID());
+ FileID IncludingFile;
+ unsigned Offset;
+ std::tie(IncludingFile, Offset) =
+ SM.getDecomposedExpansionLoc(SM.getIncludeLoc(IncludedFile));
+ bool Invalid = false;
+ llvm::StringRef Buf = SM.getBufferData(IncludingFile, &Invalid);
+ if (Invalid)
+ return SourceLocation();
+ // Now buf is "...\n#include <foo>\n..."
+ // and Offset points here: ^
+ // Rewind to the preceding # on the line.
+ assert(Offset < Buf.size());
+ for (;; --Offset) {
+ if (Buf[Offset] == '#')
+ return SM.getComposedLoc(IncludingFile, Offset);
+ if (Buf[Offset] == '\n' || Offset == 0) // no hash, what's going on?
+ return SourceLocation();
+ }
+}
+
+
static unsigned getTokenLengthAtLoc(SourceLocation Loc, const SourceManager &SM,
const LangOptions &LangOpts) {
Token TheTok;
@@ -308,16 +331,49 @@
static SourceRange unionTokenRange(SourceRange R1, SourceRange R2,
const SourceManager &SM,
const LangOptions &LangOpts) {
- SourceLocation E1 = getLocForTokenEnd(R1.getEnd(), SM, LangOpts);
- SourceLocation E2 = getLocForTokenEnd(R2.getEnd(), SM, LangOpts);
- return SourceRange(std::min(R1.getBegin(), R2.getBegin()),
- E1 < E2 ? R2.getEnd() : R1.getEnd());
+ SourceLocation Begin =
+ SM.isBeforeInTranslationUnit(R1.getBegin(), R2.getBegin())
+ ? R1.getBegin()
+ : R2.getBegin();
+ SourceLocation End =
+ SM.isBeforeInTranslationUnit(getLocForTokenEnd(R1.getEnd(), SM, LangOpts),
+ getLocForTokenEnd(R2.getEnd(), SM, LangOpts))
+ ? R2.getEnd()
+ : R1.getEnd();
+ return SourceRange(Begin, End);
}
-// Check if two locations have the same file id.
-static bool inSameFile(SourceLocation Loc1, SourceLocation Loc2,
- const SourceManager &SM) {
- return SM.getFileID(Loc1) == SM.getFileID(Loc2);
+// Given a range whose endpoints may be in different expansions or files,
+// tries to find a range within a common file by following up the expansion and
+// include location in each.
+static SourceRange rangeInCommonFile(SourceRange R, const SourceManager &SM,
+ const LangOptions &LangOpts) {
+ // Fast path for most common cases.
+ if (SM.isWrittenInSameFile(R.getBegin(), R.getEnd()))
+ return R;
+ // Record the stack of expansion locations for the beginning, keyed by FileID.
+ llvm::DenseMap<FileID, SourceLocation> BeginExpansions;
+ for (SourceLocation Begin = R.getBegin(); Begin.isValid();
+ Begin = Begin.isFileID()
+ ? includeHashLoc(SM.getFileID(Begin), SM)
+ : SM.getImmediateExpansionRange(Begin).getBegin()) {
+ BeginExpansions[SM.getFileID(Begin)] = Begin;
+ }
+ // Move up the stack of expansion locations for the end until we find the
+ // location in BeginExpansions with that has the same file id.
+ for (SourceLocation End = R.getEnd(); End.isValid();
+ End = End.isFileID() ? includeHashLoc(SM.getFileID(End), SM)
+ : toTokenRange(SM.getImmediateExpansionRange(End),
+ SM, LangOpts)
+ .getEnd()) {
+ auto It = BeginExpansions.find(SM.getFileID(End));
+ if (It != BeginExpansions.end()) {
+ if (SM.getFileOffset(It->second) > SM.getFileOffset(End))
+ return SourceLocation();
+ return {It->second, End};
+ }
+ }
+ return SourceRange();
}
// Find an expansion range (not necessarily immediate) the ends of which are in
@@ -325,33 +381,11 @@
static SourceRange
getExpansionTokenRangeInSameFile(SourceLocation Loc, const SourceManager &SM,
const LangOptions &LangOpts) {
- SourceRange ExpansionRange =
- toTokenRange(SM.getImmediateExpansionRange(Loc), SM, LangOpts);
- // Fast path for most common cases.
- if (inSameFile(ExpansionRange.getBegin(), ExpansionRange.getEnd(), SM))
- return ExpansionRange;
- // Record the stack of expansion locations for the beginning, keyed by FileID.
- llvm::DenseMap<FileID, SourceLocation> BeginExpansions;
- for (SourceLocation Begin = ExpansionRange.getBegin(); Begin.isValid();
- Begin = Begin.isFileID()
- ? SourceLocation()
- : SM.getImmediateExpansionRange(Begin).getBegin()) {
- BeginExpansions[SM.getFileID(Begin)] = Begin;
- }
- // Move up the stack of expansion locations for the end until we find the
- // location in BeginExpansions with that has the same file id.
- for (SourceLocation End = ExpansionRange.getEnd(); End.isValid();
- End = End.isFileID() ? SourceLocation()
- : toTokenRange(SM.getImmediateExpansionRange(End),
- SM, LangOpts)
- .getEnd()) {
- auto It = BeginExpansions.find(SM.getFileID(End));
- if (It != BeginExpansions.end())
- return {It->second, End};
- }
- llvm_unreachable(
- "We should able to find a common ancestor in the expansion tree.");
+ return rangeInCommonFile(
+ toTokenRange(SM.getImmediateExpansionRange(Loc), SM, LangOpts), SM,
+ LangOpts);
}
+
// Returns the file range for a given Location as a Token Range
// This is quite similar to getFileLoc in SourceManager as both use
// getImmediateExpansionRange and getImmediateSpellingLoc (for macro IDs).
@@ -371,14 +405,17 @@
FileRange = unionTokenRange(
SM.getImmediateSpellingLoc(FileRange.getBegin()),
SM.getImmediateSpellingLoc(FileRange.getEnd()), SM, LangOpts);
- assert(inSameFile(FileRange.getBegin(), FileRange.getEnd(), SM));
+ assert(SM.isWrittenInSameFile(FileRange.getBegin(), FileRange.getEnd()));
} else {
SourceRange ExpansionRangeForBegin =
getExpansionTokenRangeInSameFile(FileRange.getBegin(), SM, LangOpts);
SourceRange ExpansionRangeForEnd =
getExpansionTokenRangeInSameFile(FileRange.getEnd(), SM, LangOpts);
- assert(inSameFile(ExpansionRangeForBegin.getBegin(),
- ExpansionRangeForEnd.getBegin(), SM) &&
+ if (ExpansionRangeForBegin.isInvalid() ||
+ ExpansionRangeForEnd.isInvalid())
+ return SourceRange();
+ assert(SM.isWrittenInSameFile(ExpansionRangeForBegin.getBegin(),
+ ExpansionRangeForEnd.getBegin()) &&
"Both Expansion ranges should be in same file.");
FileRange = unionTokenRange(ExpansionRangeForBegin, ExpansionRangeForEnd,
SM, LangOpts);
@@ -402,7 +439,8 @@
if (!isValidFileRange(SM, R2))
return llvm::None;
- SourceRange Result = unionTokenRange(R1, R2, SM, LangOpts);
+ SourceRange Result =
+ rangeInCommonFile(unionTokenRange(R1, R2, SM, LangOpts), SM, LangOpts);
unsigned TokLen = getTokenLengthAtLoc(Result.getEnd(), SM, LangOpts);
// Convert from closed token range to half-open (char) range
Result.setEnd(Result.getEnd().getLocWithOffset(TokLen));