Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 1 | //===--- SourceCode.h - Manipulating source code as strings -----*- C++ -*-===// |
| 2 | // |
Chandler Carruth | 2946cd7 | 2019-01-19 08:50:56 +0000 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | #include "SourceCode.h" |
| 9 | |
Sam McCall | a69698f | 2019-03-27 17:47:49 +0000 | [diff] [blame] | 10 | #include "Context.h" |
Sam McCall | 9fb22b2 | 2019-05-06 10:25:10 +0000 | [diff] [blame] | 11 | #include "FuzzyMatch.h" |
Marc-Andre Laperle | 1be6970 | 2018-07-05 19:35:01 +0000 | [diff] [blame] | 12 | #include "Logger.h" |
Sam McCall | a69698f | 2019-03-27 17:47:49 +0000 | [diff] [blame] | 13 | #include "Protocol.h" |
Marc-Andre Laperle | 1be6970 | 2018-07-05 19:35:01 +0000 | [diff] [blame] | 14 | #include "clang/AST/ASTContext.h" |
Shaurya Gupta | 0d26d6f | 2019-07-12 11:42:31 +0000 | [diff] [blame] | 15 | #include "clang/Basic/LangOptions.h" |
| 16 | #include "clang/Basic/SourceLocation.h" |
Marc-Andre Laperle | 63a1098 | 2018-02-21 02:39:08 +0000 | [diff] [blame] | 17 | #include "clang/Basic/SourceManager.h" |
Sam McCall | c316b22 | 2019-04-26 07:45:49 +0000 | [diff] [blame] | 18 | #include "clang/Basic/TokenKinds.h" |
| 19 | #include "clang/Format/Format.h" |
Marc-Andre Laperle | 1be6970 | 2018-07-05 19:35:01 +0000 | [diff] [blame] | 20 | #include "clang/Lex/Lexer.h" |
Haojian Wu | 9d34f45 | 2019-07-01 09:26:48 +0000 | [diff] [blame] | 21 | #include "clang/Lex/Preprocessor.h" |
Ilya Biryukov | 4399878 | 2019-01-31 21:30:05 +0000 | [diff] [blame] | 22 | #include "llvm/ADT/None.h" |
Sam McCall | c316b22 | 2019-04-26 07:45:49 +0000 | [diff] [blame] | 23 | #include "llvm/ADT/StringExtras.h" |
Ilya Biryukov | 4399878 | 2019-01-31 21:30:05 +0000 | [diff] [blame] | 24 | #include "llvm/ADT/StringRef.h" |
Sam McCall | 9fb22b2 | 2019-05-06 10:25:10 +0000 | [diff] [blame] | 25 | #include "llvm/Support/Compiler.h" |
Simon Marchi | 766338a | 2018-03-21 14:36:46 +0000 | [diff] [blame] | 26 | #include "llvm/Support/Errc.h" |
| 27 | #include "llvm/Support/Error.h" |
Sam McCall | 8b25d22 | 2019-03-28 14:37:51 +0000 | [diff] [blame] | 28 | #include "llvm/Support/ErrorHandling.h" |
Marc-Andre Laperle | 1be6970 | 2018-07-05 19:35:01 +0000 | [diff] [blame] | 29 | #include "llvm/Support/Path.h" |
Sam McCall | 674d8a9 | 2019-07-08 11:33:17 +0000 | [diff] [blame] | 30 | #include "llvm/Support/xxhash.h" |
Sam McCall | c316b22 | 2019-04-26 07:45:49 +0000 | [diff] [blame] | 31 | #include <algorithm> |
Marc-Andre Laperle | 63a1098 | 2018-02-21 02:39:08 +0000 | [diff] [blame] | 32 | |
Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 33 | namespace clang { |
| 34 | namespace clangd { |
Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 35 | |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 36 | // Here be dragons. LSP positions use columns measured in *UTF-16 code units*! |
| 37 | // Clangd uses UTF-8 and byte-offsets internally, so conversion is nontrivial. |
| 38 | |
| 39 | // Iterates over unicode codepoints in the (UTF-8) string. For each, |
| 40 | // invokes CB(UTF-8 length, UTF-16 length), and breaks if it returns true. |
| 41 | // Returns true if CB returned true, false if we hit the end of string. |
| 42 | template <typename Callback> |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 43 | static bool iterateCodepoints(llvm::StringRef U8, const Callback &CB) { |
Sam McCall | 8b25d22 | 2019-03-28 14:37:51 +0000 | [diff] [blame] | 44 | // A codepoint takes two UTF-16 code unit if it's astral (outside BMP). |
| 45 | // Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx. |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 46 | for (size_t I = 0; I < U8.size();) { |
| 47 | unsigned char C = static_cast<unsigned char>(U8[I]); |
| 48 | if (LLVM_LIKELY(!(C & 0x80))) { // ASCII character. |
| 49 | if (CB(1, 1)) |
| 50 | return true; |
| 51 | ++I; |
| 52 | continue; |
| 53 | } |
| 54 | // This convenient property of UTF-8 holds for all non-ASCII characters. |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 55 | size_t UTF8Length = llvm::countLeadingOnes(C); |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 56 | // 0xxx is ASCII, handled above. 10xxx is a trailing byte, invalid here. |
| 57 | // 11111xxx is not valid UTF-8 at all. Assert because it's probably our bug. |
| 58 | assert((UTF8Length >= 2 && UTF8Length <= 4) && |
| 59 | "Invalid UTF-8, or transcoding bug?"); |
| 60 | I += UTF8Length; // Skip over all trailing bytes. |
| 61 | // A codepoint takes two UTF-16 code unit if it's astral (outside BMP). |
| 62 | // Astral codepoints are encoded as 4 bytes in UTF-8 (11110xxx ...) |
| 63 | if (CB(UTF8Length, UTF8Length == 4 ? 2 : 1)) |
| 64 | return true; |
| 65 | } |
| 66 | return false; |
| 67 | } |
| 68 | |
Sam McCall | 8b25d22 | 2019-03-28 14:37:51 +0000 | [diff] [blame] | 69 | // Returns the byte offset into the string that is an offset of \p Units in |
| 70 | // the specified encoding. |
| 71 | // Conceptually, this converts to the encoding, truncates to CodeUnits, |
| 72 | // converts back to UTF-8, and returns the length in bytes. |
| 73 | static size_t measureUnits(llvm::StringRef U8, int Units, OffsetEncoding Enc, |
| 74 | bool &Valid) { |
| 75 | Valid = Units >= 0; |
| 76 | if (Units <= 0) |
| 77 | return 0; |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 78 | size_t Result = 0; |
Sam McCall | 8b25d22 | 2019-03-28 14:37:51 +0000 | [diff] [blame] | 79 | switch (Enc) { |
| 80 | case OffsetEncoding::UTF8: |
| 81 | Result = Units; |
| 82 | break; |
| 83 | case OffsetEncoding::UTF16: |
| 84 | Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) { |
| 85 | Result += U8Len; |
| 86 | Units -= U16Len; |
| 87 | return Units <= 0; |
| 88 | }); |
| 89 | if (Units < 0) // Offset in the middle of a surrogate pair. |
| 90 | Valid = false; |
| 91 | break; |
| 92 | case OffsetEncoding::UTF32: |
| 93 | Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) { |
| 94 | Result += U8Len; |
| 95 | Units--; |
| 96 | return Units <= 0; |
| 97 | }); |
| 98 | break; |
| 99 | case OffsetEncoding::UnsupportedEncoding: |
| 100 | llvm_unreachable("unsupported encoding"); |
| 101 | } |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 102 | // Don't return an out-of-range index if we overran. |
Sam McCall | 8b25d22 | 2019-03-28 14:37:51 +0000 | [diff] [blame] | 103 | if (Result > U8.size()) { |
| 104 | Valid = false; |
| 105 | return U8.size(); |
| 106 | } |
| 107 | return Result; |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 108 | } |
| 109 | |
Sam McCall | a69698f | 2019-03-27 17:47:49 +0000 | [diff] [blame] | 110 | Key<OffsetEncoding> kCurrentOffsetEncoding; |
Sam McCall | 8b25d22 | 2019-03-28 14:37:51 +0000 | [diff] [blame] | 111 | static OffsetEncoding lspEncoding() { |
Sam McCall | a69698f | 2019-03-27 17:47:49 +0000 | [diff] [blame] | 112 | auto *Enc = Context::current().get(kCurrentOffsetEncoding); |
Sam McCall | 8b25d22 | 2019-03-28 14:37:51 +0000 | [diff] [blame] | 113 | return Enc ? *Enc : OffsetEncoding::UTF16; |
Sam McCall | a69698f | 2019-03-27 17:47:49 +0000 | [diff] [blame] | 114 | } |
| 115 | |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 116 | // Like most strings in clangd, the input is UTF-8 encoded. |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 117 | size_t lspLength(llvm::StringRef Code) { |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 118 | size_t Count = 0; |
Sam McCall | 8b25d22 | 2019-03-28 14:37:51 +0000 | [diff] [blame] | 119 | switch (lspEncoding()) { |
| 120 | case OffsetEncoding::UTF8: |
| 121 | Count = Code.size(); |
| 122 | break; |
| 123 | case OffsetEncoding::UTF16: |
| 124 | iterateCodepoints(Code, [&](int U8Len, int U16Len) { |
| 125 | Count += U16Len; |
| 126 | return false; |
| 127 | }); |
| 128 | break; |
| 129 | case OffsetEncoding::UTF32: |
| 130 | iterateCodepoints(Code, [&](int U8Len, int U16Len) { |
| 131 | ++Count; |
| 132 | return false; |
| 133 | }); |
| 134 | break; |
| 135 | case OffsetEncoding::UnsupportedEncoding: |
| 136 | llvm_unreachable("unsupported encoding"); |
| 137 | } |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 138 | return Count; |
| 139 | } |
| 140 | |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 141 | llvm::Expected<size_t> positionToOffset(llvm::StringRef Code, Position P, |
| 142 | bool AllowColumnsBeyondLineLength) { |
Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 143 | if (P.line < 0) |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 144 | return llvm::make_error<llvm::StringError>( |
| 145 | llvm::formatv("Line value can't be negative ({0})", P.line), |
| 146 | llvm::errc::invalid_argument); |
Simon Marchi | 766338a | 2018-03-21 14:36:46 +0000 | [diff] [blame] | 147 | if (P.character < 0) |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 148 | return llvm::make_error<llvm::StringError>( |
| 149 | llvm::formatv("Character value can't be negative ({0})", P.character), |
| 150 | llvm::errc::invalid_argument); |
Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 151 | size_t StartOfLine = 0; |
| 152 | for (int I = 0; I != P.line; ++I) { |
| 153 | size_t NextNL = Code.find('\n', StartOfLine); |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 154 | if (NextNL == llvm::StringRef::npos) |
| 155 | return llvm::make_error<llvm::StringError>( |
| 156 | llvm::formatv("Line value is out of range ({0})", P.line), |
| 157 | llvm::errc::invalid_argument); |
Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 158 | StartOfLine = NextNL + 1; |
| 159 | } |
Sam McCall | a69698f | 2019-03-27 17:47:49 +0000 | [diff] [blame] | 160 | StringRef Line = |
| 161 | Code.substr(StartOfLine).take_until([](char C) { return C == '\n'; }); |
Simon Marchi | 766338a | 2018-03-21 14:36:46 +0000 | [diff] [blame] | 162 | |
Sam McCall | 8b25d22 | 2019-03-28 14:37:51 +0000 | [diff] [blame] | 163 | // P.character may be in UTF-16, transcode if necessary. |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 164 | bool Valid; |
Sam McCall | 8b25d22 | 2019-03-28 14:37:51 +0000 | [diff] [blame] | 165 | size_t ByteInLine = measureUnits(Line, P.character, lspEncoding(), Valid); |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 166 | if (!Valid && !AllowColumnsBeyondLineLength) |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 167 | return llvm::make_error<llvm::StringError>( |
Sam McCall | 8b25d22 | 2019-03-28 14:37:51 +0000 | [diff] [blame] | 168 | llvm::formatv("{0} offset {1} is invalid for line {2}", lspEncoding(), |
| 169 | P.character, P.line), |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 170 | llvm::errc::invalid_argument); |
Sam McCall | 8b25d22 | 2019-03-28 14:37:51 +0000 | [diff] [blame] | 171 | return StartOfLine + ByteInLine; |
Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 172 | } |
| 173 | |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 174 | Position offsetToPosition(llvm::StringRef Code, size_t Offset) { |
Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 175 | Offset = std::min(Code.size(), Offset); |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 176 | llvm::StringRef Before = Code.substr(0, Offset); |
Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 177 | int Lines = Before.count('\n'); |
| 178 | size_t PrevNL = Before.rfind('\n'); |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 179 | size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1); |
Ilya Biryukov | 7beea3a | 2018-02-14 10:52:04 +0000 | [diff] [blame] | 180 | Position Pos; |
| 181 | Pos.line = Lines; |
Sam McCall | 7189112 | 2018-10-23 11:51:53 +0000 | [diff] [blame] | 182 | Pos.character = lspLength(Before.substr(StartOfLine)); |
Ilya Biryukov | 7beea3a | 2018-02-14 10:52:04 +0000 | [diff] [blame] | 183 | return Pos; |
Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 184 | } |
| 185 | |
Marc-Andre Laperle | 63a1098 | 2018-02-21 02:39:08 +0000 | [diff] [blame] | 186 | Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc) { |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 187 | // We use the SourceManager's line tables, but its column number is in bytes. |
| 188 | FileID FID; |
| 189 | unsigned Offset; |
| 190 | std::tie(FID, Offset) = SM.getDecomposedSpellingLoc(Loc); |
Marc-Andre Laperle | 63a1098 | 2018-02-21 02:39:08 +0000 | [diff] [blame] | 191 | Position P; |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 192 | P.line = static_cast<int>(SM.getLineNumber(FID, Offset)) - 1; |
| 193 | bool Invalid = false; |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 194 | llvm::StringRef Code = SM.getBufferData(FID, &Invalid); |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 195 | if (!Invalid) { |
| 196 | auto ColumnInBytes = SM.getColumnNumber(FID, Offset) - 1; |
| 197 | auto LineSoFar = Code.substr(Offset - ColumnInBytes, ColumnInBytes); |
Sam McCall | 7189112 | 2018-10-23 11:51:53 +0000 | [diff] [blame] | 198 | P.character = lspLength(LineSoFar); |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 199 | } |
Marc-Andre Laperle | 63a1098 | 2018-02-21 02:39:08 +0000 | [diff] [blame] | 200 | return P; |
| 201 | } |
| 202 | |
Haojian Wu | 92c3257 | 2019-06-25 08:01:46 +0000 | [diff] [blame] | 203 | llvm::Optional<Range> getTokenRange(const SourceManager &SM, |
| 204 | const LangOptions &LangOpts, |
| 205 | SourceLocation TokLoc) { |
| 206 | if (!TokLoc.isValid()) |
| 207 | return llvm::None; |
| 208 | SourceLocation End = Lexer::getLocForEndOfToken(TokLoc, 0, SM, LangOpts); |
| 209 | if (!End.isValid()) |
| 210 | return llvm::None; |
| 211 | return halfOpenToRange(SM, CharSourceRange::getCharRange(TokLoc, End)); |
| 212 | } |
| 213 | |
Ilya Biryukov | 4399878 | 2019-01-31 21:30:05 +0000 | [diff] [blame] | 214 | bool isValidFileRange(const SourceManager &Mgr, SourceRange R) { |
| 215 | if (!R.getBegin().isValid() || !R.getEnd().isValid()) |
| 216 | return false; |
| 217 | |
| 218 | FileID BeginFID; |
| 219 | size_t BeginOffset = 0; |
| 220 | std::tie(BeginFID, BeginOffset) = Mgr.getDecomposedLoc(R.getBegin()); |
| 221 | |
| 222 | FileID EndFID; |
| 223 | size_t EndOffset = 0; |
| 224 | std::tie(EndFID, EndOffset) = Mgr.getDecomposedLoc(R.getEnd()); |
| 225 | |
| 226 | return BeginFID.isValid() && BeginFID == EndFID && BeginOffset <= EndOffset; |
| 227 | } |
| 228 | |
| 229 | bool halfOpenRangeContains(const SourceManager &Mgr, SourceRange R, |
| 230 | SourceLocation L) { |
| 231 | assert(isValidFileRange(Mgr, R)); |
| 232 | |
| 233 | FileID BeginFID; |
| 234 | size_t BeginOffset = 0; |
| 235 | std::tie(BeginFID, BeginOffset) = Mgr.getDecomposedLoc(R.getBegin()); |
| 236 | size_t EndOffset = Mgr.getFileOffset(R.getEnd()); |
| 237 | |
| 238 | FileID LFid; |
| 239 | size_t LOffset; |
| 240 | std::tie(LFid, LOffset) = Mgr.getDecomposedLoc(L); |
| 241 | return BeginFID == LFid && BeginOffset <= LOffset && LOffset < EndOffset; |
| 242 | } |
| 243 | |
| 244 | bool halfOpenRangeTouches(const SourceManager &Mgr, SourceRange R, |
| 245 | SourceLocation L) { |
| 246 | return L == R.getEnd() || halfOpenRangeContains(Mgr, R, L); |
| 247 | } |
| 248 | |
Shaurya Gupta | 0d26d6f | 2019-07-12 11:42:31 +0000 | [diff] [blame] | 249 | static unsigned getTokenLengthAtLoc(SourceLocation Loc, const SourceManager &SM, |
| 250 | const LangOptions &LangOpts) { |
| 251 | Token TheTok; |
| 252 | if (Lexer::getRawToken(Loc, TheTok, SM, LangOpts)) |
| 253 | return 0; |
| 254 | // FIXME: Here we check whether the token at the location is a greatergreater |
| 255 | // (>>) token and consider it as a single greater (>). This is to get it |
| 256 | // working for templates but it isn't correct for the right shift operator. We |
| 257 | // can avoid this by using half open char ranges in getFileRange() but getting |
| 258 | // token ending is not well supported in macroIDs. |
| 259 | if (TheTok.is(tok::greatergreater)) |
| 260 | return 1; |
| 261 | return TheTok.getLength(); |
| 262 | } |
| 263 | |
| 264 | // Returns location of the last character of the token at a given loc |
| 265 | static SourceLocation getLocForTokenEnd(SourceLocation BeginLoc, |
| 266 | const SourceManager &SM, |
| 267 | const LangOptions &LangOpts) { |
| 268 | unsigned Len = getTokenLengthAtLoc(BeginLoc, SM, LangOpts); |
| 269 | return BeginLoc.getLocWithOffset(Len ? Len - 1 : 0); |
| 270 | } |
| 271 | |
| 272 | // Returns location of the starting of the token at a given EndLoc |
| 273 | static SourceLocation getLocForTokenBegin(SourceLocation EndLoc, |
| 274 | const SourceManager &SM, |
| 275 | const LangOptions &LangOpts) { |
| 276 | return EndLoc.getLocWithOffset( |
| 277 | -(signed)getTokenLengthAtLoc(EndLoc, SM, LangOpts)); |
| 278 | } |
| 279 | |
| 280 | // Converts a char source range to a token range. |
| 281 | static SourceRange toTokenRange(CharSourceRange Range, const SourceManager &SM, |
| 282 | const LangOptions &LangOpts) { |
| 283 | if (!Range.isTokenRange()) |
| 284 | Range.setEnd(getLocForTokenBegin(Range.getEnd(), SM, LangOpts)); |
| 285 | return Range.getAsRange(); |
| 286 | } |
| 287 | // Returns the union of two token ranges. |
| 288 | // To find the maximum of the Ends of the ranges, we compare the location of the |
| 289 | // last character of the token. |
| 290 | static SourceRange unionTokenRange(SourceRange R1, SourceRange R2, |
| 291 | const SourceManager &SM, |
| 292 | const LangOptions &LangOpts) { |
| 293 | SourceLocation E1 = getLocForTokenEnd(R1.getEnd(), SM, LangOpts); |
| 294 | SourceLocation E2 = getLocForTokenEnd(R2.getEnd(), SM, LangOpts); |
| 295 | return SourceRange(std::min(R1.getBegin(), R2.getBegin()), |
| 296 | E1 < E2 ? R2.getEnd() : R1.getEnd()); |
| 297 | } |
| 298 | |
| 299 | // Returns the tokenFileRange for a given Location as a Token Range |
| 300 | // This is quite similar to getFileLoc in SourceManager as both use |
| 301 | // getImmediateExpansionRange and getImmediateSpellingLoc (for macro IDs). |
| 302 | // However: |
| 303 | // - We want to maintain the full range information as we move from one file to |
| 304 | // the next. getFileLoc only uses the BeginLoc of getImmediateExpansionRange. |
| 305 | // - We want to split '>>' tokens as the lexer parses the '>>' in template |
| 306 | // instantiations as a '>>' instead of a '>'. |
| 307 | // There is also getExpansionRange but it simply calls |
| 308 | // getImmediateExpansionRange on the begin and ends separately which is wrong. |
| 309 | static SourceRange getTokenFileRange(SourceLocation Loc, |
| 310 | const SourceManager &SM, |
| 311 | const LangOptions &LangOpts) { |
| 312 | SourceRange FileRange = Loc; |
| 313 | while (!FileRange.getBegin().isFileID()) { |
| 314 | assert(!FileRange.getEnd().isFileID() && |
| 315 | "Both Begin and End should be MacroIDs."); |
| 316 | if (SM.isMacroArgExpansion(FileRange.getBegin())) { |
| 317 | FileRange.setBegin(SM.getImmediateSpellingLoc(FileRange.getBegin())); |
| 318 | FileRange.setEnd(SM.getImmediateSpellingLoc(FileRange.getEnd())); |
| 319 | } else { |
| 320 | SourceRange ExpansionRangeForBegin = toTokenRange( |
| 321 | SM.getImmediateExpansionRange(FileRange.getBegin()), SM, LangOpts); |
| 322 | SourceRange ExpansionRangeForEnd = toTokenRange( |
| 323 | SM.getImmediateExpansionRange(FileRange.getEnd()), SM, LangOpts); |
| 324 | FileRange = unionTokenRange(ExpansionRangeForBegin, ExpansionRangeForEnd, |
| 325 | SM, LangOpts); |
| 326 | } |
| 327 | } |
| 328 | return FileRange; |
| 329 | } |
| 330 | |
| 331 | llvm::Optional<SourceRange> toHalfOpenFileRange(const SourceManager &SM, |
Ilya Biryukov | 4399878 | 2019-01-31 21:30:05 +0000 | [diff] [blame] | 332 | const LangOptions &LangOpts, |
| 333 | SourceRange R) { |
Shaurya Gupta | 0d26d6f | 2019-07-12 11:42:31 +0000 | [diff] [blame] | 334 | SourceRange R1 = getTokenFileRange(R.getBegin(), SM, LangOpts); |
| 335 | if (!isValidFileRange(SM, R1)) |
Ilya Biryukov | 4399878 | 2019-01-31 21:30:05 +0000 | [diff] [blame] | 336 | return llvm::None; |
Ilya Biryukov | 4399878 | 2019-01-31 21:30:05 +0000 | [diff] [blame] | 337 | |
Shaurya Gupta | 0d26d6f | 2019-07-12 11:42:31 +0000 | [diff] [blame] | 338 | SourceRange R2 = getTokenFileRange(R.getEnd(), SM, LangOpts); |
| 339 | if (!isValidFileRange(SM, R2)) |
Ilya Biryukov | 4399878 | 2019-01-31 21:30:05 +0000 | [diff] [blame] | 340 | return llvm::None; |
Shaurya Gupta | 0d26d6f | 2019-07-12 11:42:31 +0000 | [diff] [blame] | 341 | |
| 342 | SourceRange Result = unionTokenRange(R1, R2, SM, LangOpts); |
| 343 | unsigned TokLen = getTokenLengthAtLoc(Result.getEnd(), SM, LangOpts); |
| 344 | // Convert from closed token range to half-open (char) range |
| 345 | Result.setEnd(Result.getEnd().getLocWithOffset(TokLen)); |
| 346 | if (!isValidFileRange(SM, Result)) |
| 347 | return llvm::None; |
| 348 | |
Ilya Biryukov | 4399878 | 2019-01-31 21:30:05 +0000 | [diff] [blame] | 349 | return Result; |
| 350 | } |
| 351 | |
| 352 | llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R) { |
| 353 | assert(isValidFileRange(SM, R)); |
| 354 | bool Invalid = false; |
| 355 | auto *Buf = SM.getBuffer(SM.getFileID(R.getBegin()), &Invalid); |
| 356 | assert(!Invalid); |
| 357 | |
| 358 | size_t BeginOffset = SM.getFileOffset(R.getBegin()); |
| 359 | size_t EndOffset = SM.getFileOffset(R.getEnd()); |
| 360 | return Buf->getBuffer().substr(BeginOffset, EndOffset - BeginOffset); |
| 361 | } |
| 362 | |
Ilya Biryukov | cce67a3 | 2019-01-29 14:17:36 +0000 | [diff] [blame] | 363 | llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM, |
| 364 | Position P) { |
| 365 | llvm::StringRef Code = SM.getBuffer(SM.getMainFileID())->getBuffer(); |
| 366 | auto Offset = |
| 367 | positionToOffset(Code, P, /*AllowColumnBeyondLineLength=*/false); |
| 368 | if (!Offset) |
| 369 | return Offset.takeError(); |
| 370 | return SM.getLocForStartOfFile(SM.getMainFileID()).getLocWithOffset(*Offset); |
| 371 | } |
| 372 | |
Ilya Biryukov | 71028b8 | 2018-03-12 15:28:22 +0000 | [diff] [blame] | 373 | Range halfOpenToRange(const SourceManager &SM, CharSourceRange R) { |
| 374 | // Clang is 1-based, LSP uses 0-based indexes. |
| 375 | Position Begin = sourceLocToPosition(SM, R.getBegin()); |
| 376 | Position End = sourceLocToPosition(SM, R.getEnd()); |
| 377 | |
| 378 | return {Begin, End}; |
| 379 | } |
| 380 | |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 381 | std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code, |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 382 | size_t Offset) { |
| 383 | Offset = std::min(Code.size(), Offset); |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 384 | llvm::StringRef Before = Code.substr(0, Offset); |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 385 | int Lines = Before.count('\n'); |
| 386 | size_t PrevNL = Before.rfind('\n'); |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 387 | size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1); |
Sam McCall | a4962cc | 2018-04-27 11:59:28 +0000 | [diff] [blame] | 388 | return {Lines + 1, Offset - StartOfLine + 1}; |
| 389 | } |
| 390 | |
Ilya Biryukov | 4399878 | 2019-01-31 21:30:05 +0000 | [diff] [blame] | 391 | std::pair<StringRef, StringRef> splitQualifiedName(StringRef QName) { |
Marc-Andre Laperle | b387b6e | 2018-04-23 20:00:52 +0000 | [diff] [blame] | 392 | size_t Pos = QName.rfind("::"); |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 393 | if (Pos == llvm::StringRef::npos) |
| 394 | return {llvm::StringRef(), QName}; |
Marc-Andre Laperle | b387b6e | 2018-04-23 20:00:52 +0000 | [diff] [blame] | 395 | return {QName.substr(0, Pos + 2), QName.substr(Pos + 2)}; |
| 396 | } |
| 397 | |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 398 | TextEdit replacementToEdit(llvm::StringRef Code, |
| 399 | const tooling::Replacement &R) { |
Eric Liu | 9133ecd | 2018-05-11 12:12:08 +0000 | [diff] [blame] | 400 | Range ReplacementRange = { |
| 401 | offsetToPosition(Code, R.getOffset()), |
| 402 | offsetToPosition(Code, R.getOffset() + R.getLength())}; |
| 403 | return {ReplacementRange, R.getReplacementText()}; |
| 404 | } |
| 405 | |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 406 | std::vector<TextEdit> replacementsToEdits(llvm::StringRef Code, |
Eric Liu | 9133ecd | 2018-05-11 12:12:08 +0000 | [diff] [blame] | 407 | const tooling::Replacements &Repls) { |
| 408 | std::vector<TextEdit> Edits; |
| 409 | for (const auto &R : Repls) |
| 410 | Edits.push_back(replacementToEdit(Code, R)); |
| 411 | return Edits; |
| 412 | } |
| 413 | |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 414 | llvm::Optional<std::string> getCanonicalPath(const FileEntry *F, |
| 415 | const SourceManager &SourceMgr) { |
Kadir Cetinkaya | dd67793 | 2018-12-19 10:46:21 +0000 | [diff] [blame] | 416 | if (!F) |
| 417 | return None; |
Simon Marchi | 25f1f73 | 2018-08-10 22:27:53 +0000 | [diff] [blame] | 418 | |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 419 | llvm::SmallString<128> FilePath = F->getName(); |
| 420 | if (!llvm::sys::path::is_absolute(FilePath)) { |
Kadir Cetinkaya | dd67793 | 2018-12-19 10:46:21 +0000 | [diff] [blame] | 421 | if (auto EC = |
Duncan P. N. Exon Smith | db8a742 | 2019-03-26 22:32:06 +0000 | [diff] [blame] | 422 | SourceMgr.getFileManager().getVirtualFileSystem().makeAbsolute( |
Kadir Cetinkaya | dd67793 | 2018-12-19 10:46:21 +0000 | [diff] [blame] | 423 | FilePath)) { |
| 424 | elog("Could not turn relative path '{0}' to absolute: {1}", FilePath, |
| 425 | EC.message()); |
Sam McCall | c008af6 | 2018-10-20 15:30:37 +0000 | [diff] [blame] | 426 | return None; |
Marc-Andre Laperle | 1be6970 | 2018-07-05 19:35:01 +0000 | [diff] [blame] | 427 | } |
| 428 | } |
Simon Marchi | 25f1f73 | 2018-08-10 22:27:53 +0000 | [diff] [blame] | 429 | |
Kadir Cetinkaya | dd67793 | 2018-12-19 10:46:21 +0000 | [diff] [blame] | 430 | // Handle the symbolic link path case where the current working directory |
| 431 | // (getCurrentWorkingDirectory) is a symlink./ We always want to the real |
| 432 | // file path (instead of the symlink path) for the C++ symbols. |
| 433 | // |
| 434 | // Consider the following example: |
| 435 | // |
| 436 | // src dir: /project/src/foo.h |
| 437 | // current working directory (symlink): /tmp/build -> /project/src/ |
| 438 | // |
| 439 | // The file path of Symbol is "/project/src/foo.h" instead of |
| 440 | // "/tmp/build/foo.h" |
| 441 | if (const DirectoryEntry *Dir = SourceMgr.getFileManager().getDirectory( |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 442 | llvm::sys::path::parent_path(FilePath))) { |
| 443 | llvm::SmallString<128> RealPath; |
| 444 | llvm::StringRef DirName = SourceMgr.getFileManager().getCanonicalName(Dir); |
| 445 | llvm::sys::path::append(RealPath, DirName, |
| 446 | llvm::sys::path::filename(FilePath)); |
Kadir Cetinkaya | dd67793 | 2018-12-19 10:46:21 +0000 | [diff] [blame] | 447 | return RealPath.str().str(); |
Simon Marchi | 25f1f73 | 2018-08-10 22:27:53 +0000 | [diff] [blame] | 448 | } |
| 449 | |
Kadir Cetinkaya | dd67793 | 2018-12-19 10:46:21 +0000 | [diff] [blame] | 450 | return FilePath.str().str(); |
Marc-Andre Laperle | 1be6970 | 2018-07-05 19:35:01 +0000 | [diff] [blame] | 451 | } |
| 452 | |
Kadir Cetinkaya | 2f84d91 | 2018-08-08 08:59:29 +0000 | [diff] [blame] | 453 | TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M, |
| 454 | const LangOptions &L) { |
| 455 | TextEdit Result; |
| 456 | Result.range = |
| 457 | halfOpenToRange(M, Lexer::makeFileCharRange(FixIt.RemoveRange, M, L)); |
| 458 | Result.newText = FixIt.CodeToInsert; |
| 459 | return Result; |
| 460 | } |
| 461 | |
Haojian Wu | aa3ed5a | 2019-01-25 15:14:03 +0000 | [diff] [blame] | 462 | bool isRangeConsecutive(const Range &Left, const Range &Right) { |
Kadir Cetinkaya | a9c9d00 | 2018-08-13 08:23:01 +0000 | [diff] [blame] | 463 | return Left.end.line == Right.start.line && |
| 464 | Left.end.character == Right.start.character; |
| 465 | } |
| 466 | |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 467 | FileDigest digest(llvm::StringRef Content) { |
Sam McCall | 674d8a9 | 2019-07-08 11:33:17 +0000 | [diff] [blame] | 468 | uint64_t Hash{llvm::xxHash64(Content)}; |
| 469 | FileDigest Result; |
| 470 | for (unsigned I = 0; I < Result.size(); ++I) { |
| 471 | Result[I] = uint8_t(Hash); |
| 472 | Hash >>= 8; |
| 473 | } |
| 474 | return Result; |
Kadir Cetinkaya | d08eab4 | 2018-11-27 16:08:53 +0000 | [diff] [blame] | 475 | } |
| 476 | |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 477 | llvm::Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) { |
Kadir Cetinkaya | d08eab4 | 2018-11-27 16:08:53 +0000 | [diff] [blame] | 478 | bool Invalid = false; |
Ilya Biryukov | f2001aa | 2019-01-07 15:45:19 +0000 | [diff] [blame] | 479 | llvm::StringRef Content = SM.getBufferData(FID, &Invalid); |
Kadir Cetinkaya | d08eab4 | 2018-11-27 16:08:53 +0000 | [diff] [blame] | 480 | if (Invalid) |
| 481 | return None; |
| 482 | return digest(Content); |
| 483 | } |
| 484 | |
Eric Liu | dd66277 | 2019-01-28 14:01:55 +0000 | [diff] [blame] | 485 | format::FormatStyle getFormatStyleForFile(llvm::StringRef File, |
| 486 | llvm::StringRef Content, |
| 487 | llvm::vfs::FileSystem *FS) { |
| 488 | auto Style = format::getStyle(format::DefaultFormatStyle, File, |
| 489 | format::DefaultFallbackStyle, Content, FS); |
| 490 | if (!Style) { |
| 491 | log("getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File, |
| 492 | Style.takeError()); |
| 493 | Style = format::getLLVMStyle(); |
| 494 | } |
| 495 | return *Style; |
| 496 | } |
| 497 | |
Haojian Wu | 12e194c | 2019-02-06 15:24:50 +0000 | [diff] [blame] | 498 | llvm::Expected<tooling::Replacements> |
| 499 | cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces, |
| 500 | const format::FormatStyle &Style) { |
| 501 | auto CleanReplaces = cleanupAroundReplacements(Code, Replaces, Style); |
| 502 | if (!CleanReplaces) |
| 503 | return CleanReplaces; |
| 504 | return formatReplacements(Code, std::move(*CleanReplaces), Style); |
| 505 | } |
| 506 | |
Sam McCall | c316b22 | 2019-04-26 07:45:49 +0000 | [diff] [blame] | 507 | template <typename Action> |
| 508 | static void lex(llvm::StringRef Code, const format::FormatStyle &Style, |
| 509 | Action A) { |
| 510 | // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated! |
| 511 | std::string NullTerminatedCode = Code.str(); |
| 512 | SourceManagerForFile FileSM("dummy.cpp", NullTerminatedCode); |
Eric Liu | 00d99bd | 2019-04-11 09:36:36 +0000 | [diff] [blame] | 513 | auto &SM = FileSM.get(); |
| 514 | auto FID = SM.getMainFileID(); |
| 515 | Lexer Lex(FID, SM.getBuffer(FID), SM, format::getFormattingLangOpts(Style)); |
| 516 | Token Tok; |
| 517 | |
Sam McCall | c316b22 | 2019-04-26 07:45:49 +0000 | [diff] [blame] | 518 | while (!Lex.LexFromRawLexer(Tok)) |
| 519 | A(Tok); |
| 520 | } |
| 521 | |
| 522 | llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content, |
| 523 | const format::FormatStyle &Style) { |
Eric Liu | 00d99bd | 2019-04-11 09:36:36 +0000 | [diff] [blame] | 524 | llvm::StringMap<unsigned> Identifiers; |
Sam McCall | c316b22 | 2019-04-26 07:45:49 +0000 | [diff] [blame] | 525 | lex(Content, Style, [&](const clang::Token &Tok) { |
Eric Liu | 00d99bd | 2019-04-11 09:36:36 +0000 | [diff] [blame] | 526 | switch (Tok.getKind()) { |
| 527 | case tok::identifier: |
| 528 | ++Identifiers[Tok.getIdentifierInfo()->getName()]; |
| 529 | break; |
| 530 | case tok::raw_identifier: |
| 531 | ++Identifiers[Tok.getRawIdentifier()]; |
| 532 | break; |
| 533 | default: |
Sam McCall | c316b22 | 2019-04-26 07:45:49 +0000 | [diff] [blame] | 534 | break; |
Eric Liu | 00d99bd | 2019-04-11 09:36:36 +0000 | [diff] [blame] | 535 | } |
Sam McCall | c316b22 | 2019-04-26 07:45:49 +0000 | [diff] [blame] | 536 | }); |
Eric Liu | 00d99bd | 2019-04-11 09:36:36 +0000 | [diff] [blame] | 537 | return Identifiers; |
| 538 | } |
| 539 | |
Sam McCall | c316b22 | 2019-04-26 07:45:49 +0000 | [diff] [blame] | 540 | namespace { |
// Kinds of event reported while scanning code for namespace changes.
// Each event carries a string payload whose meaning depends on the kind.
enum NamespaceEvent {
  // Saw `namespace <ns> {`. Payload is the resolved <ns>.
  BeginNamespace,
  // Saw the `}` closing a namespace. Payload is the resolved *outer*
  // namespace, i.e. the one that is current after the close.
  EndNamespace,
  // Saw `using namespace <ns>;`. Payload is <ns> as written (unresolved).
  UsingDirective
};
| 546 | // Scans C++ source code for constructs that change the visible namespaces. |
| 547 | void parseNamespaceEvents( |
| 548 | llvm::StringRef Code, const format::FormatStyle &Style, |
| 549 | llvm::function_ref<void(NamespaceEvent, llvm::StringRef)> Callback) { |
| 550 | |
| 551 | // Stack of enclosing namespaces, e.g. {"clang", "clangd"} |
| 552 | std::vector<std::string> Enclosing; // Contains e.g. "clang", "clangd" |
| 553 | // Stack counts open braces. true if the brace opened a namespace. |
| 554 | std::vector<bool> BraceStack; |
| 555 | |
| 556 | enum { |
| 557 | Default, |
| 558 | Namespace, // just saw 'namespace' |
| 559 | NamespaceName, // just saw 'namespace' NSName |
| 560 | Using, // just saw 'using' |
| 561 | UsingNamespace, // just saw 'using namespace' |
| 562 | UsingNamespaceName, // just saw 'using namespace' NSName |
| 563 | } State = Default; |
| 564 | std::string NSName; |
| 565 | |
| 566 | lex(Code, Style, [&](const clang::Token &Tok) { |
| 567 | switch(Tok.getKind()) { |
| 568 | case tok::raw_identifier: |
| 569 | // In raw mode, this could be a keyword or a name. |
| 570 | switch (State) { |
| 571 | case UsingNamespace: |
| 572 | case UsingNamespaceName: |
| 573 | NSName.append(Tok.getRawIdentifier()); |
| 574 | State = UsingNamespaceName; |
| 575 | break; |
| 576 | case Namespace: |
| 577 | case NamespaceName: |
| 578 | NSName.append(Tok.getRawIdentifier()); |
| 579 | State = NamespaceName; |
| 580 | break; |
| 581 | case Using: |
| 582 | State = |
| 583 | (Tok.getRawIdentifier() == "namespace") ? UsingNamespace : Default; |
| 584 | break; |
| 585 | case Default: |
| 586 | NSName.clear(); |
| 587 | if (Tok.getRawIdentifier() == "namespace") |
| 588 | State = Namespace; |
| 589 | else if (Tok.getRawIdentifier() == "using") |
| 590 | State = Using; |
| 591 | break; |
| 592 | } |
| 593 | break; |
| 594 | case tok::coloncolon: |
| 595 | // This can come at the beginning or in the middle of a namespace name. |
| 596 | switch (State) { |
| 597 | case UsingNamespace: |
| 598 | case UsingNamespaceName: |
| 599 | NSName.append("::"); |
| 600 | State = UsingNamespaceName; |
| 601 | break; |
| 602 | case NamespaceName: |
| 603 | NSName.append("::"); |
| 604 | State = NamespaceName; |
| 605 | break; |
| 606 | case Namespace: // Not legal here. |
| 607 | case Using: |
| 608 | case Default: |
| 609 | State = Default; |
| 610 | break; |
| 611 | } |
| 612 | break; |
| 613 | case tok::l_brace: |
| 614 | // Record which { started a namespace, so we know when } ends one. |
| 615 | if (State == NamespaceName) { |
| 616 | // Parsed: namespace <name> { |
| 617 | BraceStack.push_back(true); |
| 618 | Enclosing.push_back(NSName); |
| 619 | Callback(BeginNamespace, llvm::join(Enclosing, "::")); |
| 620 | } else { |
| 621 | // This case includes anonymous namespaces (State = Namespace). |
| 622 | // For our purposes, they're not namespaces and we ignore them. |
| 623 | BraceStack.push_back(false); |
| 624 | } |
| 625 | State = Default; |
| 626 | break; |
| 627 | case tok::r_brace: |
| 628 | // If braces are unmatched, we're going to be confused, but don't crash. |
| 629 | if (!BraceStack.empty()) { |
| 630 | if (BraceStack.back()) { |
| 631 | // Parsed: } // namespace |
| 632 | Enclosing.pop_back(); |
| 633 | Callback(EndNamespace, llvm::join(Enclosing, "::")); |
| 634 | } |
| 635 | BraceStack.pop_back(); |
| 636 | } |
| 637 | break; |
| 638 | case tok::semi: |
| 639 | if (State == UsingNamespaceName) |
| 640 | // Parsed: using namespace <name> ; |
| 641 | Callback(UsingDirective, llvm::StringRef(NSName)); |
| 642 | State = Default; |
| 643 | break; |
| 644 | default: |
| 645 | State = Default; |
| 646 | break; |
| 647 | } |
| 648 | }); |
| 649 | } |
| 650 | |
| 651 | // Returns the prefix namespaces of NS: {"" ... NS}. |
| 652 | llvm::SmallVector<llvm::StringRef, 8> ancestorNamespaces(llvm::StringRef NS) { |
| 653 | llvm::SmallVector<llvm::StringRef, 8> Results; |
| 654 | Results.push_back(NS.take_front(0)); |
| 655 | NS.split(Results, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false); |
| 656 | for (llvm::StringRef &R : Results) |
| 657 | R = NS.take_front(R.end() - NS.begin()); |
| 658 | return Results; |
| 659 | } |
| 660 | |
| 661 | } // namespace |
| 662 | |
| 663 | std::vector<std::string> visibleNamespaces(llvm::StringRef Code, |
| 664 | const format::FormatStyle &Style) { |
| 665 | std::string Current; |
| 666 | // Map from namespace to (resolved) namespaces introduced via using directive. |
| 667 | llvm::StringMap<llvm::StringSet<>> UsingDirectives; |
| 668 | |
| 669 | parseNamespaceEvents(Code, Style, |
| 670 | [&](NamespaceEvent Event, llvm::StringRef NS) { |
| 671 | switch (Event) { |
| 672 | case BeginNamespace: |
| 673 | case EndNamespace: |
| 674 | Current = NS; |
| 675 | break; |
| 676 | case UsingDirective: |
| 677 | if (NS.consume_front("::")) |
| 678 | UsingDirectives[Current].insert(NS); |
| 679 | else { |
| 680 | for (llvm::StringRef Enclosing : |
| 681 | ancestorNamespaces(Current)) { |
| 682 | if (Enclosing.empty()) |
| 683 | UsingDirectives[Current].insert(NS); |
| 684 | else |
| 685 | UsingDirectives[Current].insert( |
| 686 | (Enclosing + "::" + NS).str()); |
| 687 | } |
| 688 | } |
| 689 | break; |
| 690 | } |
| 691 | }); |
| 692 | |
| 693 | std::vector<std::string> Found; |
| 694 | for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) { |
| 695 | Found.push_back(Enclosing); |
| 696 | auto It = UsingDirectives.find(Enclosing); |
| 697 | if (It != UsingDirectives.end()) |
| 698 | for (const auto& Used : It->second) |
| 699 | Found.push_back(Used.getKey()); |
| 700 | } |
| 701 | |
Sam McCall | c316b22 | 2019-04-26 07:45:49 +0000 | [diff] [blame] | 702 | llvm::sort(Found, [&](const std::string &LHS, const std::string &RHS) { |
| 703 | if (Current == RHS) |
| 704 | return false; |
| 705 | if (Current == LHS) |
| 706 | return true; |
| 707 | return LHS < RHS; |
| 708 | }); |
| 709 | Found.erase(std::unique(Found.begin(), Found.end()), Found.end()); |
| 710 | return Found; |
| 711 | } |
| 712 | |
Sam McCall | 9fb22b2 | 2019-05-06 10:25:10 +0000 | [diff] [blame] | 713 | llvm::StringSet<> collectWords(llvm::StringRef Content) { |
| 714 | // We assume short words are not significant. |
| 715 | // We may want to consider other stopwords, e.g. language keywords. |
| 716 | // (A very naive implementation showed no benefit, but lexing might do better) |
| 717 | static constexpr int MinWordLength = 4; |
| 718 | |
| 719 | std::vector<CharRole> Roles(Content.size()); |
| 720 | calculateRoles(Content, Roles); |
| 721 | |
| 722 | llvm::StringSet<> Result; |
| 723 | llvm::SmallString<256> Word; |
| 724 | auto Flush = [&] { |
| 725 | if (Word.size() >= MinWordLength) { |
| 726 | for (char &C : Word) |
| 727 | C = llvm::toLower(C); |
| 728 | Result.insert(Word); |
| 729 | } |
| 730 | Word.clear(); |
| 731 | }; |
| 732 | for (unsigned I = 0; I < Content.size(); ++I) { |
| 733 | switch (Roles[I]) { |
| 734 | case Head: |
| 735 | Flush(); |
| 736 | LLVM_FALLTHROUGH; |
| 737 | case Tail: |
| 738 | Word.push_back(Content[I]); |
| 739 | break; |
| 740 | case Unknown: |
| 741 | case Separator: |
| 742 | Flush(); |
| 743 | break; |
| 744 | } |
| 745 | } |
| 746 | Flush(); |
| 747 | |
| 748 | return Result; |
| 749 | } |
| 750 | |
Haojian Wu | 9d34f45 | 2019-07-01 09:26:48 +0000 | [diff] [blame] | 751 | llvm::Optional<DefinedMacro> locateMacroAt(SourceLocation Loc, |
| 752 | Preprocessor &PP) { |
| 753 | const auto &SM = PP.getSourceManager(); |
| 754 | const auto &LangOpts = PP.getLangOpts(); |
| 755 | Token Result; |
| 756 | if (Lexer::getRawToken(SM.getSpellingLoc(Loc), Result, SM, LangOpts, false)) |
| 757 | return None; |
| 758 | if (Result.is(tok::raw_identifier)) |
| 759 | PP.LookUpIdentifierInfo(Result); |
| 760 | IdentifierInfo *IdentifierInfo = Result.getIdentifierInfo(); |
| 761 | if (!IdentifierInfo || !IdentifierInfo->hadMacroDefinition()) |
| 762 | return None; |
| 763 | |
| 764 | std::pair<FileID, unsigned int> DecLoc = SM.getDecomposedExpansionLoc(Loc); |
| 765 | // Get the definition just before the searched location so that a macro |
| 766 | // referenced in a '#undef MACRO' can still be found. |
| 767 | SourceLocation BeforeSearchedLocation = |
| 768 | SM.getMacroArgExpandedLocation(SM.getLocForStartOfFile(DecLoc.first) |
| 769 | .getLocWithOffset(DecLoc.second - 1)); |
| 770 | MacroDefinition MacroDef = |
| 771 | PP.getMacroDefinitionAtLoc(IdentifierInfo, BeforeSearchedLocation); |
| 772 | if (auto *MI = MacroDef.getMacroInfo()) |
| 773 | return DefinedMacro{IdentifierInfo->getName(), MI}; |
| 774 | return None; |
| 775 | } |
| 776 | |
Sam McCall | b536a2a | 2017-12-19 12:23:48 +0000 | [diff] [blame] | 777 | } // namespace clangd |
| 778 | } // namespace clang |