blob: 1999fcd32614406211ebb9c1fa84c472766702b8 [file] [log] [blame]
//===--- SourceCode.cpp - Manipulating source code as strings ---*- C++ -*-===//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Sam McCallb536a2a2017-12-19 12:23:48 +00006//
7//===----------------------------------------------------------------------===//
8#include "SourceCode.h"
9
Sam McCalla69698f2019-03-27 17:47:49 +000010#include "Context.h"
Marc-Andre Laperle1be69702018-07-05 19:35:01 +000011#include "Logger.h"
Sam McCalla69698f2019-03-27 17:47:49 +000012#include "Protocol.h"
Marc-Andre Laperle1be69702018-07-05 19:35:01 +000013#include "clang/AST/ASTContext.h"
Marc-Andre Laperle63a10982018-02-21 02:39:08 +000014#include "clang/Basic/SourceManager.h"
Sam McCallc316b222019-04-26 07:45:49 +000015#include "clang/Basic/TokenKinds.h"
16#include "clang/Format/Format.h"
Marc-Andre Laperle1be69702018-07-05 19:35:01 +000017#include "clang/Lex/Lexer.h"
Ilya Biryukov43998782019-01-31 21:30:05 +000018#include "llvm/ADT/None.h"
Sam McCallc316b222019-04-26 07:45:49 +000019#include "llvm/ADT/StringExtras.h"
Ilya Biryukov43998782019-01-31 21:30:05 +000020#include "llvm/ADT/StringRef.h"
Simon Marchi766338a2018-03-21 14:36:46 +000021#include "llvm/Support/Errc.h"
22#include "llvm/Support/Error.h"
Sam McCall8b25d222019-03-28 14:37:51 +000023#include "llvm/Support/ErrorHandling.h"
Marc-Andre Laperle1be69702018-07-05 19:35:01 +000024#include "llvm/Support/Path.h"
Sam McCallc316b222019-04-26 07:45:49 +000025#include <algorithm>
Marc-Andre Laperle63a10982018-02-21 02:39:08 +000026
Sam McCallb536a2a2017-12-19 12:23:48 +000027namespace clang {
28namespace clangd {
Sam McCallb536a2a2017-12-19 12:23:48 +000029
Sam McCalla4962cc2018-04-27 11:59:28 +000030// Here be dragons. LSP positions use columns measured in *UTF-16 code units*!
31// Clangd uses UTF-8 and byte-offsets internally, so conversion is nontrivial.
32
33// Iterates over unicode codepoints in the (UTF-8) string. For each,
34// invokes CB(UTF-8 length, UTF-16 length), and breaks if it returns true.
35// Returns true if CB returned true, false if we hit the end of string.
36template <typename Callback>
Ilya Biryukovf2001aa2019-01-07 15:45:19 +000037static bool iterateCodepoints(llvm::StringRef U8, const Callback &CB) {
Sam McCall8b25d222019-03-28 14:37:51 +000038 // A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
39 // Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx.
Sam McCalla4962cc2018-04-27 11:59:28 +000040 for (size_t I = 0; I < U8.size();) {
41 unsigned char C = static_cast<unsigned char>(U8[I]);
42 if (LLVM_LIKELY(!(C & 0x80))) { // ASCII character.
43 if (CB(1, 1))
44 return true;
45 ++I;
46 continue;
47 }
48 // This convenient property of UTF-8 holds for all non-ASCII characters.
Ilya Biryukovf2001aa2019-01-07 15:45:19 +000049 size_t UTF8Length = llvm::countLeadingOnes(C);
Sam McCalla4962cc2018-04-27 11:59:28 +000050 // 0xxx is ASCII, handled above. 10xxx is a trailing byte, invalid here.
51 // 11111xxx is not valid UTF-8 at all. Assert because it's probably our bug.
52 assert((UTF8Length >= 2 && UTF8Length <= 4) &&
53 "Invalid UTF-8, or transcoding bug?");
54 I += UTF8Length; // Skip over all trailing bytes.
55 // A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
56 // Astral codepoints are encoded as 4 bytes in UTF-8 (11110xxx ...)
57 if (CB(UTF8Length, UTF8Length == 4 ? 2 : 1))
58 return true;
59 }
60 return false;
61}
62
Sam McCall8b25d222019-03-28 14:37:51 +000063// Returns the byte offset into the string that is an offset of \p Units in
64// the specified encoding.
65// Conceptually, this converts to the encoding, truncates to CodeUnits,
66// converts back to UTF-8, and returns the length in bytes.
67static size_t measureUnits(llvm::StringRef U8, int Units, OffsetEncoding Enc,
68 bool &Valid) {
69 Valid = Units >= 0;
70 if (Units <= 0)
71 return 0;
Sam McCalla4962cc2018-04-27 11:59:28 +000072 size_t Result = 0;
Sam McCall8b25d222019-03-28 14:37:51 +000073 switch (Enc) {
74 case OffsetEncoding::UTF8:
75 Result = Units;
76 break;
77 case OffsetEncoding::UTF16:
78 Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) {
79 Result += U8Len;
80 Units -= U16Len;
81 return Units <= 0;
82 });
83 if (Units < 0) // Offset in the middle of a surrogate pair.
84 Valid = false;
85 break;
86 case OffsetEncoding::UTF32:
87 Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) {
88 Result += U8Len;
89 Units--;
90 return Units <= 0;
91 });
92 break;
93 case OffsetEncoding::UnsupportedEncoding:
94 llvm_unreachable("unsupported encoding");
95 }
Sam McCalla4962cc2018-04-27 11:59:28 +000096 // Don't return an out-of-range index if we overran.
Sam McCall8b25d222019-03-28 14:37:51 +000097 if (Result > U8.size()) {
98 Valid = false;
99 return U8.size();
100 }
101 return Result;
Sam McCalla4962cc2018-04-27 11:59:28 +0000102}
103
Sam McCalla69698f2019-03-27 17:47:49 +0000104Key<OffsetEncoding> kCurrentOffsetEncoding;
Sam McCall8b25d222019-03-28 14:37:51 +0000105static OffsetEncoding lspEncoding() {
Sam McCalla69698f2019-03-27 17:47:49 +0000106 auto *Enc = Context::current().get(kCurrentOffsetEncoding);
Sam McCall8b25d222019-03-28 14:37:51 +0000107 return Enc ? *Enc : OffsetEncoding::UTF16;
Sam McCalla69698f2019-03-27 17:47:49 +0000108}
109
Sam McCalla4962cc2018-04-27 11:59:28 +0000110// Like most strings in clangd, the input is UTF-8 encoded.
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000111size_t lspLength(llvm::StringRef Code) {
Sam McCalla4962cc2018-04-27 11:59:28 +0000112 size_t Count = 0;
Sam McCall8b25d222019-03-28 14:37:51 +0000113 switch (lspEncoding()) {
114 case OffsetEncoding::UTF8:
115 Count = Code.size();
116 break;
117 case OffsetEncoding::UTF16:
118 iterateCodepoints(Code, [&](int U8Len, int U16Len) {
119 Count += U16Len;
120 return false;
121 });
122 break;
123 case OffsetEncoding::UTF32:
124 iterateCodepoints(Code, [&](int U8Len, int U16Len) {
125 ++Count;
126 return false;
127 });
128 break;
129 case OffsetEncoding::UnsupportedEncoding:
130 llvm_unreachable("unsupported encoding");
131 }
Sam McCalla4962cc2018-04-27 11:59:28 +0000132 return Count;
133}
134
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000135llvm::Expected<size_t> positionToOffset(llvm::StringRef Code, Position P,
136 bool AllowColumnsBeyondLineLength) {
Sam McCallb536a2a2017-12-19 12:23:48 +0000137 if (P.line < 0)
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000138 return llvm::make_error<llvm::StringError>(
139 llvm::formatv("Line value can't be negative ({0})", P.line),
140 llvm::errc::invalid_argument);
Simon Marchi766338a2018-03-21 14:36:46 +0000141 if (P.character < 0)
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000142 return llvm::make_error<llvm::StringError>(
143 llvm::formatv("Character value can't be negative ({0})", P.character),
144 llvm::errc::invalid_argument);
Sam McCallb536a2a2017-12-19 12:23:48 +0000145 size_t StartOfLine = 0;
146 for (int I = 0; I != P.line; ++I) {
147 size_t NextNL = Code.find('\n', StartOfLine);
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000148 if (NextNL == llvm::StringRef::npos)
149 return llvm::make_error<llvm::StringError>(
150 llvm::formatv("Line value is out of range ({0})", P.line),
151 llvm::errc::invalid_argument);
Sam McCallb536a2a2017-12-19 12:23:48 +0000152 StartOfLine = NextNL + 1;
153 }
Sam McCalla69698f2019-03-27 17:47:49 +0000154 StringRef Line =
155 Code.substr(StartOfLine).take_until([](char C) { return C == '\n'; });
Simon Marchi766338a2018-03-21 14:36:46 +0000156
Sam McCall8b25d222019-03-28 14:37:51 +0000157 // P.character may be in UTF-16, transcode if necessary.
Sam McCalla4962cc2018-04-27 11:59:28 +0000158 bool Valid;
Sam McCall8b25d222019-03-28 14:37:51 +0000159 size_t ByteInLine = measureUnits(Line, P.character, lspEncoding(), Valid);
Sam McCalla4962cc2018-04-27 11:59:28 +0000160 if (!Valid && !AllowColumnsBeyondLineLength)
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000161 return llvm::make_error<llvm::StringError>(
Sam McCall8b25d222019-03-28 14:37:51 +0000162 llvm::formatv("{0} offset {1} is invalid for line {2}", lspEncoding(),
163 P.character, P.line),
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000164 llvm::errc::invalid_argument);
Sam McCall8b25d222019-03-28 14:37:51 +0000165 return StartOfLine + ByteInLine;
Sam McCallb536a2a2017-12-19 12:23:48 +0000166}
167
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000168Position offsetToPosition(llvm::StringRef Code, size_t Offset) {
Sam McCallb536a2a2017-12-19 12:23:48 +0000169 Offset = std::min(Code.size(), Offset);
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000170 llvm::StringRef Before = Code.substr(0, Offset);
Sam McCallb536a2a2017-12-19 12:23:48 +0000171 int Lines = Before.count('\n');
172 size_t PrevNL = Before.rfind('\n');
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000173 size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1);
Ilya Biryukov7beea3a2018-02-14 10:52:04 +0000174 Position Pos;
175 Pos.line = Lines;
Sam McCall71891122018-10-23 11:51:53 +0000176 Pos.character = lspLength(Before.substr(StartOfLine));
Ilya Biryukov7beea3a2018-02-14 10:52:04 +0000177 return Pos;
Sam McCallb536a2a2017-12-19 12:23:48 +0000178}
179
Marc-Andre Laperle63a10982018-02-21 02:39:08 +0000180Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc) {
Sam McCalla4962cc2018-04-27 11:59:28 +0000181 // We use the SourceManager's line tables, but its column number is in bytes.
182 FileID FID;
183 unsigned Offset;
184 std::tie(FID, Offset) = SM.getDecomposedSpellingLoc(Loc);
Marc-Andre Laperle63a10982018-02-21 02:39:08 +0000185 Position P;
Sam McCalla4962cc2018-04-27 11:59:28 +0000186 P.line = static_cast<int>(SM.getLineNumber(FID, Offset)) - 1;
187 bool Invalid = false;
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000188 llvm::StringRef Code = SM.getBufferData(FID, &Invalid);
Sam McCalla4962cc2018-04-27 11:59:28 +0000189 if (!Invalid) {
190 auto ColumnInBytes = SM.getColumnNumber(FID, Offset) - 1;
191 auto LineSoFar = Code.substr(Offset - ColumnInBytes, ColumnInBytes);
Sam McCall71891122018-10-23 11:51:53 +0000192 P.character = lspLength(LineSoFar);
Sam McCalla4962cc2018-04-27 11:59:28 +0000193 }
Marc-Andre Laperle63a10982018-02-21 02:39:08 +0000194 return P;
195}
196
Ilya Biryukov43998782019-01-31 21:30:05 +0000197bool isValidFileRange(const SourceManager &Mgr, SourceRange R) {
198 if (!R.getBegin().isValid() || !R.getEnd().isValid())
199 return false;
200
201 FileID BeginFID;
202 size_t BeginOffset = 0;
203 std::tie(BeginFID, BeginOffset) = Mgr.getDecomposedLoc(R.getBegin());
204
205 FileID EndFID;
206 size_t EndOffset = 0;
207 std::tie(EndFID, EndOffset) = Mgr.getDecomposedLoc(R.getEnd());
208
209 return BeginFID.isValid() && BeginFID == EndFID && BeginOffset <= EndOffset;
210}
211
212bool halfOpenRangeContains(const SourceManager &Mgr, SourceRange R,
213 SourceLocation L) {
214 assert(isValidFileRange(Mgr, R));
215
216 FileID BeginFID;
217 size_t BeginOffset = 0;
218 std::tie(BeginFID, BeginOffset) = Mgr.getDecomposedLoc(R.getBegin());
219 size_t EndOffset = Mgr.getFileOffset(R.getEnd());
220
221 FileID LFid;
222 size_t LOffset;
223 std::tie(LFid, LOffset) = Mgr.getDecomposedLoc(L);
224 return BeginFID == LFid && BeginOffset <= LOffset && LOffset < EndOffset;
225}
226
227bool halfOpenRangeTouches(const SourceManager &Mgr, SourceRange R,
228 SourceLocation L) {
229 return L == R.getEnd() || halfOpenRangeContains(Mgr, R, L);
230}
231
232llvm::Optional<SourceRange> toHalfOpenFileRange(const SourceManager &Mgr,
233 const LangOptions &LangOpts,
234 SourceRange R) {
235 auto Begin = Mgr.getFileLoc(R.getBegin());
236 if (Begin.isInvalid())
237 return llvm::None;
238 auto End = Mgr.getFileLoc(R.getEnd());
239 if (End.isInvalid())
240 return llvm::None;
241 End = Lexer::getLocForEndOfToken(End, 0, Mgr, LangOpts);
242
243 SourceRange Result(Begin, End);
244 if (!isValidFileRange(Mgr, Result))
245 return llvm::None;
246 return Result;
247}
248
249llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R) {
250 assert(isValidFileRange(SM, R));
251 bool Invalid = false;
252 auto *Buf = SM.getBuffer(SM.getFileID(R.getBegin()), &Invalid);
253 assert(!Invalid);
254
255 size_t BeginOffset = SM.getFileOffset(R.getBegin());
256 size_t EndOffset = SM.getFileOffset(R.getEnd());
257 return Buf->getBuffer().substr(BeginOffset, EndOffset - BeginOffset);
258}
259
Ilya Biryukovcce67a32019-01-29 14:17:36 +0000260llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM,
261 Position P) {
262 llvm::StringRef Code = SM.getBuffer(SM.getMainFileID())->getBuffer();
263 auto Offset =
264 positionToOffset(Code, P, /*AllowColumnBeyondLineLength=*/false);
265 if (!Offset)
266 return Offset.takeError();
267 return SM.getLocForStartOfFile(SM.getMainFileID()).getLocWithOffset(*Offset);
268}
269
Ilya Biryukov71028b82018-03-12 15:28:22 +0000270Range halfOpenToRange(const SourceManager &SM, CharSourceRange R) {
271 // Clang is 1-based, LSP uses 0-based indexes.
272 Position Begin = sourceLocToPosition(SM, R.getBegin());
273 Position End = sourceLocToPosition(SM, R.getEnd());
274
275 return {Begin, End};
276}
277
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000278std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code,
Sam McCalla4962cc2018-04-27 11:59:28 +0000279 size_t Offset) {
280 Offset = std::min(Code.size(), Offset);
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000281 llvm::StringRef Before = Code.substr(0, Offset);
Sam McCalla4962cc2018-04-27 11:59:28 +0000282 int Lines = Before.count('\n');
283 size_t PrevNL = Before.rfind('\n');
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000284 size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1);
Sam McCalla4962cc2018-04-27 11:59:28 +0000285 return {Lines + 1, Offset - StartOfLine + 1};
286}
287
Ilya Biryukov43998782019-01-31 21:30:05 +0000288std::pair<StringRef, StringRef> splitQualifiedName(StringRef QName) {
Marc-Andre Laperleb387b6e2018-04-23 20:00:52 +0000289 size_t Pos = QName.rfind("::");
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000290 if (Pos == llvm::StringRef::npos)
291 return {llvm::StringRef(), QName};
Marc-Andre Laperleb387b6e2018-04-23 20:00:52 +0000292 return {QName.substr(0, Pos + 2), QName.substr(Pos + 2)};
293}
294
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000295TextEdit replacementToEdit(llvm::StringRef Code,
296 const tooling::Replacement &R) {
Eric Liu9133ecd2018-05-11 12:12:08 +0000297 Range ReplacementRange = {
298 offsetToPosition(Code, R.getOffset()),
299 offsetToPosition(Code, R.getOffset() + R.getLength())};
300 return {ReplacementRange, R.getReplacementText()};
301}
302
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000303std::vector<TextEdit> replacementsToEdits(llvm::StringRef Code,
Eric Liu9133ecd2018-05-11 12:12:08 +0000304 const tooling::Replacements &Repls) {
305 std::vector<TextEdit> Edits;
306 for (const auto &R : Repls)
307 Edits.push_back(replacementToEdit(Code, R));
308 return Edits;
309}
310
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000311llvm::Optional<std::string> getCanonicalPath(const FileEntry *F,
312 const SourceManager &SourceMgr) {
Kadir Cetinkayadd677932018-12-19 10:46:21 +0000313 if (!F)
314 return None;
Simon Marchi25f1f732018-08-10 22:27:53 +0000315
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000316 llvm::SmallString<128> FilePath = F->getName();
317 if (!llvm::sys::path::is_absolute(FilePath)) {
Kadir Cetinkayadd677932018-12-19 10:46:21 +0000318 if (auto EC =
Duncan P. N. Exon Smithdb8a7422019-03-26 22:32:06 +0000319 SourceMgr.getFileManager().getVirtualFileSystem().makeAbsolute(
Kadir Cetinkayadd677932018-12-19 10:46:21 +0000320 FilePath)) {
321 elog("Could not turn relative path '{0}' to absolute: {1}", FilePath,
322 EC.message());
Sam McCallc008af62018-10-20 15:30:37 +0000323 return None;
Marc-Andre Laperle1be69702018-07-05 19:35:01 +0000324 }
325 }
Simon Marchi25f1f732018-08-10 22:27:53 +0000326
Kadir Cetinkayadd677932018-12-19 10:46:21 +0000327 // Handle the symbolic link path case where the current working directory
328 // (getCurrentWorkingDirectory) is a symlink./ We always want to the real
329 // file path (instead of the symlink path) for the C++ symbols.
330 //
331 // Consider the following example:
332 //
333 // src dir: /project/src/foo.h
334 // current working directory (symlink): /tmp/build -> /project/src/
335 //
336 // The file path of Symbol is "/project/src/foo.h" instead of
337 // "/tmp/build/foo.h"
338 if (const DirectoryEntry *Dir = SourceMgr.getFileManager().getDirectory(
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000339 llvm::sys::path::parent_path(FilePath))) {
340 llvm::SmallString<128> RealPath;
341 llvm::StringRef DirName = SourceMgr.getFileManager().getCanonicalName(Dir);
342 llvm::sys::path::append(RealPath, DirName,
343 llvm::sys::path::filename(FilePath));
Kadir Cetinkayadd677932018-12-19 10:46:21 +0000344 return RealPath.str().str();
Simon Marchi25f1f732018-08-10 22:27:53 +0000345 }
346
Kadir Cetinkayadd677932018-12-19 10:46:21 +0000347 return FilePath.str().str();
Marc-Andre Laperle1be69702018-07-05 19:35:01 +0000348}
349
Kadir Cetinkaya2f84d912018-08-08 08:59:29 +0000350TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M,
351 const LangOptions &L) {
352 TextEdit Result;
353 Result.range =
354 halfOpenToRange(M, Lexer::makeFileCharRange(FixIt.RemoveRange, M, L));
355 Result.newText = FixIt.CodeToInsert;
356 return Result;
357}
358
Haojian Wuaa3ed5a2019-01-25 15:14:03 +0000359bool isRangeConsecutive(const Range &Left, const Range &Right) {
Kadir Cetinkayaa9c9d002018-08-13 08:23:01 +0000360 return Left.end.line == Right.start.line &&
361 Left.end.character == Right.start.character;
362}
363
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000364FileDigest digest(llvm::StringRef Content) {
Kadir Cetinkayad08eab42018-11-27 16:08:53 +0000365 return llvm::SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
366}
367
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000368llvm::Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {
Kadir Cetinkayad08eab42018-11-27 16:08:53 +0000369 bool Invalid = false;
Ilya Biryukovf2001aa2019-01-07 15:45:19 +0000370 llvm::StringRef Content = SM.getBufferData(FID, &Invalid);
Kadir Cetinkayad08eab42018-11-27 16:08:53 +0000371 if (Invalid)
372 return None;
373 return digest(Content);
374}
375
Eric Liudd662772019-01-28 14:01:55 +0000376format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
377 llvm::StringRef Content,
378 llvm::vfs::FileSystem *FS) {
379 auto Style = format::getStyle(format::DefaultFormatStyle, File,
380 format::DefaultFallbackStyle, Content, FS);
381 if (!Style) {
382 log("getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File,
383 Style.takeError());
384 Style = format::getLLVMStyle();
385 }
386 return *Style;
387}
388
Haojian Wu12e194c2019-02-06 15:24:50 +0000389llvm::Expected<tooling::Replacements>
390cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces,
391 const format::FormatStyle &Style) {
392 auto CleanReplaces = cleanupAroundReplacements(Code, Replaces, Style);
393 if (!CleanReplaces)
394 return CleanReplaces;
395 return formatReplacements(Code, std::move(*CleanReplaces), Style);
396}
397
Sam McCallc316b222019-04-26 07:45:49 +0000398template <typename Action>
399static void lex(llvm::StringRef Code, const format::FormatStyle &Style,
400 Action A) {
401 // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated!
402 std::string NullTerminatedCode = Code.str();
403 SourceManagerForFile FileSM("dummy.cpp", NullTerminatedCode);
Eric Liu00d99bd2019-04-11 09:36:36 +0000404 auto &SM = FileSM.get();
405 auto FID = SM.getMainFileID();
406 Lexer Lex(FID, SM.getBuffer(FID), SM, format::getFormattingLangOpts(Style));
407 Token Tok;
408
Sam McCallc316b222019-04-26 07:45:49 +0000409 while (!Lex.LexFromRawLexer(Tok))
410 A(Tok);
411}
412
413llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
414 const format::FormatStyle &Style) {
Eric Liu00d99bd2019-04-11 09:36:36 +0000415 llvm::StringMap<unsigned> Identifiers;
Sam McCallc316b222019-04-26 07:45:49 +0000416 lex(Content, Style, [&](const clang::Token &Tok) {
Eric Liu00d99bd2019-04-11 09:36:36 +0000417 switch (Tok.getKind()) {
418 case tok::identifier:
419 ++Identifiers[Tok.getIdentifierInfo()->getName()];
420 break;
421 case tok::raw_identifier:
422 ++Identifiers[Tok.getRawIdentifier()];
423 break;
424 default:
Sam McCallc316b222019-04-26 07:45:49 +0000425 break;
Eric Liu00d99bd2019-04-11 09:36:36 +0000426 }
Sam McCallc316b222019-04-26 07:45:49 +0000427 });
Eric Liu00d99bd2019-04-11 09:36:36 +0000428 return Identifiers;
429}
430
Sam McCallc316b222019-04-26 07:45:49 +0000431namespace {
432enum NamespaceEvent {
433 BeginNamespace, // namespace <ns> {. Payload is resolved <ns>.
434 EndNamespace, // } // namespace <ns>. Payload is resolved *outer* namespace.
435 UsingDirective // using namespace <ns>. Payload is unresolved <ns>.
436};
437// Scans C++ source code for constructs that change the visible namespaces.
438void parseNamespaceEvents(
439 llvm::StringRef Code, const format::FormatStyle &Style,
440 llvm::function_ref<void(NamespaceEvent, llvm::StringRef)> Callback) {
441
442 // Stack of enclosing namespaces, e.g. {"clang", "clangd"}
443 std::vector<std::string> Enclosing; // Contains e.g. "clang", "clangd"
444 // Stack counts open braces. true if the brace opened a namespace.
445 std::vector<bool> BraceStack;
446
447 enum {
448 Default,
449 Namespace, // just saw 'namespace'
450 NamespaceName, // just saw 'namespace' NSName
451 Using, // just saw 'using'
452 UsingNamespace, // just saw 'using namespace'
453 UsingNamespaceName, // just saw 'using namespace' NSName
454 } State = Default;
455 std::string NSName;
456
457 lex(Code, Style, [&](const clang::Token &Tok) {
458 switch(Tok.getKind()) {
459 case tok::raw_identifier:
460 // In raw mode, this could be a keyword or a name.
461 switch (State) {
462 case UsingNamespace:
463 case UsingNamespaceName:
464 NSName.append(Tok.getRawIdentifier());
465 State = UsingNamespaceName;
466 break;
467 case Namespace:
468 case NamespaceName:
469 NSName.append(Tok.getRawIdentifier());
470 State = NamespaceName;
471 break;
472 case Using:
473 State =
474 (Tok.getRawIdentifier() == "namespace") ? UsingNamespace : Default;
475 break;
476 case Default:
477 NSName.clear();
478 if (Tok.getRawIdentifier() == "namespace")
479 State = Namespace;
480 else if (Tok.getRawIdentifier() == "using")
481 State = Using;
482 break;
483 }
484 break;
485 case tok::coloncolon:
486 // This can come at the beginning or in the middle of a namespace name.
487 switch (State) {
488 case UsingNamespace:
489 case UsingNamespaceName:
490 NSName.append("::");
491 State = UsingNamespaceName;
492 break;
493 case NamespaceName:
494 NSName.append("::");
495 State = NamespaceName;
496 break;
497 case Namespace: // Not legal here.
498 case Using:
499 case Default:
500 State = Default;
501 break;
502 }
503 break;
504 case tok::l_brace:
505 // Record which { started a namespace, so we know when } ends one.
506 if (State == NamespaceName) {
507 // Parsed: namespace <name> {
508 BraceStack.push_back(true);
509 Enclosing.push_back(NSName);
510 Callback(BeginNamespace, llvm::join(Enclosing, "::"));
511 } else {
512 // This case includes anonymous namespaces (State = Namespace).
513 // For our purposes, they're not namespaces and we ignore them.
514 BraceStack.push_back(false);
515 }
516 State = Default;
517 break;
518 case tok::r_brace:
519 // If braces are unmatched, we're going to be confused, but don't crash.
520 if (!BraceStack.empty()) {
521 if (BraceStack.back()) {
522 // Parsed: } // namespace
523 Enclosing.pop_back();
524 Callback(EndNamespace, llvm::join(Enclosing, "::"));
525 }
526 BraceStack.pop_back();
527 }
528 break;
529 case tok::semi:
530 if (State == UsingNamespaceName)
531 // Parsed: using namespace <name> ;
532 Callback(UsingDirective, llvm::StringRef(NSName));
533 State = Default;
534 break;
535 default:
536 State = Default;
537 break;
538 }
539 });
540}
541
542// Returns the prefix namespaces of NS: {"" ... NS}.
543llvm::SmallVector<llvm::StringRef, 8> ancestorNamespaces(llvm::StringRef NS) {
544 llvm::SmallVector<llvm::StringRef, 8> Results;
545 Results.push_back(NS.take_front(0));
546 NS.split(Results, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
547 for (llvm::StringRef &R : Results)
548 R = NS.take_front(R.end() - NS.begin());
549 return Results;
550}
551
552} // namespace
553
554std::vector<std::string> visibleNamespaces(llvm::StringRef Code,
555 const format::FormatStyle &Style) {
556 std::string Current;
557 // Map from namespace to (resolved) namespaces introduced via using directive.
558 llvm::StringMap<llvm::StringSet<>> UsingDirectives;
559
560 parseNamespaceEvents(Code, Style,
561 [&](NamespaceEvent Event, llvm::StringRef NS) {
562 switch (Event) {
563 case BeginNamespace:
564 case EndNamespace:
565 Current = NS;
566 break;
567 case UsingDirective:
568 if (NS.consume_front("::"))
569 UsingDirectives[Current].insert(NS);
570 else {
571 for (llvm::StringRef Enclosing :
572 ancestorNamespaces(Current)) {
573 if (Enclosing.empty())
574 UsingDirectives[Current].insert(NS);
575 else
576 UsingDirectives[Current].insert(
577 (Enclosing + "::" + NS).str());
578 }
579 }
580 break;
581 }
582 });
583
584 std::vector<std::string> Found;
585 for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) {
586 Found.push_back(Enclosing);
587 auto It = UsingDirectives.find(Enclosing);
588 if (It != UsingDirectives.end())
589 for (const auto& Used : It->second)
590 Found.push_back(Used.getKey());
591 }
592
593
594 llvm::sort(Found, [&](const std::string &LHS, const std::string &RHS) {
595 if (Current == RHS)
596 return false;
597 if (Current == LHS)
598 return true;
599 return LHS < RHS;
600 });
601 Found.erase(std::unique(Found.begin(), Found.end()), Found.end());
602 return Found;
603}
604
Sam McCallb536a2a2017-12-19 12:23:48 +0000605} // namespace clangd
606} // namespace clang