blob: 9be2257a964eebd729a1c53da5ca85f3cf6d79fa [file] [log] [blame]
Sam McCall8dc9dbb2018-10-15 13:34:10 +00001//===-- Background.cpp - Build an index in a background thread ------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "index/Background.h"
11#include "ClangdUnit.h"
12#include "Compiler.h"
13#include "Logger.h"
Kadir Cetinkaya6675be82018-10-30 12:13:27 +000014#include "Threading.h"
Sam McCall8dc9dbb2018-10-15 13:34:10 +000015#include "Trace.h"
Eric Liuad588af2018-11-06 10:55:21 +000016#include "URI.h"
Sam McCall8dc9dbb2018-10-15 13:34:10 +000017#include "index/IndexAction.h"
18#include "index/MemIndex.h"
19#include "index/Serialization.h"
Eric Liuad588af2018-11-06 10:55:21 +000020#include "index/SymbolCollector.h"
21#include "clang/Basic/SourceLocation.h"
22#include "clang/Basic/SourceManager.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/StringMap.h"
25#include "llvm/ADT/StringRef.h"
Sam McCall8dc9dbb2018-10-15 13:34:10 +000026#include "llvm/Support/SHA1.h"
27#include <random>
Eric Liuad588af2018-11-06 10:55:21 +000028#include <string>
Sam McCall8dc9dbb2018-10-15 13:34:10 +000029
30using namespace llvm;
31namespace clang {
32namespace clangd {
33
34BackgroundIndex::BackgroundIndex(Context BackgroundContext,
35 StringRef ResourceDir,
Eric Liu0b70a872018-10-22 15:37:58 +000036 const FileSystemProvider &FSProvider,
Kadir Cetinkaya6675be82018-10-30 12:13:27 +000037 ArrayRef<std::string> URISchemes,
38 size_t ThreadPoolSize)
Sam McCallc008af62018-10-20 15:30:37 +000039 : SwapIndex(make_unique<MemIndex>()), ResourceDir(ResourceDir),
Sam McCall8dc9dbb2018-10-15 13:34:10 +000040 FSProvider(FSProvider), BackgroundContext(std::move(BackgroundContext)),
Kadir Cetinkaya6675be82018-10-30 12:13:27 +000041 URISchemes(URISchemes) {
42 assert(ThreadPoolSize > 0 && "Thread pool size can't be zero.");
43 while (ThreadPoolSize--) {
44 ThreadPool.emplace_back([this] { run(); });
45 // Set priority to low, since background indexing is a long running task we
46 // do not want to eat up cpu when there are any other high priority threads.
47 // FIXME: In the future we might want a more general way of handling this to
48 // support a tasks with various priorities.
49 setThreadPriority(ThreadPool.back(), ThreadPriority::Low);
50 }
51}
Sam McCall8dc9dbb2018-10-15 13:34:10 +000052
53BackgroundIndex::~BackgroundIndex() {
54 stop();
Kadir Cetinkaya6675be82018-10-30 12:13:27 +000055 for (auto &Thread : ThreadPool)
56 Thread.join();
Sam McCall8dc9dbb2018-10-15 13:34:10 +000057}
58
59void BackgroundIndex::stop() {
60 {
61 std::lock_guard<std::mutex> Lock(QueueMu);
62 ShouldStop = true;
63 }
64 QueueCV.notify_all();
65}
66
67void BackgroundIndex::run() {
Kadir Cetinkaya6675be82018-10-30 12:13:27 +000068 WithContext Background(BackgroundContext.clone());
Sam McCall8dc9dbb2018-10-15 13:34:10 +000069 while (true) {
Sam McCallc008af62018-10-20 15:30:37 +000070 Optional<Task> Task;
Sam McCall8dc9dbb2018-10-15 13:34:10 +000071 {
72 std::unique_lock<std::mutex> Lock(QueueMu);
73 QueueCV.wait(Lock, [&] { return ShouldStop || !Queue.empty(); });
74 if (ShouldStop) {
75 Queue.clear();
76 QueueCV.notify_all();
77 return;
78 }
79 ++NumActiveTasks;
80 Task = std::move(Queue.front());
81 Queue.pop_front();
82 }
83 (*Task)();
84 {
85 std::unique_lock<std::mutex> Lock(QueueMu);
86 assert(NumActiveTasks > 0 && "before decrementing");
87 --NumActiveTasks;
88 }
89 QueueCV.notify_all();
90 }
91}
92
93void BackgroundIndex::blockUntilIdleForTest() {
94 std::unique_lock<std::mutex> Lock(QueueMu);
95 QueueCV.wait(Lock, [&] { return Queue.empty() && NumActiveTasks == 0; });
96}
97
98void BackgroundIndex::enqueue(StringRef Directory,
99 tooling::CompileCommand Cmd) {
Sam McCallbca624a2018-10-16 09:05:13 +0000100 {
101 std::lock_guard<std::mutex> Lock(QueueMu);
102 enqueueLocked(std::move(Cmd));
103 }
104 QueueCV.notify_all();
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000105}
106
107void BackgroundIndex::enqueueAll(StringRef Directory,
108 const tooling::CompilationDatabase &CDB) {
109 trace::Span Tracer("BackgroundIndexEnqueueCDB");
110 // FIXME: this function may be slow. Perhaps enqueue a task to re-read the CDB
111 // from disk and enqueue the commands asynchronously?
112 auto Cmds = CDB.getAllCompileCommands();
113 SPAN_ATTACH(Tracer, "commands", int64_t(Cmds.size()));
114 std::mt19937 Generator(std::random_device{}());
115 std::shuffle(Cmds.begin(), Cmds.end(), Generator);
116 log("Enqueueing {0} commands for indexing from {1}", Cmds.size(), Directory);
117 {
118 std::lock_guard<std::mutex> Lock(QueueMu);
119 for (auto &Cmd : Cmds)
120 enqueueLocked(std::move(Cmd));
121 }
122 QueueCV.notify_all();
123}
124
125void BackgroundIndex::enqueueLocked(tooling::CompileCommand Cmd) {
126 Queue.push_back(Bind(
127 [this](tooling::CompileCommand Cmd) {
128 std::string Filename = Cmd.Filename;
129 Cmd.CommandLine.push_back("-resource-dir=" + ResourceDir);
130 if (auto Error = index(std::move(Cmd)))
131 log("Indexing {0} failed: {1}", Filename, std::move(Error));
132 },
133 std::move(Cmd)));
134}
135
Eric Liuad588af2018-11-06 10:55:21 +0000136static BackgroundIndex::FileDigest digest(StringRef Content) {
137 return SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
138}
139
140static Optional<BackgroundIndex::FileDigest> digestFile(const SourceManager &SM,
141 FileID FID) {
142 bool Invalid = false;
143 StringRef Content = SM.getBufferData(FID, &Invalid);
144 if (Invalid)
145 return None;
146 return digest(Content);
147}
148
149// Resolves URI to file paths with cache.
150class URIToFileCache {
151public:
152 URIToFileCache(llvm::StringRef HintPath) : HintPath(HintPath) {}
153
154 llvm::StringRef resolve(llvm::StringRef FileURI) {
155 auto I = URIToPathCache.try_emplace(FileURI);
156 if (I.second) {
157 auto U = URI::parse(FileURI);
158 if (!U) {
159 elog("Failed to parse URI {0}: {1}", FileURI, U.takeError());
160 assert(false && "Failed to parse URI");
161 return "";
162 }
163 auto Path = URI::resolve(*U, HintPath);
164 if (!Path) {
165 elog("Failed to resolve URI {0}: {1}", FileURI, Path.takeError());
166 assert(false && "Failed to resolve URI");
167 return "";
168 }
169 I.first->second = *Path;
170 }
171 return I.first->second;
172 }
173
174private:
175 std::string HintPath;
176 llvm::StringMap<std::string> URIToPathCache;
177};
178
179/// Given index results from a TU, only update files in \p FilesToUpdate.
180void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
181 RefSlab Refs,
182 const StringMap<FileDigest> &FilesToUpdate) {
183 // Partition symbols/references into files.
184 struct File {
185 DenseSet<const Symbol *> Symbols;
186 DenseSet<const Ref *> Refs;
187 };
188 StringMap<File> Files;
189 URIToFileCache URICache(MainFile);
190 for (const auto &Sym : Symbols) {
191 if (Sym.CanonicalDeclaration) {
192 auto DeclPath = URICache.resolve(Sym.CanonicalDeclaration.FileURI);
193 if (FilesToUpdate.count(DeclPath) != 0)
194 Files[DeclPath].Symbols.insert(&Sym);
195 }
196 // For symbols with different declaration and definition locations, we store
197 // the full symbol in both the header file and the implementation file, so
198 // that merging can tell the preferred symbols (from canonical headers) from
199 // other symbols (e.g. forward declarations).
200 if (Sym.Definition &&
201 Sym.Definition.FileURI != Sym.CanonicalDeclaration.FileURI) {
202 auto DefPath = URICache.resolve(Sym.Definition.FileURI);
203 if (FilesToUpdate.count(DefPath) != 0)
204 Files[DefPath].Symbols.insert(&Sym);
205 }
206 }
207 DenseMap<const Ref *, SymbolID> RefToIDs;
208 for (const auto &SymRefs : Refs) {
209 for (const auto &R : SymRefs.second) {
210 auto Path = URICache.resolve(R.Location.FileURI);
211 if (FilesToUpdate.count(Path) != 0) {
212 auto &F = Files[Path];
213 RefToIDs[&R] = SymRefs.first;
214 F.Refs.insert(&R);
215 }
216 }
217 }
218
219 // Build and store new slabs for each updated file.
220 for (const auto &F : Files) {
221 StringRef Path = F.first();
222 vlog("Update symbols in {0}", Path);
223 SymbolSlab::Builder Syms;
224 RefSlab::Builder Refs;
225 for (const auto *S : F.second.Symbols)
226 Syms.insert(*S);
227 for (const auto *R : F.second.Refs)
228 Refs.insert(RefToIDs[R], *R);
229
230 std::lock_guard<std::mutex> Lock(DigestsMu);
231 // This can override a newer version that is added in another thread,
232 // if this thread sees the older version but finishes later. This should be
233 // rare in practice.
234 IndexedFileDigests[Path] = FilesToUpdate.lookup(Path);
235 IndexedSymbols.update(Path,
236 make_unique<SymbolSlab>(std::move(Syms).build()),
237 make_unique<RefSlab>(std::move(Refs).build()));
238 }
239}
240
241// Creates a filter to not collect index results from files with unchanged
242// digests.
243// \p FileDigests contains file digests for the current indexed files, and all changed files will be added to \p FilesToUpdate.
244decltype(SymbolCollector::Options::FileFilter) createFileFilter(
245 const llvm::StringMap<BackgroundIndex::FileDigest> &FileDigests,
246 llvm::StringMap<BackgroundIndex::FileDigest> &FilesToUpdate) {
247 return [&FileDigests, &FilesToUpdate](const SourceManager &SM, FileID FID) {
248 StringRef Path;
249 if (const auto *F = SM.getFileEntryForID(FID))
250 Path = F->getName();
251 if (Path.empty())
252 return false; // Skip invalid files.
253 SmallString<128> AbsPath(Path);
254 if (std::error_code EC =
255 SM.getFileManager().getVirtualFileSystem()->makeAbsolute(AbsPath)) {
256 elog("Warning: could not make absolute file: {0}", EC.message());
257 return false; // Skip files without absolute path.
258 }
259 sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true);
260 auto Digest = digestFile(SM, FID);
261 if (!Digest)
262 return false;
263 auto D = FileDigests.find(AbsPath);
264 if (D != FileDigests.end() && D->second == Digest)
265 return false; // Skip files that haven't changed.
266
267 FilesToUpdate[AbsPath] = *Digest;
268 return true;
269 };
270}
271
Sam McCallc008af62018-10-20 15:30:37 +0000272Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000273 trace::Span Tracer("BackgroundIndex");
274 SPAN_ATTACH(Tracer, "file", Cmd.Filename);
275 SmallString<128> AbsolutePath;
Sam McCallc008af62018-10-20 15:30:37 +0000276 if (sys::path::is_absolute(Cmd.Filename)) {
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000277 AbsolutePath = Cmd.Filename;
278 } else {
279 AbsolutePath = Cmd.Directory;
Sam McCallc008af62018-10-20 15:30:37 +0000280 sys::path::append(AbsolutePath, Cmd.Filename);
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000281 }
282
283 auto FS = FSProvider.getFileSystem();
284 auto Buf = FS->getBufferForFile(AbsolutePath);
285 if (!Buf)
286 return errorCodeToError(Buf.getError());
Eric Liuad588af2018-11-06 10:55:21 +0000287 auto Hash = digest(Buf->get()->getBuffer());
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000288
Eric Liuad588af2018-11-06 10:55:21 +0000289 // Take a snapshot of the digests to avoid locking for each file in the TU.
290 llvm::StringMap<FileDigest> DigestsSnapshot;
291 {
292 std::lock_guard<std::mutex> Lock(DigestsMu);
293 if (IndexedFileDigests.lookup(AbsolutePath) == Hash) {
294 vlog("No need to index {0}, already up to date", AbsolutePath);
295 return Error::success();
296 }
297
298 DigestsSnapshot = IndexedFileDigests;
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000299 }
300
301 log("Indexing {0}", Cmd.Filename, toHex(Hash));
302 ParseInputs Inputs;
303 Inputs.FS = std::move(FS);
304 Inputs.FS->setCurrentWorkingDirectory(Cmd.Directory);
305 Inputs.CompileCommand = std::move(Cmd);
306 auto CI = buildCompilerInvocation(Inputs);
307 if (!CI)
Sam McCallc008af62018-10-20 15:30:37 +0000308 return createStringError(inconvertibleErrorCode(),
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000309 "Couldn't build compiler invocation");
310 IgnoreDiagnostics IgnoreDiags;
311 auto Clang = prepareCompilerInstance(
312 std::move(CI), /*Preamble=*/nullptr, std::move(*Buf),
313 std::make_shared<PCHContainerOperations>(), Inputs.FS, IgnoreDiags);
314 if (!Clang)
Sam McCallc008af62018-10-20 15:30:37 +0000315 return createStringError(inconvertibleErrorCode(),
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000316 "Couldn't build compiler instance");
317
318 SymbolCollector::Options IndexOpts;
Eric Liuad588af2018-11-06 10:55:21 +0000319 IndexOpts.URISchemes = URISchemes;
320 StringMap<FileDigest> FilesToUpdate;
321 IndexOpts.FileFilter = createFileFilter(DigestsSnapshot, FilesToUpdate);
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000322 SymbolSlab Symbols;
323 RefSlab Refs;
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000324 auto Action = createStaticIndexingAction(
325 IndexOpts, [&](SymbolSlab S) { Symbols = std::move(S); },
326 [&](RefSlab R) { Refs = std::move(R); });
327
328 // We're going to run clang here, and it could potentially crash.
329 // We could use CrashRecoveryContext to try to make indexing crashes nonfatal,
330 // but the leaky "recovery" is pretty scary too in a long-running process.
331 // If crashes are a real problem, maybe we should fork a child process.
332
333 const FrontendInputFile &Input = Clang->getFrontendOpts().Inputs.front();
334 if (!Action->BeginSourceFile(*Clang, Input))
Sam McCallc008af62018-10-20 15:30:37 +0000335 return createStringError(inconvertibleErrorCode(),
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000336 "BeginSourceFile() failed");
337 if (!Action->Execute())
Sam McCallc008af62018-10-20 15:30:37 +0000338 return createStringError(inconvertibleErrorCode(), "Execute() failed");
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000339 Action->EndSourceFile();
340
341 log("Indexed {0} ({1} symbols, {2} refs)", Inputs.CompileCommand.Filename,
Haojian Wu6ece6e72018-10-18 15:33:20 +0000342 Symbols.size(), Refs.numRefs());
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000343 SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
Haojian Wu6ece6e72018-10-18 15:33:20 +0000344 SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs()));
Eric Liuad588af2018-11-06 10:55:21 +0000345 update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate);
346 {
347 // Make sure hash for the main file is always updated even if there is no
348 // index data in it.
349 std::lock_guard<std::mutex> Lock(DigestsMu);
350 IndexedFileDigests[AbsolutePath] = Hash;
351 }
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000352
353 // FIXME: this should rebuild once-in-a-while, not after every file.
354 // At that point we should use Dex, too.
355 vlog("Rebuilding automatic index");
Eric Liuad588af2018-11-06 10:55:21 +0000356 reset(IndexedSymbols.buildIndex(IndexType::Light, DuplicateHandling::Merge,
357 URISchemes));
358
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000359 return Error::success();
360}
361
362} // namespace clangd
363} // namespace clang