blob: da96f8cad31c1af08e5676ddb92e91ff0fd17325 [file] [log] [blame]
Sam McCall8dc9dbb2018-10-15 13:34:10 +00001//===-- Background.cpp - Build an index in a background thread ------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "index/Background.h"
11#include "ClangdUnit.h"
12#include "Compiler.h"
13#include "Logger.h"
Kadir Cetinkaya6675be82018-10-30 12:13:27 +000014#include "Threading.h"
Sam McCall8dc9dbb2018-10-15 13:34:10 +000015#include "Trace.h"
Eric Liuad588af2018-11-06 10:55:21 +000016#include "URI.h"
Sam McCall8dc9dbb2018-10-15 13:34:10 +000017#include "index/IndexAction.h"
18#include "index/MemIndex.h"
19#include "index/Serialization.h"
Eric Liuad588af2018-11-06 10:55:21 +000020#include "index/SymbolCollector.h"
21#include "clang/Basic/SourceLocation.h"
22#include "clang/Basic/SourceManager.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/StringMap.h"
25#include "llvm/ADT/StringRef.h"
Sam McCall8dc9dbb2018-10-15 13:34:10 +000026#include "llvm/Support/SHA1.h"
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +000027
28#include <memory>
29#include <queue>
Sam McCall8dc9dbb2018-10-15 13:34:10 +000030#include <random>
Eric Liuad588af2018-11-06 10:55:21 +000031#include <string>
Sam McCall8dc9dbb2018-10-15 13:34:10 +000032
33using namespace llvm;
34namespace clang {
35namespace clangd {
36
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +000037BackgroundIndex::BackgroundIndex(
38 Context BackgroundContext, StringRef ResourceDir,
39 const FileSystemProvider &FSProvider, ArrayRef<std::string> URISchemes,
40 BackgroundIndexStorage::Factory IndexStorageFactory, size_t ThreadPoolSize)
Sam McCallc008af62018-10-20 15:30:37 +000041 : SwapIndex(make_unique<MemIndex>()), ResourceDir(ResourceDir),
Sam McCall8dc9dbb2018-10-15 13:34:10 +000042 FSProvider(FSProvider), BackgroundContext(std::move(BackgroundContext)),
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +000043 URISchemes(URISchemes),
44 IndexStorageFactory(std::move(IndexStorageFactory)) {
Kadir Cetinkaya6675be82018-10-30 12:13:27 +000045 assert(ThreadPoolSize > 0 && "Thread pool size can't be zero.");
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +000046 assert(IndexStorageFactory && "Storage factory can not be null!");
Kadir Cetinkaya6675be82018-10-30 12:13:27 +000047 while (ThreadPoolSize--) {
48 ThreadPool.emplace_back([this] { run(); });
49 // Set priority to low, since background indexing is a long running task we
50 // do not want to eat up cpu when there are any other high priority threads.
51 // FIXME: In the future we might want a more general way of handling this to
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +000052 // support tasks with various priorities.
Kadir Cetinkaya6675be82018-10-30 12:13:27 +000053 setThreadPriority(ThreadPool.back(), ThreadPriority::Low);
54 }
55}
Sam McCall8dc9dbb2018-10-15 13:34:10 +000056
57BackgroundIndex::~BackgroundIndex() {
58 stop();
Kadir Cetinkaya6675be82018-10-30 12:13:27 +000059 for (auto &Thread : ThreadPool)
60 Thread.join();
Sam McCall8dc9dbb2018-10-15 13:34:10 +000061}
62
63void BackgroundIndex::stop() {
64 {
65 std::lock_guard<std::mutex> Lock(QueueMu);
66 ShouldStop = true;
67 }
68 QueueCV.notify_all();
69}
70
71void BackgroundIndex::run() {
Kadir Cetinkaya6675be82018-10-30 12:13:27 +000072 WithContext Background(BackgroundContext.clone());
Sam McCall8dc9dbb2018-10-15 13:34:10 +000073 while (true) {
Sam McCallc008af62018-10-20 15:30:37 +000074 Optional<Task> Task;
Sam McCall8dc9dbb2018-10-15 13:34:10 +000075 {
76 std::unique_lock<std::mutex> Lock(QueueMu);
77 QueueCV.wait(Lock, [&] { return ShouldStop || !Queue.empty(); });
78 if (ShouldStop) {
79 Queue.clear();
80 QueueCV.notify_all();
81 return;
82 }
83 ++NumActiveTasks;
84 Task = std::move(Queue.front());
85 Queue.pop_front();
86 }
87 (*Task)();
88 {
89 std::unique_lock<std::mutex> Lock(QueueMu);
90 assert(NumActiveTasks > 0 && "before decrementing");
91 --NumActiveTasks;
92 }
93 QueueCV.notify_all();
94 }
95}
96
97void BackgroundIndex::blockUntilIdleForTest() {
98 std::unique_lock<std::mutex> Lock(QueueMu);
99 QueueCV.wait(Lock, [&] { return Queue.empty() && NumActiveTasks == 0; });
100}
101
102void BackgroundIndex::enqueue(StringRef Directory,
103 tooling::CompileCommand Cmd) {
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000104 BackgroundIndexStorage *IndexStorage = IndexStorageFactory(Directory);
Sam McCallbca624a2018-10-16 09:05:13 +0000105 {
106 std::lock_guard<std::mutex> Lock(QueueMu);
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000107 enqueueLocked(std::move(Cmd), IndexStorage);
Sam McCallbca624a2018-10-16 09:05:13 +0000108 }
109 QueueCV.notify_all();
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000110}
111
112void BackgroundIndex::enqueueAll(StringRef Directory,
113 const tooling::CompilationDatabase &CDB) {
114 trace::Span Tracer("BackgroundIndexEnqueueCDB");
115 // FIXME: this function may be slow. Perhaps enqueue a task to re-read the CDB
116 // from disk and enqueue the commands asynchronously?
117 auto Cmds = CDB.getAllCompileCommands();
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000118 BackgroundIndexStorage *IndexStorage = IndexStorageFactory(Directory);
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000119 SPAN_ATTACH(Tracer, "commands", int64_t(Cmds.size()));
120 std::mt19937 Generator(std::random_device{}());
121 std::shuffle(Cmds.begin(), Cmds.end(), Generator);
122 log("Enqueueing {0} commands for indexing from {1}", Cmds.size(), Directory);
123 {
124 std::lock_guard<std::mutex> Lock(QueueMu);
125 for (auto &Cmd : Cmds)
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000126 enqueueLocked(std::move(Cmd), IndexStorage);
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000127 }
128 QueueCV.notify_all();
129}
130
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000131void BackgroundIndex::enqueueLocked(tooling::CompileCommand Cmd,
132 BackgroundIndexStorage *IndexStorage) {
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000133 Queue.push_back(Bind(
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000134 [this, IndexStorage](tooling::CompileCommand Cmd) {
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000135 std::string Filename = Cmd.Filename;
136 Cmd.CommandLine.push_back("-resource-dir=" + ResourceDir);
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000137 if (auto Error = index(std::move(Cmd), IndexStorage))
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000138 log("Indexing {0} failed: {1}", Filename, std::move(Error));
139 },
Kadir Cetinkayaed18e782018-11-15 10:34:39 +0000140 std::move(Cmd)));
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000141}
142
Kadir Cetinkaya5a9b92c2018-11-15 10:34:47 +0000143static BackgroundIndex::FileDigest digest(StringRef Content) {
144 return SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
145}
146
147static Optional<BackgroundIndex::FileDigest> digestFile(const SourceManager &SM,
148 FileID FID) {
149 bool Invalid = false;
150 StringRef Content = SM.getBufferData(FID, &Invalid);
151 if (Invalid)
152 return None;
153 return digest(Content);
154}
155
Eric Liuad588af2018-11-06 10:55:21 +0000156// Resolves URI to file paths with cache.
157class URIToFileCache {
158public:
159 URIToFileCache(llvm::StringRef HintPath) : HintPath(HintPath) {}
160
161 llvm::StringRef resolve(llvm::StringRef FileURI) {
162 auto I = URIToPathCache.try_emplace(FileURI);
163 if (I.second) {
164 auto U = URI::parse(FileURI);
165 if (!U) {
166 elog("Failed to parse URI {0}: {1}", FileURI, U.takeError());
167 assert(false && "Failed to parse URI");
168 return "";
169 }
170 auto Path = URI::resolve(*U, HintPath);
171 if (!Path) {
172 elog("Failed to resolve URI {0}: {1}", FileURI, Path.takeError());
173 assert(false && "Failed to resolve URI");
174 return "";
175 }
176 I.first->second = *Path;
177 }
178 return I.first->second;
179 }
180
181private:
182 std::string HintPath;
183 llvm::StringMap<std::string> URIToPathCache;
184};
185
186/// Given index results from a TU, only update files in \p FilesToUpdate.
187void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
188 RefSlab Refs,
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000189 const StringMap<FileDigest> &FilesToUpdate,
190 BackgroundIndexStorage *IndexStorage) {
Eric Liuad588af2018-11-06 10:55:21 +0000191 // Partition symbols/references into files.
192 struct File {
193 DenseSet<const Symbol *> Symbols;
194 DenseSet<const Ref *> Refs;
195 };
196 StringMap<File> Files;
197 URIToFileCache URICache(MainFile);
198 for (const auto &Sym : Symbols) {
199 if (Sym.CanonicalDeclaration) {
200 auto DeclPath = URICache.resolve(Sym.CanonicalDeclaration.FileURI);
201 if (FilesToUpdate.count(DeclPath) != 0)
202 Files[DeclPath].Symbols.insert(&Sym);
203 }
204 // For symbols with different declaration and definition locations, we store
205 // the full symbol in both the header file and the implementation file, so
206 // that merging can tell the preferred symbols (from canonical headers) from
207 // other symbols (e.g. forward declarations).
208 if (Sym.Definition &&
209 Sym.Definition.FileURI != Sym.CanonicalDeclaration.FileURI) {
210 auto DefPath = URICache.resolve(Sym.Definition.FileURI);
211 if (FilesToUpdate.count(DefPath) != 0)
212 Files[DefPath].Symbols.insert(&Sym);
213 }
214 }
215 DenseMap<const Ref *, SymbolID> RefToIDs;
216 for (const auto &SymRefs : Refs) {
217 for (const auto &R : SymRefs.second) {
218 auto Path = URICache.resolve(R.Location.FileURI);
219 if (FilesToUpdate.count(Path) != 0) {
220 auto &F = Files[Path];
221 RefToIDs[&R] = SymRefs.first;
222 F.Refs.insert(&R);
223 }
224 }
225 }
226
227 // Build and store new slabs for each updated file.
228 for (const auto &F : Files) {
229 StringRef Path = F.first();
230 vlog("Update symbols in {0}", Path);
231 SymbolSlab::Builder Syms;
232 RefSlab::Builder Refs;
233 for (const auto *S : F.second.Symbols)
234 Syms.insert(*S);
235 for (const auto *R : F.second.Refs)
236 Refs.insert(RefToIDs[R], *R);
237
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000238 auto SS = llvm::make_unique<SymbolSlab>(std::move(Syms).build());
239 auto RS = llvm::make_unique<RefSlab>(std::move(Refs).build());
240
241 auto Hash = FilesToUpdate.lookup(Path);
242 // We need to store shards before updating the index, since the latter
243 // consumes slabs.
244 // FIXME: Store Hash in the Shard.
245 if (IndexStorage) {
246 IndexFileOut Shard;
247 Shard.Symbols = SS.get();
248 Shard.Refs = RS.get();
249 if (auto Error = IndexStorage->storeShard(Path, Shard))
250 elog("Failed to write background-index shard for file {0}: {1}", Path,
251 std::move(Error));
252 }
253
Eric Liuad588af2018-11-06 10:55:21 +0000254 std::lock_guard<std::mutex> Lock(DigestsMu);
255 // This can override a newer version that is added in another thread,
256 // if this thread sees the older version but finishes later. This should be
257 // rare in practice.
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000258 IndexedFileDigests[Path] = Hash;
259 IndexedSymbols.update(Path, std::move(SS), std::move(RS));
Eric Liuad588af2018-11-06 10:55:21 +0000260 }
261}
262
263// Creates a filter to not collect index results from files with unchanged
264// digests.
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000265// \p FileDigests contains file digests for the current indexed files, and all
266// changed files will be added to \p FilesToUpdate.
Eric Liuad588af2018-11-06 10:55:21 +0000267decltype(SymbolCollector::Options::FileFilter) createFileFilter(
268 const llvm::StringMap<BackgroundIndex::FileDigest> &FileDigests,
269 llvm::StringMap<BackgroundIndex::FileDigest> &FilesToUpdate) {
270 return [&FileDigests, &FilesToUpdate](const SourceManager &SM, FileID FID) {
271 StringRef Path;
272 if (const auto *F = SM.getFileEntryForID(FID))
273 Path = F->getName();
274 if (Path.empty())
275 return false; // Skip invalid files.
276 SmallString<128> AbsPath(Path);
277 if (std::error_code EC =
278 SM.getFileManager().getVirtualFileSystem()->makeAbsolute(AbsPath)) {
279 elog("Warning: could not make absolute file: {0}", EC.message());
280 return false; // Skip files without absolute path.
281 }
282 sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true);
283 auto Digest = digestFile(SM, FID);
284 if (!Digest)
285 return false;
286 auto D = FileDigests.find(AbsPath);
287 if (D != FileDigests.end() && D->second == Digest)
288 return false; // Skip files that haven't changed.
289
290 FilesToUpdate[AbsPath] = *Digest;
291 return true;
292 };
293}
294
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000295Error BackgroundIndex::index(tooling::CompileCommand Cmd,
296 BackgroundIndexStorage *IndexStorage) {
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000297 trace::Span Tracer("BackgroundIndex");
298 SPAN_ATTACH(Tracer, "file", Cmd.Filename);
Kadir Cetinkayaed18e782018-11-15 10:34:39 +0000299 SmallString<128> AbsolutePath;
300 if (sys::path::is_absolute(Cmd.Filename)) {
301 AbsolutePath = Cmd.Filename;
302 } else {
303 AbsolutePath = Cmd.Directory;
304 sys::path::append(AbsolutePath, Cmd.Filename);
305 }
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000306
307 auto FS = FSProvider.getFileSystem();
308 auto Buf = FS->getBufferForFile(AbsolutePath);
309 if (!Buf)
310 return errorCodeToError(Buf.getError());
Eric Liuad588af2018-11-06 10:55:21 +0000311 auto Hash = digest(Buf->get()->getBuffer());
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000312
Eric Liuad588af2018-11-06 10:55:21 +0000313 // Take a snapshot of the digests to avoid locking for each file in the TU.
314 llvm::StringMap<FileDigest> DigestsSnapshot;
315 {
316 std::lock_guard<std::mutex> Lock(DigestsMu);
317 if (IndexedFileDigests.lookup(AbsolutePath) == Hash) {
318 vlog("No need to index {0}, already up to date", AbsolutePath);
319 return Error::success();
320 }
321
322 DigestsSnapshot = IndexedFileDigests;
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000323 }
324
325 log("Indexing {0}", Cmd.Filename, toHex(Hash));
326 ParseInputs Inputs;
327 Inputs.FS = std::move(FS);
328 Inputs.FS->setCurrentWorkingDirectory(Cmd.Directory);
329 Inputs.CompileCommand = std::move(Cmd);
330 auto CI = buildCompilerInvocation(Inputs);
331 if (!CI)
Sam McCallc008af62018-10-20 15:30:37 +0000332 return createStringError(inconvertibleErrorCode(),
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000333 "Couldn't build compiler invocation");
334 IgnoreDiagnostics IgnoreDiags;
335 auto Clang = prepareCompilerInstance(
336 std::move(CI), /*Preamble=*/nullptr, std::move(*Buf),
337 std::make_shared<PCHContainerOperations>(), Inputs.FS, IgnoreDiags);
338 if (!Clang)
Sam McCallc008af62018-10-20 15:30:37 +0000339 return createStringError(inconvertibleErrorCode(),
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000340 "Couldn't build compiler instance");
341
342 SymbolCollector::Options IndexOpts;
Eric Liuad588af2018-11-06 10:55:21 +0000343 IndexOpts.URISchemes = URISchemes;
344 StringMap<FileDigest> FilesToUpdate;
345 IndexOpts.FileFilter = createFileFilter(DigestsSnapshot, FilesToUpdate);
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000346 SymbolSlab Symbols;
347 RefSlab Refs;
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000348 auto Action = createStaticIndexingAction(
349 IndexOpts, [&](SymbolSlab S) { Symbols = std::move(S); },
350 [&](RefSlab R) { Refs = std::move(R); });
351
352 // We're going to run clang here, and it could potentially crash.
353 // We could use CrashRecoveryContext to try to make indexing crashes nonfatal,
354 // but the leaky "recovery" is pretty scary too in a long-running process.
355 // If crashes are a real problem, maybe we should fork a child process.
356
357 const FrontendInputFile &Input = Clang->getFrontendOpts().Inputs.front();
358 if (!Action->BeginSourceFile(*Clang, Input))
Sam McCallc008af62018-10-20 15:30:37 +0000359 return createStringError(inconvertibleErrorCode(),
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000360 "BeginSourceFile() failed");
361 if (!Action->Execute())
Sam McCallc008af62018-10-20 15:30:37 +0000362 return createStringError(inconvertibleErrorCode(), "Execute() failed");
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000363 Action->EndSourceFile();
364
365 log("Indexed {0} ({1} symbols, {2} refs)", Inputs.CompileCommand.Filename,
Haojian Wu6ece6e72018-10-18 15:33:20 +0000366 Symbols.size(), Refs.numRefs());
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000367 SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
Haojian Wu6ece6e72018-10-18 15:33:20 +0000368 SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs()));
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000369 update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate,
370 IndexStorage);
Eric Liuad588af2018-11-06 10:55:21 +0000371 {
372 // Make sure hash for the main file is always updated even if there is no
373 // index data in it.
374 std::lock_guard<std::mutex> Lock(DigestsMu);
375 IndexedFileDigests[AbsolutePath] = Hash;
376 }
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000377
378 // FIXME: this should rebuild once-in-a-while, not after every file.
379 // At that point we should use Dex, too.
380 vlog("Rebuilding automatic index");
Eric Liuad588af2018-11-06 10:55:21 +0000381 reset(IndexedSymbols.buildIndex(IndexType::Light, DuplicateHandling::Merge,
382 URISchemes));
383
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000384 return Error::success();
385}
386
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000387} // namespace clangd
388} // namespace clang