blob: 9d82e65d5d779d50119cb8c4b0fd44985ed501ec [file] [log] [blame]
Sam McCall8dc9dbb2018-10-15 13:34:10 +00001//===-- Background.cpp - Build an index in a background thread ------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "index/Background.h"
11#include "ClangdUnit.h"
12#include "Compiler.h"
13#include "Logger.h"
Kadir Cetinkaya6675be82018-10-30 12:13:27 +000014#include "Threading.h"
Sam McCall8dc9dbb2018-10-15 13:34:10 +000015#include "Trace.h"
Eric Liuad588af2018-11-06 10:55:21 +000016#include "URI.h"
Sam McCall8dc9dbb2018-10-15 13:34:10 +000017#include "index/IndexAction.h"
18#include "index/MemIndex.h"
19#include "index/Serialization.h"
Eric Liuad588af2018-11-06 10:55:21 +000020#include "index/SymbolCollector.h"
21#include "clang/Basic/SourceLocation.h"
22#include "clang/Basic/SourceManager.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/StringMap.h"
25#include "llvm/ADT/StringRef.h"
Sam McCall8dc9dbb2018-10-15 13:34:10 +000026#include "llvm/Support/SHA1.h"
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +000027
28#include <memory>
Sam McCall7d0e4842018-11-26 13:35:02 +000029#include <numeric>
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +000030#include <queue>
Sam McCall8dc9dbb2018-10-15 13:34:10 +000031#include <random>
Eric Liuad588af2018-11-06 10:55:21 +000032#include <string>
Sam McCall8dc9dbb2018-10-15 13:34:10 +000033
34using namespace llvm;
35namespace clang {
36namespace clangd {
37
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +000038BackgroundIndex::BackgroundIndex(
39 Context BackgroundContext, StringRef ResourceDir,
Sam McCall6e2d2a32018-11-26 09:51:50 +000040 const FileSystemProvider &FSProvider, const GlobalCompilationDatabase &CDB,
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +000041 BackgroundIndexStorage::Factory IndexStorageFactory, size_t ThreadPoolSize)
Sam McCallc008af62018-10-20 15:30:37 +000042 : SwapIndex(make_unique<MemIndex>()), ResourceDir(ResourceDir),
Sam McCall6e2d2a32018-11-26 09:51:50 +000043 FSProvider(FSProvider), CDB(CDB),
44 BackgroundContext(std::move(BackgroundContext)),
45 IndexStorageFactory(std::move(IndexStorageFactory)),
46 CommandsChanged(
47 CDB.watch([&](const std::vector<std::string> &ChangedFiles) {
48 enqueue(ChangedFiles);
49 })) {
Kadir Cetinkaya6675be82018-10-30 12:13:27 +000050 assert(ThreadPoolSize > 0 && "Thread pool size can't be zero.");
Haojian Wu1bf52c52018-11-16 09:41:14 +000051 assert(this->IndexStorageFactory && "Storage factory can not be null!");
Kadir Cetinkaya6675be82018-10-30 12:13:27 +000052 while (ThreadPoolSize--) {
53 ThreadPool.emplace_back([this] { run(); });
54 // Set priority to low, since background indexing is a long running task we
55 // do not want to eat up cpu when there are any other high priority threads.
56 // FIXME: In the future we might want a more general way of handling this to
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +000057 // support tasks with various priorities.
Kadir Cetinkaya6675be82018-10-30 12:13:27 +000058 setThreadPriority(ThreadPool.back(), ThreadPriority::Low);
59 }
60}
Sam McCall8dc9dbb2018-10-15 13:34:10 +000061
62BackgroundIndex::~BackgroundIndex() {
63 stop();
Kadir Cetinkaya6675be82018-10-30 12:13:27 +000064 for (auto &Thread : ThreadPool)
65 Thread.join();
Sam McCall8dc9dbb2018-10-15 13:34:10 +000066}
67
68void BackgroundIndex::stop() {
69 {
70 std::lock_guard<std::mutex> Lock(QueueMu);
71 ShouldStop = true;
72 }
73 QueueCV.notify_all();
74}
75
76void BackgroundIndex::run() {
Kadir Cetinkaya6675be82018-10-30 12:13:27 +000077 WithContext Background(BackgroundContext.clone());
Sam McCall8dc9dbb2018-10-15 13:34:10 +000078 while (true) {
Sam McCallc008af62018-10-20 15:30:37 +000079 Optional<Task> Task;
Sam McCall8dc9dbb2018-10-15 13:34:10 +000080 {
81 std::unique_lock<std::mutex> Lock(QueueMu);
82 QueueCV.wait(Lock, [&] { return ShouldStop || !Queue.empty(); });
83 if (ShouldStop) {
84 Queue.clear();
85 QueueCV.notify_all();
86 return;
87 }
88 ++NumActiveTasks;
89 Task = std::move(Queue.front());
90 Queue.pop_front();
91 }
92 (*Task)();
93 {
94 std::unique_lock<std::mutex> Lock(QueueMu);
95 assert(NumActiveTasks > 0 && "before decrementing");
96 --NumActiveTasks;
97 }
98 QueueCV.notify_all();
99 }
100}
101
102void BackgroundIndex::blockUntilIdleForTest() {
103 std::unique_lock<std::mutex> Lock(QueueMu);
104 QueueCV.wait(Lock, [&] { return Queue.empty() && NumActiveTasks == 0; });
105}
106
Sam McCall6e2d2a32018-11-26 09:51:50 +0000107void BackgroundIndex::enqueue(const std::vector<std::string> &ChangedFiles) {
108 enqueueTask([this, ChangedFiles] {
109 trace::Span Tracer("BackgroundIndexEnqueue");
110 // We're doing this asynchronously, because we'll read shards here too.
111 // FIXME: read shards here too.
112
113 log("Enqueueing {0} commands for indexing", ChangedFiles.size());
114 SPAN_ATTACH(Tracer, "files", int64_t(ChangedFiles.size()));
115
116 // We shuffle the files because processing them in a random order should
117 // quickly give us good coverage of headers in the project.
118 std::vector<unsigned> Permutation(ChangedFiles.size());
119 std::iota(Permutation.begin(), Permutation.end(), 0);
120 std::mt19937 Generator(std::random_device{}());
121 std::shuffle(Permutation.begin(), Permutation.end(), Generator);
122
123 for (const unsigned I : Permutation)
124 enqueue(ChangedFiles[I]);
125 });
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000126}
127
Sam McCall6e2d2a32018-11-26 09:51:50 +0000128void BackgroundIndex::enqueue(const std::string &File) {
129 ProjectInfo Project;
130 if (auto Cmd = CDB.getCompileCommand(File, &Project)) {
131 auto *Storage = IndexStorageFactory(Project.SourceRoot);
132 enqueueTask(Bind(
133 [this, File, Storage](tooling::CompileCommand Cmd) {
134 Cmd.CommandLine.push_back("-resource-dir=" + ResourceDir);
135 if (auto Error = index(std::move(Cmd), Storage))
136 log("Indexing {0} failed: {1}", File, std::move(Error));
137 },
138 std::move(*Cmd)));
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000139 }
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000140}
141
Sam McCall6e2d2a32018-11-26 09:51:50 +0000142void BackgroundIndex::enqueueTask(Task T) {
143 {
144 std::lock_guard<std::mutex> Lock(QueueMu);
145 Queue.push_back(std::move(T));
146 }
147 QueueCV.notify_all();
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000148}
149
Kadir Cetinkaya5a9b92c2018-11-15 10:34:47 +0000150static BackgroundIndex::FileDigest digest(StringRef Content) {
151 return SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
152}
153
154static Optional<BackgroundIndex::FileDigest> digestFile(const SourceManager &SM,
155 FileID FID) {
156 bool Invalid = false;
157 StringRef Content = SM.getBufferData(FID, &Invalid);
158 if (Invalid)
159 return None;
160 return digest(Content);
161}
162
Eric Liuad588af2018-11-06 10:55:21 +0000163// Resolves URI to file paths with cache.
164class URIToFileCache {
165public:
166 URIToFileCache(llvm::StringRef HintPath) : HintPath(HintPath) {}
167
168 llvm::StringRef resolve(llvm::StringRef FileURI) {
169 auto I = URIToPathCache.try_emplace(FileURI);
170 if (I.second) {
171 auto U = URI::parse(FileURI);
172 if (!U) {
173 elog("Failed to parse URI {0}: {1}", FileURI, U.takeError());
174 assert(false && "Failed to parse URI");
175 return "";
176 }
177 auto Path = URI::resolve(*U, HintPath);
178 if (!Path) {
179 elog("Failed to resolve URI {0}: {1}", FileURI, Path.takeError());
180 assert(false && "Failed to resolve URI");
181 return "";
182 }
183 I.first->second = *Path;
184 }
185 return I.first->second;
186 }
187
188private:
189 std::string HintPath;
190 llvm::StringMap<std::string> URIToPathCache;
191};
192
193/// Given index results from a TU, only update files in \p FilesToUpdate.
194void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
195 RefSlab Refs,
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000196 const StringMap<FileDigest> &FilesToUpdate,
197 BackgroundIndexStorage *IndexStorage) {
Eric Liuad588af2018-11-06 10:55:21 +0000198 // Partition symbols/references into files.
199 struct File {
200 DenseSet<const Symbol *> Symbols;
201 DenseSet<const Ref *> Refs;
202 };
203 StringMap<File> Files;
204 URIToFileCache URICache(MainFile);
205 for (const auto &Sym : Symbols) {
206 if (Sym.CanonicalDeclaration) {
207 auto DeclPath = URICache.resolve(Sym.CanonicalDeclaration.FileURI);
208 if (FilesToUpdate.count(DeclPath) != 0)
209 Files[DeclPath].Symbols.insert(&Sym);
210 }
211 // For symbols with different declaration and definition locations, we store
212 // the full symbol in both the header file and the implementation file, so
213 // that merging can tell the preferred symbols (from canonical headers) from
214 // other symbols (e.g. forward declarations).
215 if (Sym.Definition &&
216 Sym.Definition.FileURI != Sym.CanonicalDeclaration.FileURI) {
217 auto DefPath = URICache.resolve(Sym.Definition.FileURI);
218 if (FilesToUpdate.count(DefPath) != 0)
219 Files[DefPath].Symbols.insert(&Sym);
220 }
221 }
222 DenseMap<const Ref *, SymbolID> RefToIDs;
223 for (const auto &SymRefs : Refs) {
224 for (const auto &R : SymRefs.second) {
225 auto Path = URICache.resolve(R.Location.FileURI);
226 if (FilesToUpdate.count(Path) != 0) {
227 auto &F = Files[Path];
228 RefToIDs[&R] = SymRefs.first;
229 F.Refs.insert(&R);
230 }
231 }
232 }
233
234 // Build and store new slabs for each updated file.
235 for (const auto &F : Files) {
236 StringRef Path = F.first();
237 vlog("Update symbols in {0}", Path);
238 SymbolSlab::Builder Syms;
239 RefSlab::Builder Refs;
240 for (const auto *S : F.second.Symbols)
241 Syms.insert(*S);
242 for (const auto *R : F.second.Refs)
243 Refs.insert(RefToIDs[R], *R);
244
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000245 auto SS = llvm::make_unique<SymbolSlab>(std::move(Syms).build());
246 auto RS = llvm::make_unique<RefSlab>(std::move(Refs).build());
247
248 auto Hash = FilesToUpdate.lookup(Path);
249 // We need to store shards before updating the index, since the latter
250 // consumes slabs.
251 // FIXME: Store Hash in the Shard.
252 if (IndexStorage) {
253 IndexFileOut Shard;
254 Shard.Symbols = SS.get();
255 Shard.Refs = RS.get();
Kadir Cetinkayaca9e5dc2018-11-19 18:06:29 +0000256 Shard.Digest = &Hash;
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000257 if (auto Error = IndexStorage->storeShard(Path, Shard))
258 elog("Failed to write background-index shard for file {0}: {1}", Path,
259 std::move(Error));
260 }
261
Eric Liuad588af2018-11-06 10:55:21 +0000262 std::lock_guard<std::mutex> Lock(DigestsMu);
263 // This can override a newer version that is added in another thread,
264 // if this thread sees the older version but finishes later. This should be
265 // rare in practice.
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000266 IndexedFileDigests[Path] = Hash;
267 IndexedSymbols.update(Path, std::move(SS), std::move(RS));
Eric Liuad588af2018-11-06 10:55:21 +0000268 }
269}
270
271// Creates a filter to not collect index results from files with unchanged
272// digests.
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000273// \p FileDigests contains file digests for the current indexed files, and all
274// changed files will be added to \p FilesToUpdate.
Eric Liuad588af2018-11-06 10:55:21 +0000275decltype(SymbolCollector::Options::FileFilter) createFileFilter(
276 const llvm::StringMap<BackgroundIndex::FileDigest> &FileDigests,
277 llvm::StringMap<BackgroundIndex::FileDigest> &FilesToUpdate) {
278 return [&FileDigests, &FilesToUpdate](const SourceManager &SM, FileID FID) {
279 StringRef Path;
280 if (const auto *F = SM.getFileEntryForID(FID))
281 Path = F->getName();
282 if (Path.empty())
283 return false; // Skip invalid files.
284 SmallString<128> AbsPath(Path);
285 if (std::error_code EC =
286 SM.getFileManager().getVirtualFileSystem()->makeAbsolute(AbsPath)) {
287 elog("Warning: could not make absolute file: {0}", EC.message());
288 return false; // Skip files without absolute path.
289 }
290 sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true);
291 auto Digest = digestFile(SM, FID);
292 if (!Digest)
293 return false;
294 auto D = FileDigests.find(AbsPath);
295 if (D != FileDigests.end() && D->second == Digest)
296 return false; // Skip files that haven't changed.
297
298 FilesToUpdate[AbsPath] = *Digest;
299 return true;
300 };
301}
302
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000303Error BackgroundIndex::index(tooling::CompileCommand Cmd,
304 BackgroundIndexStorage *IndexStorage) {
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000305 trace::Span Tracer("BackgroundIndex");
306 SPAN_ATTACH(Tracer, "file", Cmd.Filename);
Kadir Cetinkayaed18e782018-11-15 10:34:39 +0000307 SmallString<128> AbsolutePath;
308 if (sys::path::is_absolute(Cmd.Filename)) {
309 AbsolutePath = Cmd.Filename;
310 } else {
311 AbsolutePath = Cmd.Directory;
312 sys::path::append(AbsolutePath, Cmd.Filename);
313 }
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000314
315 auto FS = FSProvider.getFileSystem();
316 auto Buf = FS->getBufferForFile(AbsolutePath);
317 if (!Buf)
318 return errorCodeToError(Buf.getError());
Eric Liuad588af2018-11-06 10:55:21 +0000319 auto Hash = digest(Buf->get()->getBuffer());
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000320
Eric Liuad588af2018-11-06 10:55:21 +0000321 // Take a snapshot of the digests to avoid locking for each file in the TU.
322 llvm::StringMap<FileDigest> DigestsSnapshot;
323 {
324 std::lock_guard<std::mutex> Lock(DigestsMu);
325 if (IndexedFileDigests.lookup(AbsolutePath) == Hash) {
326 vlog("No need to index {0}, already up to date", AbsolutePath);
327 return Error::success();
328 }
329
330 DigestsSnapshot = IndexedFileDigests;
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000331 }
332
333 log("Indexing {0}", Cmd.Filename, toHex(Hash));
334 ParseInputs Inputs;
335 Inputs.FS = std::move(FS);
336 Inputs.FS->setCurrentWorkingDirectory(Cmd.Directory);
337 Inputs.CompileCommand = std::move(Cmd);
338 auto CI = buildCompilerInvocation(Inputs);
339 if (!CI)
Sam McCallc008af62018-10-20 15:30:37 +0000340 return createStringError(inconvertibleErrorCode(),
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000341 "Couldn't build compiler invocation");
342 IgnoreDiagnostics IgnoreDiags;
343 auto Clang = prepareCompilerInstance(
344 std::move(CI), /*Preamble=*/nullptr, std::move(*Buf),
345 std::make_shared<PCHContainerOperations>(), Inputs.FS, IgnoreDiags);
346 if (!Clang)
Sam McCallc008af62018-10-20 15:30:37 +0000347 return createStringError(inconvertibleErrorCode(),
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000348 "Couldn't build compiler instance");
349
350 SymbolCollector::Options IndexOpts;
Eric Liuad588af2018-11-06 10:55:21 +0000351 StringMap<FileDigest> FilesToUpdate;
352 IndexOpts.FileFilter = createFileFilter(DigestsSnapshot, FilesToUpdate);
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000353 SymbolSlab Symbols;
354 RefSlab Refs;
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000355 auto Action = createStaticIndexingAction(
356 IndexOpts, [&](SymbolSlab S) { Symbols = std::move(S); },
357 [&](RefSlab R) { Refs = std::move(R); });
358
359 // We're going to run clang here, and it could potentially crash.
360 // We could use CrashRecoveryContext to try to make indexing crashes nonfatal,
361 // but the leaky "recovery" is pretty scary too in a long-running process.
362 // If crashes are a real problem, maybe we should fork a child process.
363
364 const FrontendInputFile &Input = Clang->getFrontendOpts().Inputs.front();
365 if (!Action->BeginSourceFile(*Clang, Input))
Sam McCallc008af62018-10-20 15:30:37 +0000366 return createStringError(inconvertibleErrorCode(),
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000367 "BeginSourceFile() failed");
368 if (!Action->Execute())
Sam McCallc008af62018-10-20 15:30:37 +0000369 return createStringError(inconvertibleErrorCode(), "Execute() failed");
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000370 Action->EndSourceFile();
371
372 log("Indexed {0} ({1} symbols, {2} refs)", Inputs.CompileCommand.Filename,
Haojian Wu6ece6e72018-10-18 15:33:20 +0000373 Symbols.size(), Refs.numRefs());
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000374 SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
Haojian Wu6ece6e72018-10-18 15:33:20 +0000375 SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs()));
Kadir Cetinkaya06553bf2018-11-16 09:03:56 +0000376 update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate,
377 IndexStorage);
Eric Liuad588af2018-11-06 10:55:21 +0000378 {
379 // Make sure hash for the main file is always updated even if there is no
380 // index data in it.
381 std::lock_guard<std::mutex> Lock(DigestsMu);
382 IndexedFileDigests[AbsolutePath] = Hash;
383 }
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000384
385 // FIXME: this should rebuild once-in-a-while, not after every file.
386 // At that point we should use Dex, too.
387 vlog("Rebuilding automatic index");
Eric Liuc0ac4bb2018-11-22 15:02:05 +0000388 reset(IndexedSymbols.buildIndex(IndexType::Light, DuplicateHandling::Merge));
Eric Liuad588af2018-11-06 10:55:21 +0000389
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000390 return Error::success();
391}
392
Sam McCall8dc9dbb2018-10-15 13:34:10 +0000393} // namespace clangd
394} // namespace clang