//===--- Preamble.cpp - Reusing expensive parts of the AST ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9#include "Preamble.h"
Kadir Cetinkayaecd3e672020-03-11 16:34:01 +010010#include "Compiler.h"
Kadir Cetinkaya2214b902020-04-02 10:53:23 +020011#include "Headers.h"
Kadir Cetinkaya717bef62020-04-23 17:44:51 +020012#include "SourceCode.h"
Sam McCallad97ccf2020-04-28 17:49:17 +020013#include "support/Logger.h"
14#include "support/Trace.h"
Kadir Cetinkaya2214b902020-04-02 10:53:23 +020015#include "clang/Basic/Diagnostic.h"
16#include "clang/Basic/LangOptions.h"
Sam McCallcf3a5852019-09-04 07:35:00 +000017#include "clang/Basic/SourceLocation.h"
Kadir Cetinkaya2214b902020-04-02 10:53:23 +020018#include "clang/Basic/TokenKinds.h"
19#include "clang/Frontend/CompilerInvocation.h"
20#include "clang/Frontend/FrontendActions.h"
21#include "clang/Lex/Lexer.h"
Sam McCallcf3a5852019-09-04 07:35:00 +000022#include "clang/Lex/PPCallbacks.h"
Kadir Cetinkaya2214b902020-04-02 10:53:23 +020023#include "clang/Lex/Preprocessor.h"
Sam McCallcf3a5852019-09-04 07:35:00 +000024#include "clang/Lex/PreprocessorOptions.h"
Kadir Cetinkaya2214b902020-04-02 10:53:23 +020025#include "clang/Tooling/CompilationDatabase.h"
26#include "llvm/ADT/ArrayRef.h"
Kadir Cetinkayab742eaa2020-04-02 10:53:45 +020027#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/DenseSet.h"
Kadir Cetinkaya2214b902020-04-02 10:53:23 +020029#include "llvm/ADT/IntrusiveRefCntPtr.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/SmallString.h"
Kadir Cetinkaya717bef62020-04-23 17:44:51 +020032#include "llvm/ADT/StringExtras.h"
Kadir Cetinkaya2214b902020-04-02 10:53:23 +020033#include "llvm/ADT/StringRef.h"
34#include "llvm/ADT/StringSet.h"
35#include "llvm/Support/Error.h"
36#include "llvm/Support/ErrorHandling.h"
37#include "llvm/Support/FormatVariadic.h"
38#include "llvm/Support/MemoryBuffer.h"
39#include "llvm/Support/Path.h"
40#include "llvm/Support/VirtualFileSystem.h"
41#include "llvm/Support/raw_ostream.h"
42#include <iterator>
43#include <memory>
44#include <string>
45#include <system_error>
46#include <utility>
47#include <vector>
Sam McCallcf3a5852019-09-04 07:35:00 +000048
49namespace clang {
50namespace clangd {
51namespace {
52
53bool compileCommandsAreEqual(const tooling::CompileCommand &LHS,
54 const tooling::CompileCommand &RHS) {
55 // We don't check for Output, it should not matter to clangd.
56 return LHS.Directory == RHS.Directory && LHS.Filename == RHS.Filename &&
57 llvm::makeArrayRef(LHS.CommandLine).equals(RHS.CommandLine);
58}
59
Sam McCallcf3a5852019-09-04 07:35:00 +000060class CppFilePreambleCallbacks : public PreambleCallbacks {
61public:
62 CppFilePreambleCallbacks(PathRef File, PreambleParsedCallback ParsedCallback)
Haojian Wu7e3c74b2019-09-24 11:14:06 +000063 : File(File), ParsedCallback(ParsedCallback) {}
Sam McCallcf3a5852019-09-04 07:35:00 +000064
65 IncludeStructure takeIncludes() { return std::move(Includes); }
66
Haojian Wu7e3c74b2019-09-24 11:14:06 +000067 MainFileMacros takeMacros() { return std::move(Macros); }
Sam McCallcf3a5852019-09-04 07:35:00 +000068
69 CanonicalIncludes takeCanonicalIncludes() { return std::move(CanonIncludes); }
70
71 void AfterExecute(CompilerInstance &CI) override {
72 if (!ParsedCallback)
73 return;
74 trace::Span Tracer("Running PreambleCallback");
75 ParsedCallback(CI.getASTContext(), CI.getPreprocessorPtr(), CanonIncludes);
76 }
77
78 void BeforeExecute(CompilerInstance &CI) override {
Ilya Biryukov8b767092019-09-09 15:32:51 +000079 CanonIncludes.addSystemHeadersMapping(CI.getLangOpts());
Haojian Wu7e3c74b2019-09-24 11:14:06 +000080 LangOpts = &CI.getLangOpts();
Sam McCallcf3a5852019-09-04 07:35:00 +000081 SourceMgr = &CI.getSourceManager();
82 }
83
84 std::unique_ptr<PPCallbacks> createPPCallbacks() override {
Haojian Wu7e3c74b2019-09-24 11:14:06 +000085 assert(SourceMgr && LangOpts &&
86 "SourceMgr and LangOpts must be set at this point");
87
Sam McCallcf3a5852019-09-04 07:35:00 +000088 return std::make_unique<PPChainedCallbacks>(
89 collectIncludeStructureCallback(*SourceMgr, &Includes),
Kadir Cetinkaya37550392020-03-01 16:05:12 +010090 std::make_unique<CollectMainFileMacros>(*SourceMgr, Macros));
Sam McCallcf3a5852019-09-04 07:35:00 +000091 }
92
93 CommentHandler *getCommentHandler() override {
94 IWYUHandler = collectIWYUHeaderMaps(&CanonIncludes);
95 return IWYUHandler.get();
96 }
97
98private:
99 PathRef File;
100 PreambleParsedCallback ParsedCallback;
101 IncludeStructure Includes;
102 CanonicalIncludes CanonIncludes;
Haojian Wu7e3c74b2019-09-24 11:14:06 +0000103 MainFileMacros Macros;
Sam McCallcf3a5852019-09-04 07:35:00 +0000104 std::unique_ptr<CommentHandler> IWYUHandler = nullptr;
Haojian Wu7e3c74b2019-09-24 11:14:06 +0000105 const clang::LangOptions *LangOpts = nullptr;
106 const SourceManager *SourceMgr = nullptr;
Sam McCallcf3a5852019-09-04 07:35:00 +0000107};
108
Kadir Cetinkaya2214b902020-04-02 10:53:23 +0200109/// Gets the includes in the preamble section of the file by running
110/// preprocessor over \p Contents. Returned includes do not contain resolved
111/// paths. \p VFS and \p Cmd is used to build the compiler invocation, which
112/// might stat/read files.
113llvm::Expected<std::vector<Inclusion>>
114scanPreambleIncludes(llvm::StringRef Contents,
115 llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
116 const tooling::CompileCommand &Cmd) {
117 // Build and run Preprocessor over the preamble.
118 ParseInputs PI;
119 PI.Contents = Contents.str();
120 PI.FS = std::move(VFS);
121 PI.CompileCommand = Cmd;
122 IgnoringDiagConsumer IgnoreDiags;
123 auto CI = buildCompilerInvocation(PI, IgnoreDiags);
124 if (!CI)
125 return llvm::createStringError(llvm::inconvertibleErrorCode(),
126 "failed to create compiler invocation");
127 CI->getDiagnosticOpts().IgnoreWarnings = true;
128 auto ContentsBuffer = llvm::MemoryBuffer::getMemBuffer(Contents);
Kadir Cetinkaya34e39eb2020-05-05 17:55:11 +0200129 // This means we're scanning (though not preprocessing) the preamble section
130 // twice. However, it's important to precisely follow the preamble bounds used
131 // elsewhere.
132 auto Bounds =
133 ComputePreambleBounds(*CI->getLangOpts(), ContentsBuffer.get(), 0);
134 auto PreambleContents =
135 llvm::MemoryBuffer::getMemBufferCopy(Contents.substr(0, Bounds.Size));
Kadir Cetinkaya2214b902020-04-02 10:53:23 +0200136 auto Clang = prepareCompilerInstance(
Kadir Cetinkaya34e39eb2020-05-05 17:55:11 +0200137 std::move(CI), nullptr, std::move(PreambleContents),
Kadir Cetinkaya2214b902020-04-02 10:53:23 +0200138 // Provide an empty FS to prevent preprocessor from performing IO. This
139 // also implies missing resolved paths for includes.
140 new llvm::vfs::InMemoryFileSystem, IgnoreDiags);
141 if (Clang->getFrontendOpts().Inputs.empty())
142 return llvm::createStringError(llvm::inconvertibleErrorCode(),
143 "compiler instance had no inputs");
144 // We are only interested in main file includes.
145 Clang->getPreprocessorOpts().SingleFileParseMode = true;
Kadir Cetinkaya34e39eb2020-05-05 17:55:11 +0200146 PreprocessOnlyAction Action;
Kadir Cetinkaya2214b902020-04-02 10:53:23 +0200147 if (!Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0]))
148 return llvm::createStringError(llvm::inconvertibleErrorCode(),
149 "failed BeginSourceFile");
150 Preprocessor &PP = Clang->getPreprocessor();
151 IncludeStructure Includes;
152 PP.addPPCallbacks(
153 collectIncludeStructureCallback(Clang->getSourceManager(), &Includes));
154 if (llvm::Error Err = Action.Execute())
155 return std::move(Err);
156 Action.EndSourceFile();
157 return Includes.MainFileIncludes;
158}
159
160const char *spellingForIncDirective(tok::PPKeywordKind IncludeDirective) {
161 switch (IncludeDirective) {
162 case tok::pp_include:
163 return "include";
164 case tok::pp_import:
165 return "import";
166 case tok::pp_include_next:
167 return "include_next";
168 default:
169 break;
170 }
171 llvm_unreachable("not an include directive");
172}
Sam McCallcf3a5852019-09-04 07:35:00 +0000173} // namespace
174
Kadir Cetinkayaecd3e672020-03-11 16:34:01 +0100175PreambleData::PreambleData(const ParseInputs &Inputs,
Sam McCall2cd33e62020-03-04 00:33:29 +0100176 PrecompiledPreamble Preamble,
Sam McCallcf3a5852019-09-04 07:35:00 +0000177 std::vector<Diag> Diags, IncludeStructure Includes,
Haojian Wu7e3c74b2019-09-24 11:14:06 +0000178 MainFileMacros Macros,
Sam McCallcf3a5852019-09-04 07:35:00 +0000179 std::unique_ptr<PreambleFileStatusCache> StatCache,
180 CanonicalIncludes CanonIncludes)
Kadir Cetinkayaecd3e672020-03-11 16:34:01 +0100181 : Version(Inputs.Version), CompileCommand(Inputs.CompileCommand),
182 Preamble(std::move(Preamble)), Diags(std::move(Diags)),
Haojian Wu7e3c74b2019-09-24 11:14:06 +0000183 Includes(std::move(Includes)), Macros(std::move(Macros)),
Sam McCallcf3a5852019-09-04 07:35:00 +0000184 StatCache(std::move(StatCache)), CanonIncludes(std::move(CanonIncludes)) {
185}
186
187std::shared_ptr<const PreambleData>
Kadir Cetinkaya276a95b2020-03-13 11:52:19 +0100188buildPreamble(PathRef FileName, CompilerInvocation CI,
Sam McCallcf3a5852019-09-04 07:35:00 +0000189 const ParseInputs &Inputs, bool StoreInMemory,
190 PreambleParsedCallback PreambleCallback) {
191 // Note that we don't need to copy the input contents, preamble can live
192 // without those.
193 auto ContentsBuffer =
194 llvm::MemoryBuffer::getMemBuffer(Inputs.Contents, FileName);
195 auto Bounds =
196 ComputePreambleBounds(*CI.getLangOpts(), ContentsBuffer.get(), 0);
197
Sam McCallcf3a5852019-09-04 07:35:00 +0000198 trace::Span Tracer("BuildPreamble");
199 SPAN_ATTACH(Tracer, "File", FileName);
200 StoreDiags PreambleDiagnostics;
201 llvm::IntrusiveRefCntPtr<DiagnosticsEngine> PreambleDiagsEngine =
202 CompilerInstance::createDiagnostics(&CI.getDiagnosticOpts(),
203 &PreambleDiagnostics, false);
204
205 // Skip function bodies when building the preamble to speed up building
206 // the preamble and make it smaller.
207 assert(!CI.getFrontendOpts().SkipFunctionBodies);
208 CI.getFrontendOpts().SkipFunctionBodies = true;
209 // We don't want to write comment locations into PCH. They are racy and slow
210 // to read back. We rely on dynamic index for the comments instead.
211 CI.getPreprocessorOpts().WriteCommentListToPCH = false;
212
213 CppFilePreambleCallbacks SerializedDeclsCollector(FileName, PreambleCallback);
214 if (Inputs.FS->setCurrentWorkingDirectory(Inputs.CompileCommand.Directory)) {
215 log("Couldn't set working directory when building the preamble.");
216 // We proceed anyway, our lit-tests rely on results for non-existing working
217 // dirs.
218 }
219
220 llvm::SmallString<32> AbsFileName(FileName);
221 Inputs.FS->makeAbsolute(AbsFileName);
222 auto StatCache = std::make_unique<PreambleFileStatusCache>(AbsFileName);
223 auto BuiltPreamble = PrecompiledPreamble::Build(
224 CI, ContentsBuffer.get(), Bounds, *PreambleDiagsEngine,
225 StatCache->getProducingFS(Inputs.FS),
226 std::make_shared<PCHContainerOperations>(), StoreInMemory,
227 SerializedDeclsCollector);
228
229 // When building the AST for the main file, we do want the function
230 // bodies.
231 CI.getFrontendOpts().SkipFunctionBodies = false;
232
233 if (BuiltPreamble) {
Sam McCall2cd33e62020-03-04 00:33:29 +0100234 vlog("Built preamble of size {0} for file {1} version {2}",
235 BuiltPreamble->getSize(), FileName, Inputs.Version);
Sam McCallcf3a5852019-09-04 07:35:00 +0000236 std::vector<Diag> Diags = PreambleDiagnostics.take();
237 return std::make_shared<PreambleData>(
Kadir Cetinkayaecd3e672020-03-11 16:34:01 +0100238 Inputs, std::move(*BuiltPreamble), std::move(Diags),
Sam McCallcf3a5852019-09-04 07:35:00 +0000239 SerializedDeclsCollector.takeIncludes(),
Haojian Wu7e3c74b2019-09-24 11:14:06 +0000240 SerializedDeclsCollector.takeMacros(), std::move(StatCache),
Sam McCallcf3a5852019-09-04 07:35:00 +0000241 SerializedDeclsCollector.takeCanonicalIncludes());
242 } else {
Adam Czachorowski55b92dc2020-03-19 15:09:28 +0100243 elog("Could not build a preamble for file {0} version {1}", FileName,
Sam McCall2cd33e62020-03-04 00:33:29 +0100244 Inputs.Version);
Sam McCallcf3a5852019-09-04 07:35:00 +0000245 return nullptr;
246 }
247}
248
Kadir Cetinkayac31367e2020-03-15 21:43:00 +0100249bool isPreambleCompatible(const PreambleData &Preamble,
250 const ParseInputs &Inputs, PathRef FileName,
251 const CompilerInvocation &CI) {
252 auto ContentsBuffer =
253 llvm::MemoryBuffer::getMemBuffer(Inputs.Contents, FileName);
254 auto Bounds =
255 ComputePreambleBounds(*CI.getLangOpts(), ContentsBuffer.get(), 0);
256 return compileCommandsAreEqual(Inputs.CompileCommand,
257 Preamble.CompileCommand) &&
258 Preamble.Preamble.CanReuse(CI, ContentsBuffer.get(), Bounds,
259 Inputs.FS.get());
260}
Kadir Cetinkaya2214b902020-04-02 10:53:23 +0200261
Kadir Cetinkaya717bef62020-04-23 17:44:51 +0200262void escapeBackslashAndQuotes(llvm::StringRef Text, llvm::raw_ostream &OS) {
263 for (char C : Text) {
264 switch (C) {
265 case '\\':
266 case '"':
267 OS << '\\';
268 break;
269 default:
270 break;
271 }
272 OS << C;
273 }
274}
275
Kadir Cetinkaya2214b902020-04-02 10:53:23 +0200276PreamblePatch PreamblePatch::create(llvm::StringRef FileName,
277 const ParseInputs &Modified,
278 const PreambleData &Baseline) {
Kadir Cetinkayab742eaa2020-04-02 10:53:45 +0200279 assert(llvm::sys::path::is_absolute(FileName) && "relative FileName!");
Kadir Cetinkaya2214b902020-04-02 10:53:23 +0200280 // First scan the include directives in Baseline and Modified. These will be
281 // used to figure out newly added directives in Modified. Scanning can fail,
282 // the code just bails out and creates an empty patch in such cases, as:
283 // - If scanning for Baseline fails, no knowledge of existing includes hence
284 // patch will contain all the includes in Modified. Leading to rebuild of
285 // whole preamble, which is terribly slow.
286 // - If scanning for Modified fails, cannot figure out newly added ones so
287 // there's nothing to do but generate an empty patch.
288 auto BaselineIncludes = scanPreambleIncludes(
289 // Contents needs to be null-terminated.
290 Baseline.Preamble.getContents().str(),
291 Baseline.StatCache->getConsumingFS(Modified.FS), Modified.CompileCommand);
292 if (!BaselineIncludes) {
293 elog("Failed to scan includes for baseline of {0}: {1}", FileName,
294 BaselineIncludes.takeError());
295 return {};
296 }
297 auto ModifiedIncludes = scanPreambleIncludes(
298 Modified.Contents, Baseline.StatCache->getConsumingFS(Modified.FS),
299 Modified.CompileCommand);
300 if (!ModifiedIncludes) {
301 elog("Failed to scan includes for modified contents of {0}: {1}", FileName,
302 ModifiedIncludes.takeError());
303 return {};
304 }
Kadir Cetinkaya717bef62020-04-23 17:44:51 +0200305 // No patch needed if includes are equal.
306 if (*BaselineIncludes == *ModifiedIncludes)
Kadir Cetinkayab742eaa2020-04-02 10:53:45 +0200307 return PreamblePatch::unmodified(Baseline);
Kadir Cetinkaya2214b902020-04-02 10:53:23 +0200308
309 PreamblePatch PP;
310 // This shouldn't coincide with any real file name.
311 llvm::SmallString<128> PatchName;
312 llvm::sys::path::append(PatchName, llvm::sys::path::parent_path(FileName),
313 "__preamble_patch__.h");
314 PP.PatchFileName = PatchName.str().str();
315
316 // We are only interested in newly added includes, record the ones in Baseline
317 // for exclusion.
Kadir Cetinkayab742eaa2020-04-02 10:53:45 +0200318 llvm::DenseMap<std::pair<tok::PPKeywordKind, llvm::StringRef>,
319 /*Resolved=*/llvm::StringRef>
Kadir Cetinkaya2214b902020-04-02 10:53:23 +0200320 ExistingIncludes;
Kadir Cetinkayab742eaa2020-04-02 10:53:45 +0200321 for (const auto &Inc : Baseline.Includes.MainFileIncludes)
322 ExistingIncludes[{Inc.Directive, Inc.Written}] = Inc.Resolved;
323 // There might be includes coming from disabled regions, record these for
324 // exclusion too. note that we don't have resolved paths for those.
Kadir Cetinkaya2214b902020-04-02 10:53:23 +0200325 for (const auto &Inc : *BaselineIncludes)
Kadir Cetinkayab742eaa2020-04-02 10:53:45 +0200326 ExistingIncludes.try_emplace({Inc.Directive, Inc.Written});
Kadir Cetinkaya2214b902020-04-02 10:53:23 +0200327 // Calculate extra includes that needs to be inserted.
328 llvm::raw_string_ostream Patch(PP.PatchContents);
Kadir Cetinkaya717bef62020-04-23 17:44:51 +0200329 // Set default filename for subsequent #line directives
330 Patch << "#line 0 \"";
331 // FileName part of a line directive is subject to backslash escaping, which
332 // might lead to problems on windows especially.
333 escapeBackslashAndQuotes(FileName, Patch);
334 Patch << "\"\n";
Kadir Cetinkayab742eaa2020-04-02 10:53:45 +0200335 for (auto &Inc : *ModifiedIncludes) {
336 auto It = ExistingIncludes.find({Inc.Directive, Inc.Written});
337 // Include already present in the baseline preamble. Set resolved path and
338 // put into preamble includes.
339 if (It != ExistingIncludes.end()) {
340 Inc.Resolved = It->second.str();
341 PP.PreambleIncludes.push_back(Inc);
Kadir Cetinkaya2214b902020-04-02 10:53:23 +0200342 continue;
Kadir Cetinkayab742eaa2020-04-02 10:53:45 +0200343 }
Kadir Cetinkaya717bef62020-04-23 17:44:51 +0200344 // Include is new in the modified preamble. Inject it into the patch and use
345 // #line to set the presumed location to where it is spelled.
346 auto LineCol = offsetToClangLineColumn(Modified.Contents, Inc.HashOffset);
347 Patch << llvm::formatv("#line {0}\n", LineCol.first);
Kadir Cetinkaya2214b902020-04-02 10:53:23 +0200348 Patch << llvm::formatv("#{0} {1}\n", spellingForIncDirective(Inc.Directive),
349 Inc.Written);
350 }
351 Patch.flush();
352
353 // FIXME: Handle more directives, e.g. define/undef.
354 return PP;
355}
356
357void PreamblePatch::apply(CompilerInvocation &CI) const {
358 // No need to map an empty file.
359 if (PatchContents.empty())
360 return;
361 auto &PPOpts = CI.getPreprocessorOpts();
362 auto PatchBuffer =
363 // we copy here to ensure contents are still valid if CI outlives the
364 // PreamblePatch.
365 llvm::MemoryBuffer::getMemBufferCopy(PatchContents, PatchFileName);
366 // CI will take care of the lifetime of the buffer.
367 PPOpts.addRemappedFile(PatchFileName, PatchBuffer.release());
368 // The patch will be parsed after loading the preamble ast and before parsing
369 // the main file.
370 PPOpts.Includes.push_back(PatchFileName);
371}
372
Kadir Cetinkayab742eaa2020-04-02 10:53:45 +0200373std::vector<Inclusion> PreamblePatch::preambleIncludes() const {
374 return PreambleIncludes;
375}
376
377PreamblePatch PreamblePatch::unmodified(const PreambleData &Preamble) {
378 PreamblePatch PP;
379 PP.PreambleIncludes = Preamble.Includes.MainFileIncludes;
380 return PP;
381}
382
Sam McCallcf3a5852019-09-04 07:35:00 +0000383} // namespace clangd
384} // namespace clang