blob: 640a2c66b3c91d485f9ce025ddf0c00dfc4fa662 [file] [log] [blame]
Sam McCallcf3a5852019-09-04 07:35:00 +00001//===--- Preamble.cpp - Reusing expensive parts of the AST ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "Preamble.h"
Kadir Cetinkayaecd3e672020-03-11 16:34:01 +010010#include "Compiler.h"
Kadir Cetinkaya2214b902020-04-02 10:53:23 +020011#include "Headers.h"
Kadir Cetinkaya717bef62020-04-23 17:44:51 +020012#include "SourceCode.h"
Sam McCallad97ccf2020-04-28 17:49:17 +020013#include "support/Logger.h"
14#include "support/Trace.h"
Kadir Cetinkaya2214b902020-04-02 10:53:23 +020015#include "clang/Basic/Diagnostic.h"
16#include "clang/Basic/LangOptions.h"
Sam McCallcf3a5852019-09-04 07:35:00 +000017#include "clang/Basic/SourceLocation.h"
Kadir Cetinkaya2214b902020-04-02 10:53:23 +020018#include "clang/Basic/TokenKinds.h"
19#include "clang/Frontend/CompilerInvocation.h"
20#include "clang/Frontend/FrontendActions.h"
21#include "clang/Lex/Lexer.h"
Sam McCallcf3a5852019-09-04 07:35:00 +000022#include "clang/Lex/PPCallbacks.h"
Kadir Cetinkaya2214b902020-04-02 10:53:23 +020023#include "clang/Lex/Preprocessor.h"
Sam McCallcf3a5852019-09-04 07:35:00 +000024#include "clang/Lex/PreprocessorOptions.h"
Kadir Cetinkaya2214b902020-04-02 10:53:23 +020025#include "clang/Tooling/CompilationDatabase.h"
26#include "llvm/ADT/ArrayRef.h"
27#include "llvm/ADT/IntrusiveRefCntPtr.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallString.h"
Kadir Cetinkaya717bef62020-04-23 17:44:51 +020030#include "llvm/ADT/StringExtras.h"
Kadir Cetinkaya2214b902020-04-02 10:53:23 +020031#include "llvm/ADT/StringRef.h"
32#include "llvm/ADT/StringSet.h"
33#include "llvm/Support/Error.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Support/FormatVariadic.h"
36#include "llvm/Support/MemoryBuffer.h"
37#include "llvm/Support/Path.h"
38#include "llvm/Support/VirtualFileSystem.h"
39#include "llvm/Support/raw_ostream.h"
40#include <iterator>
41#include <memory>
42#include <string>
43#include <system_error>
44#include <utility>
45#include <vector>
Sam McCallcf3a5852019-09-04 07:35:00 +000046
47namespace clang {
48namespace clangd {
49namespace {
50
51bool compileCommandsAreEqual(const tooling::CompileCommand &LHS,
52 const tooling::CompileCommand &RHS) {
53 // We don't check for Output, it should not matter to clangd.
54 return LHS.Directory == RHS.Directory && LHS.Filename == RHS.Filename &&
55 llvm::makeArrayRef(LHS.CommandLine).equals(RHS.CommandLine);
56}
57
Sam McCallcf3a5852019-09-04 07:35:00 +000058class CppFilePreambleCallbacks : public PreambleCallbacks {
59public:
60 CppFilePreambleCallbacks(PathRef File, PreambleParsedCallback ParsedCallback)
Haojian Wu7e3c74b2019-09-24 11:14:06 +000061 : File(File), ParsedCallback(ParsedCallback) {}
Sam McCallcf3a5852019-09-04 07:35:00 +000062
63 IncludeStructure takeIncludes() { return std::move(Includes); }
64
Haojian Wu7e3c74b2019-09-24 11:14:06 +000065 MainFileMacros takeMacros() { return std::move(Macros); }
Sam McCallcf3a5852019-09-04 07:35:00 +000066
67 CanonicalIncludes takeCanonicalIncludes() { return std::move(CanonIncludes); }
68
69 void AfterExecute(CompilerInstance &CI) override {
70 if (!ParsedCallback)
71 return;
72 trace::Span Tracer("Running PreambleCallback");
73 ParsedCallback(CI.getASTContext(), CI.getPreprocessorPtr(), CanonIncludes);
74 }
75
76 void BeforeExecute(CompilerInstance &CI) override {
Ilya Biryukov8b767092019-09-09 15:32:51 +000077 CanonIncludes.addSystemHeadersMapping(CI.getLangOpts());
Haojian Wu7e3c74b2019-09-24 11:14:06 +000078 LangOpts = &CI.getLangOpts();
Sam McCallcf3a5852019-09-04 07:35:00 +000079 SourceMgr = &CI.getSourceManager();
80 }
81
82 std::unique_ptr<PPCallbacks> createPPCallbacks() override {
Haojian Wu7e3c74b2019-09-24 11:14:06 +000083 assert(SourceMgr && LangOpts &&
84 "SourceMgr and LangOpts must be set at this point");
85
Sam McCallcf3a5852019-09-04 07:35:00 +000086 return std::make_unique<PPChainedCallbacks>(
87 collectIncludeStructureCallback(*SourceMgr, &Includes),
Kadir Cetinkaya37550392020-03-01 16:05:12 +010088 std::make_unique<CollectMainFileMacros>(*SourceMgr, Macros));
Sam McCallcf3a5852019-09-04 07:35:00 +000089 }
90
91 CommentHandler *getCommentHandler() override {
92 IWYUHandler = collectIWYUHeaderMaps(&CanonIncludes);
93 return IWYUHandler.get();
94 }
95
96private:
97 PathRef File;
98 PreambleParsedCallback ParsedCallback;
99 IncludeStructure Includes;
100 CanonicalIncludes CanonIncludes;
Haojian Wu7e3c74b2019-09-24 11:14:06 +0000101 MainFileMacros Macros;
Sam McCallcf3a5852019-09-04 07:35:00 +0000102 std::unique_ptr<CommentHandler> IWYUHandler = nullptr;
Haojian Wu7e3c74b2019-09-24 11:14:06 +0000103 const clang::LangOptions *LangOpts = nullptr;
104 const SourceManager *SourceMgr = nullptr;
Sam McCallcf3a5852019-09-04 07:35:00 +0000105};
106
Kadir Cetinkaya2214b902020-04-02 10:53:23 +0200107// Runs preprocessor over preamble section.
108class PreambleOnlyAction : public PreprocessorFrontendAction {
109protected:
110 void ExecuteAction() override {
111 Preprocessor &PP = getCompilerInstance().getPreprocessor();
112 auto &SM = PP.getSourceManager();
113 PP.EnterMainSourceFile();
114 auto Bounds = ComputePreambleBounds(getCompilerInstance().getLangOpts(),
115 SM.getBuffer(SM.getMainFileID()), 0);
116 Token Tok;
117 do {
118 PP.Lex(Tok);
119 assert(SM.isInMainFile(Tok.getLocation()));
120 } while (Tok.isNot(tok::eof) &&
121 SM.getDecomposedLoc(Tok.getLocation()).second < Bounds.Size);
122 }
123};
124
125/// Gets the includes in the preamble section of the file by running
126/// preprocessor over \p Contents. Returned includes do not contain resolved
127/// paths. \p VFS and \p Cmd is used to build the compiler invocation, which
128/// might stat/read files.
129llvm::Expected<std::vector<Inclusion>>
130scanPreambleIncludes(llvm::StringRef Contents,
131 llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
132 const tooling::CompileCommand &Cmd) {
133 // Build and run Preprocessor over the preamble.
134 ParseInputs PI;
135 PI.Contents = Contents.str();
136 PI.FS = std::move(VFS);
137 PI.CompileCommand = Cmd;
138 IgnoringDiagConsumer IgnoreDiags;
139 auto CI = buildCompilerInvocation(PI, IgnoreDiags);
140 if (!CI)
141 return llvm::createStringError(llvm::inconvertibleErrorCode(),
142 "failed to create compiler invocation");
143 CI->getDiagnosticOpts().IgnoreWarnings = true;
144 auto ContentsBuffer = llvm::MemoryBuffer::getMemBuffer(Contents);
145 auto Clang = prepareCompilerInstance(
146 std::move(CI), nullptr, std::move(ContentsBuffer),
147 // Provide an empty FS to prevent preprocessor from performing IO. This
148 // also implies missing resolved paths for includes.
149 new llvm::vfs::InMemoryFileSystem, IgnoreDiags);
150 if (Clang->getFrontendOpts().Inputs.empty())
151 return llvm::createStringError(llvm::inconvertibleErrorCode(),
152 "compiler instance had no inputs");
153 // We are only interested in main file includes.
154 Clang->getPreprocessorOpts().SingleFileParseMode = true;
155 PreambleOnlyAction Action;
156 if (!Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0]))
157 return llvm::createStringError(llvm::inconvertibleErrorCode(),
158 "failed BeginSourceFile");
159 Preprocessor &PP = Clang->getPreprocessor();
160 IncludeStructure Includes;
161 PP.addPPCallbacks(
162 collectIncludeStructureCallback(Clang->getSourceManager(), &Includes));
163 if (llvm::Error Err = Action.Execute())
164 return std::move(Err);
165 Action.EndSourceFile();
166 return Includes.MainFileIncludes;
167}
168
169const char *spellingForIncDirective(tok::PPKeywordKind IncludeDirective) {
170 switch (IncludeDirective) {
171 case tok::pp_include:
172 return "include";
173 case tok::pp_import:
174 return "import";
175 case tok::pp_include_next:
176 return "include_next";
177 default:
178 break;
179 }
180 llvm_unreachable("not an include directive");
181}
Sam McCallcf3a5852019-09-04 07:35:00 +0000182} // namespace
183
Kadir Cetinkayaecd3e672020-03-11 16:34:01 +0100184PreambleData::PreambleData(const ParseInputs &Inputs,
Sam McCall2cd33e62020-03-04 00:33:29 +0100185 PrecompiledPreamble Preamble,
Sam McCallcf3a5852019-09-04 07:35:00 +0000186 std::vector<Diag> Diags, IncludeStructure Includes,
Haojian Wu7e3c74b2019-09-24 11:14:06 +0000187 MainFileMacros Macros,
Sam McCallcf3a5852019-09-04 07:35:00 +0000188 std::unique_ptr<PreambleFileStatusCache> StatCache,
189 CanonicalIncludes CanonIncludes)
Kadir Cetinkayaecd3e672020-03-11 16:34:01 +0100190 : Version(Inputs.Version), CompileCommand(Inputs.CompileCommand),
191 Preamble(std::move(Preamble)), Diags(std::move(Diags)),
Haojian Wu7e3c74b2019-09-24 11:14:06 +0000192 Includes(std::move(Includes)), Macros(std::move(Macros)),
Sam McCallcf3a5852019-09-04 07:35:00 +0000193 StatCache(std::move(StatCache)), CanonIncludes(std::move(CanonIncludes)) {
194}
195
196std::shared_ptr<const PreambleData>
Kadir Cetinkaya276a95b2020-03-13 11:52:19 +0100197buildPreamble(PathRef FileName, CompilerInvocation CI,
Sam McCallcf3a5852019-09-04 07:35:00 +0000198 const ParseInputs &Inputs, bool StoreInMemory,
199 PreambleParsedCallback PreambleCallback) {
200 // Note that we don't need to copy the input contents, preamble can live
201 // without those.
202 auto ContentsBuffer =
203 llvm::MemoryBuffer::getMemBuffer(Inputs.Contents, FileName);
204 auto Bounds =
205 ComputePreambleBounds(*CI.getLangOpts(), ContentsBuffer.get(), 0);
206
Sam McCallcf3a5852019-09-04 07:35:00 +0000207 trace::Span Tracer("BuildPreamble");
208 SPAN_ATTACH(Tracer, "File", FileName);
209 StoreDiags PreambleDiagnostics;
210 llvm::IntrusiveRefCntPtr<DiagnosticsEngine> PreambleDiagsEngine =
211 CompilerInstance::createDiagnostics(&CI.getDiagnosticOpts(),
212 &PreambleDiagnostics, false);
213
214 // Skip function bodies when building the preamble to speed up building
215 // the preamble and make it smaller.
216 assert(!CI.getFrontendOpts().SkipFunctionBodies);
217 CI.getFrontendOpts().SkipFunctionBodies = true;
218 // We don't want to write comment locations into PCH. They are racy and slow
219 // to read back. We rely on dynamic index for the comments instead.
220 CI.getPreprocessorOpts().WriteCommentListToPCH = false;
221
Haojian Wu72439b62020-03-31 16:09:49 +0200222 // Recovery expression currently only works for C++.
223 if (CI.getLangOpts()->CPlusPlus)
224 CI.getLangOpts()->RecoveryAST = Inputs.Opts.BuildRecoveryAST;
225
Sam McCallcf3a5852019-09-04 07:35:00 +0000226 CppFilePreambleCallbacks SerializedDeclsCollector(FileName, PreambleCallback);
227 if (Inputs.FS->setCurrentWorkingDirectory(Inputs.CompileCommand.Directory)) {
228 log("Couldn't set working directory when building the preamble.");
229 // We proceed anyway, our lit-tests rely on results for non-existing working
230 // dirs.
231 }
232
233 llvm::SmallString<32> AbsFileName(FileName);
234 Inputs.FS->makeAbsolute(AbsFileName);
235 auto StatCache = std::make_unique<PreambleFileStatusCache>(AbsFileName);
236 auto BuiltPreamble = PrecompiledPreamble::Build(
237 CI, ContentsBuffer.get(), Bounds, *PreambleDiagsEngine,
238 StatCache->getProducingFS(Inputs.FS),
239 std::make_shared<PCHContainerOperations>(), StoreInMemory,
240 SerializedDeclsCollector);
241
242 // When building the AST for the main file, we do want the function
243 // bodies.
244 CI.getFrontendOpts().SkipFunctionBodies = false;
245
246 if (BuiltPreamble) {
Sam McCall2cd33e62020-03-04 00:33:29 +0100247 vlog("Built preamble of size {0} for file {1} version {2}",
248 BuiltPreamble->getSize(), FileName, Inputs.Version);
Sam McCallcf3a5852019-09-04 07:35:00 +0000249 std::vector<Diag> Diags = PreambleDiagnostics.take();
250 return std::make_shared<PreambleData>(
Kadir Cetinkayaecd3e672020-03-11 16:34:01 +0100251 Inputs, std::move(*BuiltPreamble), std::move(Diags),
Sam McCallcf3a5852019-09-04 07:35:00 +0000252 SerializedDeclsCollector.takeIncludes(),
Haojian Wu7e3c74b2019-09-24 11:14:06 +0000253 SerializedDeclsCollector.takeMacros(), std::move(StatCache),
Sam McCallcf3a5852019-09-04 07:35:00 +0000254 SerializedDeclsCollector.takeCanonicalIncludes());
255 } else {
Adam Czachorowski55b92dc2020-03-19 15:09:28 +0100256 elog("Could not build a preamble for file {0} version {1}", FileName,
Sam McCall2cd33e62020-03-04 00:33:29 +0100257 Inputs.Version);
Sam McCallcf3a5852019-09-04 07:35:00 +0000258 return nullptr;
259 }
260}
261
Kadir Cetinkayac31367e2020-03-15 21:43:00 +0100262bool isPreambleCompatible(const PreambleData &Preamble,
263 const ParseInputs &Inputs, PathRef FileName,
264 const CompilerInvocation &CI) {
265 auto ContentsBuffer =
266 llvm::MemoryBuffer::getMemBuffer(Inputs.Contents, FileName);
267 auto Bounds =
268 ComputePreambleBounds(*CI.getLangOpts(), ContentsBuffer.get(), 0);
269 return compileCommandsAreEqual(Inputs.CompileCommand,
270 Preamble.CompileCommand) &&
271 Preamble.Preamble.CanReuse(CI, ContentsBuffer.get(), Bounds,
272 Inputs.FS.get());
273}
Kadir Cetinkaya2214b902020-04-02 10:53:23 +0200274
Kadir Cetinkaya717bef62020-04-23 17:44:51 +0200275void escapeBackslashAndQuotes(llvm::StringRef Text, llvm::raw_ostream &OS) {
276 for (char C : Text) {
277 switch (C) {
278 case '\\':
279 case '"':
280 OS << '\\';
281 break;
282 default:
283 break;
284 }
285 OS << C;
286 }
287}
288
Kadir Cetinkaya2214b902020-04-02 10:53:23 +0200289PreamblePatch PreamblePatch::create(llvm::StringRef FileName,
290 const ParseInputs &Modified,
291 const PreambleData &Baseline) {
292 // First scan the include directives in Baseline and Modified. These will be
293 // used to figure out newly added directives in Modified. Scanning can fail,
294 // the code just bails out and creates an empty patch in such cases, as:
295 // - If scanning for Baseline fails, no knowledge of existing includes hence
296 // patch will contain all the includes in Modified. Leading to rebuild of
297 // whole preamble, which is terribly slow.
298 // - If scanning for Modified fails, cannot figure out newly added ones so
299 // there's nothing to do but generate an empty patch.
300 auto BaselineIncludes = scanPreambleIncludes(
301 // Contents needs to be null-terminated.
302 Baseline.Preamble.getContents().str(),
303 Baseline.StatCache->getConsumingFS(Modified.FS), Modified.CompileCommand);
304 if (!BaselineIncludes) {
305 elog("Failed to scan includes for baseline of {0}: {1}", FileName,
306 BaselineIncludes.takeError());
307 return {};
308 }
309 auto ModifiedIncludes = scanPreambleIncludes(
310 Modified.Contents, Baseline.StatCache->getConsumingFS(Modified.FS),
311 Modified.CompileCommand);
312 if (!ModifiedIncludes) {
313 elog("Failed to scan includes for modified contents of {0}: {1}", FileName,
314 ModifiedIncludes.takeError());
315 return {};
316 }
Kadir Cetinkaya717bef62020-04-23 17:44:51 +0200317 // No patch needed if includes are equal.
318 if (*BaselineIncludes == *ModifiedIncludes)
319 return {};
Kadir Cetinkaya2214b902020-04-02 10:53:23 +0200320
321 PreamblePatch PP;
322 // This shouldn't coincide with any real file name.
323 llvm::SmallString<128> PatchName;
324 llvm::sys::path::append(PatchName, llvm::sys::path::parent_path(FileName),
325 "__preamble_patch__.h");
326 PP.PatchFileName = PatchName.str().str();
327
328 // We are only interested in newly added includes, record the ones in Baseline
329 // for exclusion.
330 llvm::DenseSet<std::pair<tok::PPKeywordKind, llvm::StringRef>>
331 ExistingIncludes;
332 for (const auto &Inc : *BaselineIncludes)
333 ExistingIncludes.insert({Inc.Directive, Inc.Written});
334 // Calculate extra includes that needs to be inserted.
335 llvm::raw_string_ostream Patch(PP.PatchContents);
Kadir Cetinkaya717bef62020-04-23 17:44:51 +0200336 // Set default filename for subsequent #line directives
337 Patch << "#line 0 \"";
338 // FileName part of a line directive is subject to backslash escaping, which
339 // might lead to problems on windows especially.
340 escapeBackslashAndQuotes(FileName, Patch);
341 Patch << "\"\n";
Kadir Cetinkaya2214b902020-04-02 10:53:23 +0200342 for (const auto &Inc : *ModifiedIncludes) {
343 if (ExistingIncludes.count({Inc.Directive, Inc.Written}))
344 continue;
Kadir Cetinkaya717bef62020-04-23 17:44:51 +0200345 // Include is new in the modified preamble. Inject it into the patch and use
346 // #line to set the presumed location to where it is spelled.
347 auto LineCol = offsetToClangLineColumn(Modified.Contents, Inc.HashOffset);
348 Patch << llvm::formatv("#line {0}\n", LineCol.first);
Kadir Cetinkaya2214b902020-04-02 10:53:23 +0200349 Patch << llvm::formatv("#{0} {1}\n", spellingForIncDirective(Inc.Directive),
350 Inc.Written);
351 }
352 Patch.flush();
353
354 // FIXME: Handle more directives, e.g. define/undef.
355 return PP;
356}
357
358void PreamblePatch::apply(CompilerInvocation &CI) const {
359 // No need to map an empty file.
360 if (PatchContents.empty())
361 return;
362 auto &PPOpts = CI.getPreprocessorOpts();
363 auto PatchBuffer =
364 // we copy here to ensure contents are still valid if CI outlives the
365 // PreamblePatch.
366 llvm::MemoryBuffer::getMemBufferCopy(PatchContents, PatchFileName);
367 // CI will take care of the lifetime of the buffer.
368 PPOpts.addRemappedFile(PatchFileName, PatchBuffer.release());
369 // The patch will be parsed after loading the preamble ast and before parsing
370 // the main file.
371 PPOpts.Includes.push_back(PatchFileName);
372}
373
Sam McCallcf3a5852019-09-04 07:35:00 +0000374} // namespace clangd
375} // namespace clang