blob: 4bc7004a0da22bcd6d3555d1d385753f750087ba [file] [log] [blame]
//===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass prepares a module containing type metadata for ThinLTO by splitting
// it into regular and thin LTO parts if possible, and writing both parts to
// a multi-module bitcode file. Modules that do not contain type metadata are
// written unmodified as a single module.
//
//===----------------------------------------------------------------------===//
16
Peter Collingbourne002c2d52017-02-14 03:42:38 +000017#include "llvm/Analysis/BasicAliasAnalysis.h"
Peter Collingbourne1398a322016-12-16 00:26:30 +000018#include "llvm/Analysis/ModuleSummaryAnalysis.h"
19#include "llvm/Analysis/TypeMetadataUtils.h"
20#include "llvm/Bitcode/BitcodeWriter.h"
21#include "llvm/IR/Constants.h"
Peter Collingbourne28ffd322017-02-08 20:44:00 +000022#include "llvm/IR/DebugInfo.h"
Peter Collingbourne1398a322016-12-16 00:26:30 +000023#include "llvm/IR/Intrinsics.h"
24#include "llvm/IR/Module.h"
25#include "llvm/IR/PassManager.h"
26#include "llvm/Pass.h"
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +000027#include "llvm/Support/FileSystem.h"
Peter Collingbourne1398a322016-12-16 00:26:30 +000028#include "llvm/Support/ScopedPrinter.h"
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +000029#include "llvm/Support/raw_ostream.h"
30#include "llvm/Transforms/IPO.h"
Peter Collingbourne002c2d52017-02-14 03:42:38 +000031#include "llvm/Transforms/IPO/FunctionAttrs.h"
Peter Collingbourne1398a322016-12-16 00:26:30 +000032#include "llvm/Transforms/Utils/Cloning.h"
33using namespace llvm;
34
35namespace {
36
Evgeniy Stepanovc4405722017-03-20 18:45:34 +000037// Produce a unique identifier for this module by taking the MD5 sum of the
38// names of the module's strong external symbols. This identifier is
39// normally guaranteed to be unique, or the program would fail to link due to
40// multiply defined symbols.
41//
42// If the module has no strong external symbols (such a module may still have a
43// semantic effect if it performs global initialization), we cannot produce a
44// unique identifier for this module, so we return the empty string, which
45// causes the entire module to be written as a regular LTO module.
46std::string getModuleId(Module *M) {
47 MD5 Md5;
48 bool ExportsSymbols = false;
Peter Collingbourne6b193962017-03-30 23:43:08 +000049 for (auto &GV : M->global_values()) {
Evgeniy Stepanovc4405722017-03-20 18:45:34 +000050 if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
51 !GV.hasExternalLinkage())
Peter Collingbourne6b193962017-03-30 23:43:08 +000052 continue;
Evgeniy Stepanovc4405722017-03-20 18:45:34 +000053 ExportsSymbols = true;
54 Md5.update(GV.getName());
55 Md5.update(ArrayRef<uint8_t>{0});
Peter Collingbourne6b193962017-03-30 23:43:08 +000056 }
Evgeniy Stepanovc4405722017-03-20 18:45:34 +000057
58 if (!ExportsSymbols)
59 return "";
60
61 MD5::MD5Result R;
62 Md5.final(R);
63
64 SmallString<32> Str;
65 MD5::stringifyResult(R, Str);
66 return ("$" + Str).str();
67}
68
Peter Collingbourne1398a322016-12-16 00:26:30 +000069// Promote each local-linkage entity defined by ExportM and used by ImportM by
70// changing visibility and appending the given ModuleId.
71void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
Peter Collingbourne6b193962017-03-30 23:43:08 +000072 for (auto &ExportGV : ExportM.global_values()) {
Peter Collingbourne1398a322016-12-16 00:26:30 +000073 if (!ExportGV.hasLocalLinkage())
Peter Collingbourne6b193962017-03-30 23:43:08 +000074 continue;
Peter Collingbourne1398a322016-12-16 00:26:30 +000075
76 GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName());
77 if (!ImportGV || ImportGV->use_empty())
Peter Collingbourne6b193962017-03-30 23:43:08 +000078 continue;
Peter Collingbourne1398a322016-12-16 00:26:30 +000079
80 std::string NewName = (ExportGV.getName() + ModuleId).str();
81
82 ExportGV.setName(NewName);
83 ExportGV.setLinkage(GlobalValue::ExternalLinkage);
84 ExportGV.setVisibility(GlobalValue::HiddenVisibility);
85
86 ImportGV->setName(NewName);
87 ImportGV->setVisibility(GlobalValue::HiddenVisibility);
Peter Collingbourne6b193962017-03-30 23:43:08 +000088 }
Peter Collingbourne1398a322016-12-16 00:26:30 +000089}
90
91// Promote all internal (i.e. distinct) type ids used by the module by replacing
92// them with external type ids formed using the module id.
93//
94// Note that this needs to be done before we clone the module because each clone
95// will receive its own set of distinct metadata nodes.
96void promoteTypeIds(Module &M, StringRef ModuleId) {
97 DenseMap<Metadata *, Metadata *> LocalToGlobal;
98 auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
99 Metadata *MD =
100 cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
101
102 if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
103 Metadata *&GlobalMD = LocalToGlobal[MD];
104 if (!GlobalMD) {
105 std::string NewName =
106 (to_string(LocalToGlobal.size()) + ModuleId).str();
107 GlobalMD = MDString::get(M.getContext(), NewName);
108 }
109
110 CI->setArgOperand(ArgNo,
111 MetadataAsValue::get(M.getContext(), GlobalMD));
112 }
113 };
114
115 if (Function *TypeTestFunc =
116 M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
117 for (const Use &U : TypeTestFunc->uses()) {
118 auto CI = cast<CallInst>(U.getUser());
119 ExternalizeTypeId(CI, 1);
120 }
121 }
122
123 if (Function *TypeCheckedLoadFunc =
124 M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
125 for (const Use &U : TypeCheckedLoadFunc->uses()) {
126 auto CI = cast<CallInst>(U.getUser());
127 ExternalizeTypeId(CI, 2);
128 }
129 }
130
131 for (GlobalObject &GO : M.global_objects()) {
132 SmallVector<MDNode *, 1> MDs;
133 GO.getMetadata(LLVMContext::MD_type, MDs);
134
135 GO.eraseMetadata(LLVMContext::MD_type);
136 for (auto MD : MDs) {
137 auto I = LocalToGlobal.find(MD->getOperand(1));
138 if (I == LocalToGlobal.end()) {
139 GO.addMetadata(LLVMContext::MD_type, *MD);
140 continue;
141 }
142 GO.addMetadata(
143 LLVMContext::MD_type,
144 *MDNode::get(M.getContext(),
145 ArrayRef<Metadata *>{MD->getOperand(0), I->second}));
146 }
147 }
148}
149
150// Drop unused globals, and drop type information from function declarations.
151// FIXME: If we made functions typeless then there would be no need to do this.
152void simplifyExternals(Module &M) {
153 FunctionType *EmptyFT =
154 FunctionType::get(Type::getVoidTy(M.getContext()), false);
155
156 for (auto I = M.begin(), E = M.end(); I != E;) {
157 Function &F = *I++;
158 if (F.isDeclaration() && F.use_empty()) {
159 F.eraseFromParent();
160 continue;
161 }
162
163 if (!F.isDeclaration() || F.getFunctionType() == EmptyFT)
164 continue;
165
166 Function *NewF =
167 Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
168 NewF->setVisibility(F.getVisibility());
169 NewF->takeName(&F);
170 F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
171 F.eraseFromParent();
172 }
173
174 for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
175 GlobalVariable &GV = *I++;
176 if (GV.isDeclaration() && GV.use_empty()) {
177 GV.eraseFromParent();
178 continue;
179 }
180 }
181}
182
183void filterModule(
Benjamin Kramer061f4a52017-01-13 14:39:03 +0000184 Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {
Peter Collingbourne1398a322016-12-16 00:26:30 +0000185 for (Function &F : *M) {
186 if (ShouldKeepDefinition(&F))
187 continue;
188
189 F.deleteBody();
Peter Collingbourne20a00932017-01-18 20:03:02 +0000190 F.setComdat(nullptr);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000191 F.clearMetadata();
192 }
193
194 for (GlobalVariable &GV : M->globals()) {
195 if (ShouldKeepDefinition(&GV))
196 continue;
197
198 GV.setInitializer(nullptr);
199 GV.setLinkage(GlobalValue::ExternalLinkage);
Peter Collingbourne20a00932017-01-18 20:03:02 +0000200 GV.setComdat(nullptr);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000201 GV.clearMetadata();
202 }
203
204 for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
205 I != E;) {
206 GlobalAlias *GA = &*I++;
207 if (ShouldKeepDefinition(GA))
208 continue;
209
210 GlobalObject *GO;
211 if (I->getValueType()->isFunctionTy())
212 GO = Function::Create(cast<FunctionType>(GA->getValueType()),
213 GlobalValue::ExternalLinkage, "", M);
214 else
215 GO = new GlobalVariable(
216 *M, GA->getValueType(), false, GlobalValue::ExternalLinkage,
217 (Constant *)nullptr, "", (GlobalVariable *)nullptr,
218 GA->getThreadLocalMode(), GA->getType()->getAddressSpace());
219 GO->takeName(GA);
220 GA->replaceAllUsesWith(GO);
221 GA->eraseFromParent();
222 }
223}
224
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000225void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {
226 if (auto *F = dyn_cast<Function>(C))
227 return Fn(F);
Peter Collingbourne3baa72a2017-03-02 23:10:17 +0000228 if (isa<GlobalValue>(C))
229 return;
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000230 for (Value *Op : C->operands())
231 forEachVirtualFunction(cast<Constant>(Op), Fn);
232}
233
Peter Collingbourne1398a322016-12-16 00:26:30 +0000234// If it's possible to split M into regular and thin LTO parts, do so and write
235// a multi-module bitcode file with the two parts to OS. Otherwise, write only a
236// regular LTO bitcode file to OS.
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000237void splitAndWriteThinLTOBitcode(
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000238 raw_ostream &OS, raw_ostream *ThinLinkOS,
239 function_ref<AAResults &(Function &)> AARGetter, Module &M) {
Evgeniy Stepanovc4405722017-03-20 18:45:34 +0000240 std::string ModuleId = getModuleId(&M);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000241 if (ModuleId.empty()) {
242 // We couldn't generate a module ID for this module, just write it out as a
243 // regular LTO module.
244 WriteBitcodeToFile(&M, OS);
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000245 if (ThinLinkOS)
246 // We don't have a ThinLTO part, but still write the module to the
247 // ThinLinkOS if requested so that the expected output file is produced.
248 WriteBitcodeToFile(&M, *ThinLinkOS);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000249 return;
250 }
251
252 promoteTypeIds(M, ModuleId);
253
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000254 // Returns whether a global has attached type metadata. Such globals may
255 // participate in CFI or whole-program devirtualization, so they need to
256 // appear in the merged module instead of the thin LTO module.
257 auto HasTypeMetadata = [&](const GlobalObject *GO) {
Peter Collingbourne1398a322016-12-16 00:26:30 +0000258 SmallVector<MDNode *, 1> MDs;
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000259 GO->getMetadata(LLVMContext::MD_type, MDs);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000260 return !MDs.empty();
261 };
262
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000263 // Collect the set of virtual functions that are eligible for virtual constant
264 // propagation. Each eligible function must not access memory, must return
265 // an integer of width <=64 bits, must take at least one argument, must not
266 // use its first argument (assumed to be "this") and all arguments other than
267 // the first one must be of <=64 bit integer type.
268 //
269 // Note that we test whether this copy of the function is readnone, rather
270 // than testing function attributes, which must hold for any copy of the
271 // function, even a less optimized version substituted at link time. This is
272 // sound because the virtual constant propagation optimizations effectively
273 // inline all implementations of the virtual function into each call site,
274 // rather than using function attributes to perform local optimization.
275 std::set<const Function *> EligibleVirtualFns;
276 for (GlobalVariable &GV : M.globals())
277 if (HasTypeMetadata(&GV))
278 forEachVirtualFunction(GV.getInitializer(), [&](Function *F) {
279 auto *RT = dyn_cast<IntegerType>(F->getReturnType());
280 if (!RT || RT->getBitWidth() > 64 || F->arg_empty() ||
281 !F->arg_begin()->use_empty())
282 return;
283 for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) {
284 auto *ArgT = dyn_cast<IntegerType>(Arg.getType());
285 if (!ArgT || ArgT->getBitWidth() > 64)
286 return;
287 }
288 if (computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
289 EligibleVirtualFns.insert(F);
290 });
291
Peter Collingbourne1398a322016-12-16 00:26:30 +0000292 ValueToValueMapTy VMap;
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000293 std::unique_ptr<Module> MergedM(
294 CloneModule(&M, VMap, [&](const GlobalValue *GV) -> bool {
295 if (auto *F = dyn_cast<Function>(GV))
296 return EligibleVirtualFns.count(F);
297 if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
298 return HasTypeMetadata(GVar);
299 return false;
300 }));
Peter Collingbourne28ffd322017-02-08 20:44:00 +0000301 StripDebugInfo(*MergedM);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000302
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000303 for (Function &F : *MergedM)
304 if (!F.isDeclaration()) {
305 // Reset the linkage of all functions eligible for virtual constant
306 // propagation. The canonical definitions live in the thin LTO module so
307 // that they can be imported.
308 F.setLinkage(GlobalValue::AvailableExternallyLinkage);
309 F.setComdat(nullptr);
310 }
311
312 // Remove all globals with type metadata, as well as aliases pointing to them,
313 // from the thin LTO module.
314 filterModule(&M, [&](const GlobalValue *GV) {
315 if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
316 return !HasTypeMetadata(GVar);
317 return true;
318 });
Peter Collingbourne1398a322016-12-16 00:26:30 +0000319
320 promoteInternals(*MergedM, M, ModuleId);
321 promoteInternals(M, *MergedM, ModuleId);
322
323 simplifyExternals(*MergedM);
324
Peter Collingbourne1398a322016-12-16 00:26:30 +0000325
326 // FIXME: Try to re-use BSI and PFI from the original module here.
327 ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr);
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000328
329 SmallVector<char, 0> Buffer;
330
331 BitcodeWriter W(Buffer);
332 // Save the module hash produced for the full bitcode, which will
333 // be used in the backends, and use that in the minimized bitcode
334 // produced for the full link.
335 ModuleHash ModHash = {{0}};
Peter Collingbourne1398a322016-12-16 00:26:30 +0000336 W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000337 /*GenerateHash=*/true, &ModHash);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000338 W.writeModule(MergedM.get());
Peter Collingbourne1398a322016-12-16 00:26:30 +0000339 OS << Buffer;
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000340
341 // If a minimized bitcode module was requested for the thin link,
342 // strip the debug info (the merged module was already stripped above)
343 // and write it to the given OS.
344 if (ThinLinkOS) {
345 Buffer.clear();
346 BitcodeWriter W2(Buffer);
347 StripDebugInfo(M);
348 W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
349 /*GenerateHash=*/false, &ModHash);
350 W2.writeModule(MergedM.get());
351 *ThinLinkOS << Buffer;
352 }
Peter Collingbourne1398a322016-12-16 00:26:30 +0000353}
354
355// Returns whether this module needs to be split because it uses type metadata.
356bool requiresSplit(Module &M) {
357 SmallVector<MDNode *, 1> MDs;
358 for (auto &GO : M.global_objects()) {
359 GO.getMetadata(LLVMContext::MD_type, MDs);
360 if (!MDs.empty())
361 return true;
362 }
363
364 return false;
365}
366
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000367void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000368 function_ref<AAResults &(Function &)> AARGetter,
369 Module &M, const ModuleSummaryIndex *Index) {
Peter Collingbourne1398a322016-12-16 00:26:30 +0000370 // See if this module has any type metadata. If so, we need to split it.
371 if (requiresSplit(M))
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000372 return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000373
374 // Otherwise we can just write it out as a regular module.
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000375
376 // Save the module hash produced for the full bitcode, which will
377 // be used in the backends, and use that in the minimized bitcode
378 // produced for the full link.
379 ModuleHash ModHash = {{0}};
Peter Collingbourne1398a322016-12-16 00:26:30 +0000380 WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000381 /*GenerateHash=*/true, &ModHash);
382 // If a minimized bitcode module was requested for the thin link,
383 // strip the debug info and write it to the given OS.
384 if (ThinLinkOS) {
385 StripDebugInfo(M);
386 WriteBitcodeToFile(&M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false,
387 Index,
388 /*GenerateHash=*/false, &ModHash);
389 }
Peter Collingbourne1398a322016-12-16 00:26:30 +0000390}
391
392class WriteThinLTOBitcode : public ModulePass {
393 raw_ostream &OS; // raw_ostream to print on
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000394 // The output stream on which to emit a minimized module for use
395 // just in the thin link, if requested.
396 raw_ostream *ThinLinkOS;
Peter Collingbourne1398a322016-12-16 00:26:30 +0000397
398public:
399 static char ID; // Pass identification, replacement for typeid
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000400 WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) {
Peter Collingbourne1398a322016-12-16 00:26:30 +0000401 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
402 }
403
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000404 explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS)
405 : ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) {
Peter Collingbourne1398a322016-12-16 00:26:30 +0000406 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
407 }
408
409 StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
410
411 bool runOnModule(Module &M) override {
412 const ModuleSummaryIndex *Index =
413 &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000414 writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000415 return true;
416 }
417 void getAnalysisUsage(AnalysisUsage &AU) const override {
418 AU.setPreservesAll();
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000419 AU.addRequired<AssumptionCacheTracker>();
Peter Collingbourne1398a322016-12-16 00:26:30 +0000420 AU.addRequired<ModuleSummaryIndexWrapperPass>();
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000421 AU.addRequired<TargetLibraryInfoWrapperPass>();
Peter Collingbourne1398a322016-12-16 00:26:30 +0000422 }
423};
424} // anonymous namespace
425
426char WriteThinLTOBitcode::ID = 0;
427INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
428 "Write ThinLTO Bitcode", false, true)
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000429INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
Peter Collingbourne1398a322016-12-16 00:26:30 +0000430INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000431INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
Peter Collingbourne1398a322016-12-16 00:26:30 +0000432INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
433 "Write ThinLTO Bitcode", false, true)
434
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000435ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str,
436 raw_ostream *ThinLinkOS) {
437 return new WriteThinLTOBitcode(Str, ThinLinkOS);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000438}