blob: 978b6cfee382c73de0dc8e5f7a0ff9546106e6bf [file] [log] [blame]
Peter Collingbourne1398a322016-12-16 00:26:30 +00001//===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass prepares a module containing type metadata for ThinLTO by splitting
11// it into regular and thin LTO parts if possible, and writing both parts to
12// a multi-module bitcode file. Modules that do not contain type metadata are
13// written unmodified as a single module.
14//
15//===----------------------------------------------------------------------===//
16
Peter Collingbourne002c2d52017-02-14 03:42:38 +000017#include "llvm/Analysis/BasicAliasAnalysis.h"
Peter Collingbourne1398a322016-12-16 00:26:30 +000018#include "llvm/Analysis/ModuleSummaryAnalysis.h"
19#include "llvm/Analysis/TypeMetadataUtils.h"
20#include "llvm/Bitcode/BitcodeWriter.h"
21#include "llvm/IR/Constants.h"
Peter Collingbourne28ffd322017-02-08 20:44:00 +000022#include "llvm/IR/DebugInfo.h"
Peter Collingbourne1398a322016-12-16 00:26:30 +000023#include "llvm/IR/Intrinsics.h"
24#include "llvm/IR/Module.h"
25#include "llvm/IR/PassManager.h"
26#include "llvm/Pass.h"
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +000027#include "llvm/Support/FileSystem.h"
Peter Collingbourne1398a322016-12-16 00:26:30 +000028#include "llvm/Support/ScopedPrinter.h"
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +000029#include "llvm/Support/raw_ostream.h"
30#include "llvm/Transforms/IPO.h"
Peter Collingbourne002c2d52017-02-14 03:42:38 +000031#include "llvm/Transforms/IPO/FunctionAttrs.h"
Peter Collingbourne1398a322016-12-16 00:26:30 +000032#include "llvm/Transforms/Utils/Cloning.h"
33using namespace llvm;
34
35namespace {
36
Evgeniy Stepanovc4405722017-03-20 18:45:34 +000037// Produce a unique identifier for this module by taking the MD5 sum of the
38// names of the module's strong external symbols. This identifier is
39// normally guaranteed to be unique, or the program would fail to link due to
40// multiply defined symbols.
41//
42// If the module has no strong external symbols (such a module may still have a
43// semantic effect if it performs global initialization), we cannot produce a
44// unique identifier for this module, so we return the empty string, which
45// causes the entire module to be written as a regular LTO module.
46std::string getModuleId(Module *M) {
47 MD5 Md5;
48 bool ExportsSymbols = false;
49 auto AddGlobal = [&](GlobalValue &GV) {
50 if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
51 !GV.hasExternalLinkage())
52 return;
53 ExportsSymbols = true;
54 Md5.update(GV.getName());
55 Md5.update(ArrayRef<uint8_t>{0});
56 };
57
58 for (auto &F : *M)
59 AddGlobal(F);
60 for (auto &GV : M->globals())
61 AddGlobal(GV);
62 for (auto &GA : M->aliases())
63 AddGlobal(GA);
64 for (auto &IF : M->ifuncs())
65 AddGlobal(IF);
66
67 if (!ExportsSymbols)
68 return "";
69
70 MD5::MD5Result R;
71 Md5.final(R);
72
73 SmallString<32> Str;
74 MD5::stringifyResult(R, Str);
75 return ("$" + Str).str();
76}
77
Peter Collingbourne1398a322016-12-16 00:26:30 +000078// Promote each local-linkage entity defined by ExportM and used by ImportM by
79// changing visibility and appending the given ModuleId.
80void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
81 auto PromoteInternal = [&](GlobalValue &ExportGV) {
82 if (!ExportGV.hasLocalLinkage())
83 return;
84
85 GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName());
86 if (!ImportGV || ImportGV->use_empty())
87 return;
88
89 std::string NewName = (ExportGV.getName() + ModuleId).str();
90
91 ExportGV.setName(NewName);
92 ExportGV.setLinkage(GlobalValue::ExternalLinkage);
93 ExportGV.setVisibility(GlobalValue::HiddenVisibility);
94
95 ImportGV->setName(NewName);
96 ImportGV->setVisibility(GlobalValue::HiddenVisibility);
97 };
98
99 for (auto &F : ExportM)
100 PromoteInternal(F);
101 for (auto &GV : ExportM.globals())
102 PromoteInternal(GV);
103 for (auto &GA : ExportM.aliases())
104 PromoteInternal(GA);
105 for (auto &IF : ExportM.ifuncs())
106 PromoteInternal(IF);
107}
108
109// Promote all internal (i.e. distinct) type ids used by the module by replacing
110// them with external type ids formed using the module id.
111//
112// Note that this needs to be done before we clone the module because each clone
113// will receive its own set of distinct metadata nodes.
114void promoteTypeIds(Module &M, StringRef ModuleId) {
115 DenseMap<Metadata *, Metadata *> LocalToGlobal;
116 auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
117 Metadata *MD =
118 cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
119
120 if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
121 Metadata *&GlobalMD = LocalToGlobal[MD];
122 if (!GlobalMD) {
123 std::string NewName =
124 (to_string(LocalToGlobal.size()) + ModuleId).str();
125 GlobalMD = MDString::get(M.getContext(), NewName);
126 }
127
128 CI->setArgOperand(ArgNo,
129 MetadataAsValue::get(M.getContext(), GlobalMD));
130 }
131 };
132
133 if (Function *TypeTestFunc =
134 M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
135 for (const Use &U : TypeTestFunc->uses()) {
136 auto CI = cast<CallInst>(U.getUser());
137 ExternalizeTypeId(CI, 1);
138 }
139 }
140
141 if (Function *TypeCheckedLoadFunc =
142 M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
143 for (const Use &U : TypeCheckedLoadFunc->uses()) {
144 auto CI = cast<CallInst>(U.getUser());
145 ExternalizeTypeId(CI, 2);
146 }
147 }
148
149 for (GlobalObject &GO : M.global_objects()) {
150 SmallVector<MDNode *, 1> MDs;
151 GO.getMetadata(LLVMContext::MD_type, MDs);
152
153 GO.eraseMetadata(LLVMContext::MD_type);
154 for (auto MD : MDs) {
155 auto I = LocalToGlobal.find(MD->getOperand(1));
156 if (I == LocalToGlobal.end()) {
157 GO.addMetadata(LLVMContext::MD_type, *MD);
158 continue;
159 }
160 GO.addMetadata(
161 LLVMContext::MD_type,
162 *MDNode::get(M.getContext(),
163 ArrayRef<Metadata *>{MD->getOperand(0), I->second}));
164 }
165 }
166}
167
168// Drop unused globals, and drop type information from function declarations.
169// FIXME: If we made functions typeless then there would be no need to do this.
170void simplifyExternals(Module &M) {
171 FunctionType *EmptyFT =
172 FunctionType::get(Type::getVoidTy(M.getContext()), false);
173
174 for (auto I = M.begin(), E = M.end(); I != E;) {
175 Function &F = *I++;
176 if (F.isDeclaration() && F.use_empty()) {
177 F.eraseFromParent();
178 continue;
179 }
180
181 if (!F.isDeclaration() || F.getFunctionType() == EmptyFT)
182 continue;
183
184 Function *NewF =
185 Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
186 NewF->setVisibility(F.getVisibility());
187 NewF->takeName(&F);
188 F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
189 F.eraseFromParent();
190 }
191
192 for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
193 GlobalVariable &GV = *I++;
194 if (GV.isDeclaration() && GV.use_empty()) {
195 GV.eraseFromParent();
196 continue;
197 }
198 }
199}
200
201void filterModule(
Benjamin Kramer061f4a52017-01-13 14:39:03 +0000202 Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {
Peter Collingbourne1398a322016-12-16 00:26:30 +0000203 for (Function &F : *M) {
204 if (ShouldKeepDefinition(&F))
205 continue;
206
207 F.deleteBody();
Peter Collingbourne20a00932017-01-18 20:03:02 +0000208 F.setComdat(nullptr);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000209 F.clearMetadata();
210 }
211
212 for (GlobalVariable &GV : M->globals()) {
213 if (ShouldKeepDefinition(&GV))
214 continue;
215
216 GV.setInitializer(nullptr);
217 GV.setLinkage(GlobalValue::ExternalLinkage);
Peter Collingbourne20a00932017-01-18 20:03:02 +0000218 GV.setComdat(nullptr);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000219 GV.clearMetadata();
220 }
221
222 for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
223 I != E;) {
224 GlobalAlias *GA = &*I++;
225 if (ShouldKeepDefinition(GA))
226 continue;
227
228 GlobalObject *GO;
229 if (I->getValueType()->isFunctionTy())
230 GO = Function::Create(cast<FunctionType>(GA->getValueType()),
231 GlobalValue::ExternalLinkage, "", M);
232 else
233 GO = new GlobalVariable(
234 *M, GA->getValueType(), false, GlobalValue::ExternalLinkage,
235 (Constant *)nullptr, "", (GlobalVariable *)nullptr,
236 GA->getThreadLocalMode(), GA->getType()->getAddressSpace());
237 GO->takeName(GA);
238 GA->replaceAllUsesWith(GO);
239 GA->eraseFromParent();
240 }
241}
242
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000243void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {
244 if (auto *F = dyn_cast<Function>(C))
245 return Fn(F);
Peter Collingbourne3baa72a2017-03-02 23:10:17 +0000246 if (isa<GlobalValue>(C))
247 return;
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000248 for (Value *Op : C->operands())
249 forEachVirtualFunction(cast<Constant>(Op), Fn);
250}
251
Peter Collingbourne1398a322016-12-16 00:26:30 +0000252// If it's possible to split M into regular and thin LTO parts, do so and write
253// a multi-module bitcode file with the two parts to OS. Otherwise, write only a
254// regular LTO bitcode file to OS.
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000255void splitAndWriteThinLTOBitcode(
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000256 raw_ostream &OS, raw_ostream *ThinLinkOS,
257 function_ref<AAResults &(Function &)> AARGetter, Module &M) {
Evgeniy Stepanovc4405722017-03-20 18:45:34 +0000258 std::string ModuleId = getModuleId(&M);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000259 if (ModuleId.empty()) {
260 // We couldn't generate a module ID for this module, just write it out as a
261 // regular LTO module.
262 WriteBitcodeToFile(&M, OS);
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000263 if (ThinLinkOS)
264 // We don't have a ThinLTO part, but still write the module to the
265 // ThinLinkOS if requested so that the expected output file is produced.
266 WriteBitcodeToFile(&M, *ThinLinkOS);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000267 return;
268 }
269
270 promoteTypeIds(M, ModuleId);
271
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000272 // Returns whether a global has attached type metadata. Such globals may
273 // participate in CFI or whole-program devirtualization, so they need to
274 // appear in the merged module instead of the thin LTO module.
275 auto HasTypeMetadata = [&](const GlobalObject *GO) {
Peter Collingbourne1398a322016-12-16 00:26:30 +0000276 SmallVector<MDNode *, 1> MDs;
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000277 GO->getMetadata(LLVMContext::MD_type, MDs);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000278 return !MDs.empty();
279 };
280
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000281 // Collect the set of virtual functions that are eligible for virtual constant
282 // propagation. Each eligible function must not access memory, must return
283 // an integer of width <=64 bits, must take at least one argument, must not
284 // use its first argument (assumed to be "this") and all arguments other than
285 // the first one must be of <=64 bit integer type.
286 //
287 // Note that we test whether this copy of the function is readnone, rather
288 // than testing function attributes, which must hold for any copy of the
289 // function, even a less optimized version substituted at link time. This is
290 // sound because the virtual constant propagation optimizations effectively
291 // inline all implementations of the virtual function into each call site,
292 // rather than using function attributes to perform local optimization.
293 std::set<const Function *> EligibleVirtualFns;
294 for (GlobalVariable &GV : M.globals())
295 if (HasTypeMetadata(&GV))
296 forEachVirtualFunction(GV.getInitializer(), [&](Function *F) {
297 auto *RT = dyn_cast<IntegerType>(F->getReturnType());
298 if (!RT || RT->getBitWidth() > 64 || F->arg_empty() ||
299 !F->arg_begin()->use_empty())
300 return;
301 for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) {
302 auto *ArgT = dyn_cast<IntegerType>(Arg.getType());
303 if (!ArgT || ArgT->getBitWidth() > 64)
304 return;
305 }
306 if (computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
307 EligibleVirtualFns.insert(F);
308 });
309
Peter Collingbourne1398a322016-12-16 00:26:30 +0000310 ValueToValueMapTy VMap;
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000311 std::unique_ptr<Module> MergedM(
312 CloneModule(&M, VMap, [&](const GlobalValue *GV) -> bool {
313 if (auto *F = dyn_cast<Function>(GV))
314 return EligibleVirtualFns.count(F);
315 if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
316 return HasTypeMetadata(GVar);
317 return false;
318 }));
Peter Collingbourne28ffd322017-02-08 20:44:00 +0000319 StripDebugInfo(*MergedM);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000320
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000321 for (Function &F : *MergedM)
322 if (!F.isDeclaration()) {
323 // Reset the linkage of all functions eligible for virtual constant
324 // propagation. The canonical definitions live in the thin LTO module so
325 // that they can be imported.
326 F.setLinkage(GlobalValue::AvailableExternallyLinkage);
327 F.setComdat(nullptr);
328 }
329
330 // Remove all globals with type metadata, as well as aliases pointing to them,
331 // from the thin LTO module.
332 filterModule(&M, [&](const GlobalValue *GV) {
333 if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
334 return !HasTypeMetadata(GVar);
335 return true;
336 });
Peter Collingbourne1398a322016-12-16 00:26:30 +0000337
338 promoteInternals(*MergedM, M, ModuleId);
339 promoteInternals(M, *MergedM, ModuleId);
340
341 simplifyExternals(*MergedM);
342
Peter Collingbourne1398a322016-12-16 00:26:30 +0000343
344 // FIXME: Try to re-use BSI and PFI from the original module here.
345 ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr);
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000346
347 SmallVector<char, 0> Buffer;
348
349 BitcodeWriter W(Buffer);
350 // Save the module hash produced for the full bitcode, which will
351 // be used in the backends, and use that in the minimized bitcode
352 // produced for the full link.
353 ModuleHash ModHash = {{0}};
Peter Collingbourne1398a322016-12-16 00:26:30 +0000354 W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000355 /*GenerateHash=*/true, &ModHash);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000356 W.writeModule(MergedM.get());
Peter Collingbourne1398a322016-12-16 00:26:30 +0000357 OS << Buffer;
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000358
359 // If a minimized bitcode module was requested for the thin link,
360 // strip the debug info (the merged module was already stripped above)
361 // and write it to the given OS.
362 if (ThinLinkOS) {
363 Buffer.clear();
364 BitcodeWriter W2(Buffer);
365 StripDebugInfo(M);
366 W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
367 /*GenerateHash=*/false, &ModHash);
368 W2.writeModule(MergedM.get());
369 *ThinLinkOS << Buffer;
370 }
Peter Collingbourne1398a322016-12-16 00:26:30 +0000371}
372
373// Returns whether this module needs to be split because it uses type metadata.
374bool requiresSplit(Module &M) {
375 SmallVector<MDNode *, 1> MDs;
376 for (auto &GO : M.global_objects()) {
377 GO.getMetadata(LLVMContext::MD_type, MDs);
378 if (!MDs.empty())
379 return true;
380 }
381
382 return false;
383}
384
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000385void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000386 function_ref<AAResults &(Function &)> AARGetter,
387 Module &M, const ModuleSummaryIndex *Index) {
Peter Collingbourne1398a322016-12-16 00:26:30 +0000388 // See if this module has any type metadata. If so, we need to split it.
389 if (requiresSplit(M))
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000390 return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000391
392 // Otherwise we can just write it out as a regular module.
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000393
394 // Save the module hash produced for the full bitcode, which will
395 // be used in the backends, and use that in the minimized bitcode
396 // produced for the full link.
397 ModuleHash ModHash = {{0}};
Peter Collingbourne1398a322016-12-16 00:26:30 +0000398 WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000399 /*GenerateHash=*/true, &ModHash);
400 // If a minimized bitcode module was requested for the thin link,
401 // strip the debug info and write it to the given OS.
402 if (ThinLinkOS) {
403 StripDebugInfo(M);
404 WriteBitcodeToFile(&M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false,
405 Index,
406 /*GenerateHash=*/false, &ModHash);
407 }
Peter Collingbourne1398a322016-12-16 00:26:30 +0000408}
409
410class WriteThinLTOBitcode : public ModulePass {
411 raw_ostream &OS; // raw_ostream to print on
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000412 // The output stream on which to emit a minimized module for use
413 // just in the thin link, if requested.
414 raw_ostream *ThinLinkOS;
Peter Collingbourne1398a322016-12-16 00:26:30 +0000415
416public:
417 static char ID; // Pass identification, replacement for typeid
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000418 WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) {
Peter Collingbourne1398a322016-12-16 00:26:30 +0000419 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
420 }
421
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000422 explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS)
423 : ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) {
Peter Collingbourne1398a322016-12-16 00:26:30 +0000424 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
425 }
426
427 StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
428
429 bool runOnModule(Module &M) override {
430 const ModuleSummaryIndex *Index =
431 &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000432 writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000433 return true;
434 }
435 void getAnalysisUsage(AnalysisUsage &AU) const override {
436 AU.setPreservesAll();
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000437 AU.addRequired<AssumptionCacheTracker>();
Peter Collingbourne1398a322016-12-16 00:26:30 +0000438 AU.addRequired<ModuleSummaryIndexWrapperPass>();
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000439 AU.addRequired<TargetLibraryInfoWrapperPass>();
Peter Collingbourne1398a322016-12-16 00:26:30 +0000440 }
441};
442} // anonymous namespace
443
444char WriteThinLTOBitcode::ID = 0;
445INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
446 "Write ThinLTO Bitcode", false, true)
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000447INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
Peter Collingbourne1398a322016-12-16 00:26:30 +0000448INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
Peter Collingbourne002c2d52017-02-14 03:42:38 +0000449INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
Peter Collingbourne1398a322016-12-16 00:26:30 +0000450INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
451 "Write ThinLTO Bitcode", false, true)
452
Teresa Johnson0c6a4ff2017-03-23 19:47:39 +0000453ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str,
454 raw_ostream *ThinLinkOS) {
455 return new WriteThinLTOBitcode(Str, ThinLinkOS);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000456}