blob: 6ec69e32198012f10103b77964dac4184dbf139c [file] [log] [blame]
Peter Collingbourne1398a322016-12-16 00:26:30 +00001//===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass prepares a module containing type metadata for ThinLTO by splitting
11// it into regular and thin LTO parts if possible, and writing both parts to
12// a multi-module bitcode file. Modules that do not contain type metadata are
13// written unmodified as a single module.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/Transforms/IPO.h"
18#include "llvm/Analysis/ModuleSummaryAnalysis.h"
19#include "llvm/Analysis/TypeMetadataUtils.h"
20#include "llvm/Bitcode/BitcodeWriter.h"
21#include "llvm/IR/Constants.h"
22#include "llvm/IR/Intrinsics.h"
23#include "llvm/IR/Module.h"
24#include "llvm/IR/PassManager.h"
25#include "llvm/Pass.h"
26#include "llvm/Support/ScopedPrinter.h"
27#include "llvm/Transforms/Utils/Cloning.h"
28using namespace llvm;
29
30namespace {
31
32// Produce a unique identifier for this module by taking the MD5 sum of the
33// names of the module's strong external symbols. This identifier is
34// normally guaranteed to be unique, or the program would fail to link due to
35// multiply defined symbols.
36//
37// If the module has no strong external symbols (such a module may still have a
38// semantic effect if it performs global initialization), we cannot produce a
39// unique identifier for this module, so we return the empty string, which
40// causes the entire module to be written as a regular LTO module.
41std::string getModuleId(Module *M) {
42 MD5 Md5;
43 bool ExportsSymbols = false;
44 auto AddGlobal = [&](GlobalValue &GV) {
45 if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
46 !GV.hasExternalLinkage())
47 return;
48 ExportsSymbols = true;
49 Md5.update(GV.getName());
50 Md5.update(ArrayRef<uint8_t>{0});
51 };
52
53 for (auto &F : *M)
54 AddGlobal(F);
55 for (auto &GV : M->globals())
56 AddGlobal(GV);
57 for (auto &GA : M->aliases())
58 AddGlobal(GA);
59 for (auto &IF : M->ifuncs())
60 AddGlobal(IF);
61
62 if (!ExportsSymbols)
63 return "";
64
65 MD5::MD5Result R;
66 Md5.final(R);
67
68 SmallString<32> Str;
69 MD5::stringifyResult(R, Str);
70 return ("$" + Str).str();
71}
72
73// Promote each local-linkage entity defined by ExportM and used by ImportM by
74// changing visibility and appending the given ModuleId.
75void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
76 auto PromoteInternal = [&](GlobalValue &ExportGV) {
77 if (!ExportGV.hasLocalLinkage())
78 return;
79
80 GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName());
81 if (!ImportGV || ImportGV->use_empty())
82 return;
83
84 std::string NewName = (ExportGV.getName() + ModuleId).str();
85
86 ExportGV.setName(NewName);
87 ExportGV.setLinkage(GlobalValue::ExternalLinkage);
88 ExportGV.setVisibility(GlobalValue::HiddenVisibility);
89
90 ImportGV->setName(NewName);
91 ImportGV->setVisibility(GlobalValue::HiddenVisibility);
92 };
93
94 for (auto &F : ExportM)
95 PromoteInternal(F);
96 for (auto &GV : ExportM.globals())
97 PromoteInternal(GV);
98 for (auto &GA : ExportM.aliases())
99 PromoteInternal(GA);
100 for (auto &IF : ExportM.ifuncs())
101 PromoteInternal(IF);
102}
103
104// Promote all internal (i.e. distinct) type ids used by the module by replacing
105// them with external type ids formed using the module id.
106//
107// Note that this needs to be done before we clone the module because each clone
108// will receive its own set of distinct metadata nodes.
109void promoteTypeIds(Module &M, StringRef ModuleId) {
110 DenseMap<Metadata *, Metadata *> LocalToGlobal;
111 auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
112 Metadata *MD =
113 cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
114
115 if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
116 Metadata *&GlobalMD = LocalToGlobal[MD];
117 if (!GlobalMD) {
118 std::string NewName =
119 (to_string(LocalToGlobal.size()) + ModuleId).str();
120 GlobalMD = MDString::get(M.getContext(), NewName);
121 }
122
123 CI->setArgOperand(ArgNo,
124 MetadataAsValue::get(M.getContext(), GlobalMD));
125 }
126 };
127
128 if (Function *TypeTestFunc =
129 M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
130 for (const Use &U : TypeTestFunc->uses()) {
131 auto CI = cast<CallInst>(U.getUser());
132 ExternalizeTypeId(CI, 1);
133 }
134 }
135
136 if (Function *TypeCheckedLoadFunc =
137 M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
138 for (const Use &U : TypeCheckedLoadFunc->uses()) {
139 auto CI = cast<CallInst>(U.getUser());
140 ExternalizeTypeId(CI, 2);
141 }
142 }
143
144 for (GlobalObject &GO : M.global_objects()) {
145 SmallVector<MDNode *, 1> MDs;
146 GO.getMetadata(LLVMContext::MD_type, MDs);
147
148 GO.eraseMetadata(LLVMContext::MD_type);
149 for (auto MD : MDs) {
150 auto I = LocalToGlobal.find(MD->getOperand(1));
151 if (I == LocalToGlobal.end()) {
152 GO.addMetadata(LLVMContext::MD_type, *MD);
153 continue;
154 }
155 GO.addMetadata(
156 LLVMContext::MD_type,
157 *MDNode::get(M.getContext(),
158 ArrayRef<Metadata *>{MD->getOperand(0), I->second}));
159 }
160 }
161}
162
163// Drop unused globals, and drop type information from function declarations.
164// FIXME: If we made functions typeless then there would be no need to do this.
165void simplifyExternals(Module &M) {
166 FunctionType *EmptyFT =
167 FunctionType::get(Type::getVoidTy(M.getContext()), false);
168
169 for (auto I = M.begin(), E = M.end(); I != E;) {
170 Function &F = *I++;
171 if (F.isDeclaration() && F.use_empty()) {
172 F.eraseFromParent();
173 continue;
174 }
175
176 if (!F.isDeclaration() || F.getFunctionType() == EmptyFT)
177 continue;
178
179 Function *NewF =
180 Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
181 NewF->setVisibility(F.getVisibility());
182 NewF->takeName(&F);
183 F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
184 F.eraseFromParent();
185 }
186
187 for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
188 GlobalVariable &GV = *I++;
189 if (GV.isDeclaration() && GV.use_empty()) {
190 GV.eraseFromParent();
191 continue;
192 }
193 }
194}
195
196void filterModule(
Benjamin Kramer061f4a52017-01-13 14:39:03 +0000197 Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {
Peter Collingbourne1398a322016-12-16 00:26:30 +0000198 for (Function &F : *M) {
199 if (ShouldKeepDefinition(&F))
200 continue;
201
202 F.deleteBody();
Peter Collingbourne20a00932017-01-18 20:03:02 +0000203 F.setComdat(nullptr);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000204 F.clearMetadata();
205 }
206
207 for (GlobalVariable &GV : M->globals()) {
208 if (ShouldKeepDefinition(&GV))
209 continue;
210
211 GV.setInitializer(nullptr);
212 GV.setLinkage(GlobalValue::ExternalLinkage);
Peter Collingbourne20a00932017-01-18 20:03:02 +0000213 GV.setComdat(nullptr);
Peter Collingbourne1398a322016-12-16 00:26:30 +0000214 GV.clearMetadata();
215 }
216
217 for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
218 I != E;) {
219 GlobalAlias *GA = &*I++;
220 if (ShouldKeepDefinition(GA))
221 continue;
222
223 GlobalObject *GO;
224 if (I->getValueType()->isFunctionTy())
225 GO = Function::Create(cast<FunctionType>(GA->getValueType()),
226 GlobalValue::ExternalLinkage, "", M);
227 else
228 GO = new GlobalVariable(
229 *M, GA->getValueType(), false, GlobalValue::ExternalLinkage,
230 (Constant *)nullptr, "", (GlobalVariable *)nullptr,
231 GA->getThreadLocalMode(), GA->getType()->getAddressSpace());
232 GO->takeName(GA);
233 GA->replaceAllUsesWith(GO);
234 GA->eraseFromParent();
235 }
236}
237
238// If it's possible to split M into regular and thin LTO parts, do so and write
239// a multi-module bitcode file with the two parts to OS. Otherwise, write only a
240// regular LTO bitcode file to OS.
241void splitAndWriteThinLTOBitcode(raw_ostream &OS, Module &M) {
242 std::string ModuleId = getModuleId(&M);
243 if (ModuleId.empty()) {
244 // We couldn't generate a module ID for this module, just write it out as a
245 // regular LTO module.
246 WriteBitcodeToFile(&M, OS);
247 return;
248 }
249
250 promoteTypeIds(M, ModuleId);
251
252 auto IsInMergedM = [&](const GlobalValue *GV) {
253 auto *GVar = dyn_cast<GlobalVariable>(GV->getBaseObject());
254 if (!GVar)
255 return false;
256
257 SmallVector<MDNode *, 1> MDs;
258 GVar->getMetadata(LLVMContext::MD_type, MDs);
259 return !MDs.empty();
260 };
261
262 ValueToValueMapTy VMap;
263 std::unique_ptr<Module> MergedM(CloneModule(&M, VMap, IsInMergedM));
264
265 filterModule(&M, [&](const GlobalValue *GV) { return !IsInMergedM(GV); });
266
267 promoteInternals(*MergedM, M, ModuleId);
268 promoteInternals(M, *MergedM, ModuleId);
269
270 simplifyExternals(*MergedM);
271
272 SmallVector<char, 0> Buffer;
273 BitcodeWriter W(Buffer);
274
275 // FIXME: Try to re-use BSI and PFI from the original module here.
276 ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr);
277 W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
278 /*GenerateHash=*/true);
279
280 W.writeModule(MergedM.get());
281
282 OS << Buffer;
283}
284
285// Returns whether this module needs to be split because it uses type metadata.
286bool requiresSplit(Module &M) {
287 SmallVector<MDNode *, 1> MDs;
288 for (auto &GO : M.global_objects()) {
289 GO.getMetadata(LLVMContext::MD_type, MDs);
290 if (!MDs.empty())
291 return true;
292 }
293
294 return false;
295}
296
297void writeThinLTOBitcode(raw_ostream &OS, Module &M,
298 const ModuleSummaryIndex *Index) {
299 // See if this module has any type metadata. If so, we need to split it.
300 if (requiresSplit(M))
301 return splitAndWriteThinLTOBitcode(OS, M);
302
303 // Otherwise we can just write it out as a regular module.
304 WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
305 /*GenerateHash=*/true);
306}
307
308class WriteThinLTOBitcode : public ModulePass {
309 raw_ostream &OS; // raw_ostream to print on
310
311public:
312 static char ID; // Pass identification, replacement for typeid
313 WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) {
314 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
315 }
316
317 explicit WriteThinLTOBitcode(raw_ostream &o)
318 : ModulePass(ID), OS(o) {
319 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
320 }
321
322 StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
323
324 bool runOnModule(Module &M) override {
325 const ModuleSummaryIndex *Index =
326 &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
327 writeThinLTOBitcode(OS, M, Index);
328 return true;
329 }
330 void getAnalysisUsage(AnalysisUsage &AU) const override {
331 AU.setPreservesAll();
332 AU.addRequired<ModuleSummaryIndexWrapperPass>();
333 }
334};
335} // anonymous namespace
336
337char WriteThinLTOBitcode::ID = 0;
338INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
339 "Write ThinLTO Bitcode", false, true)
340INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
341INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
342 "Write ThinLTO Bitcode", false, true)
343
344ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str) {
345 return new WriteThinLTOBitcode(Str);
346}