Peter Collingbourne | 1398a32 | 2016-12-16 00:26:30 +0000 | [diff] [blame] | 1 | //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This pass prepares a module containing type metadata for ThinLTO by splitting |
| 11 | // it into regular and thin LTO parts if possible, and writing both parts to |
| 12 | // a multi-module bitcode file. Modules that do not contain type metadata are |
| 13 | // written unmodified as a single module. |
| 14 | // |
| 15 | //===----------------------------------------------------------------------===// |
| 16 | |
| 17 | #include "llvm/Transforms/IPO.h" |
| 18 | #include "llvm/Analysis/ModuleSummaryAnalysis.h" |
| 19 | #include "llvm/Analysis/TypeMetadataUtils.h" |
| 20 | #include "llvm/Bitcode/BitcodeWriter.h" |
| 21 | #include "llvm/IR/Constants.h" |
| 22 | #include "llvm/IR/Intrinsics.h" |
| 23 | #include "llvm/IR/Module.h" |
| 24 | #include "llvm/IR/PassManager.h" |
| 25 | #include "llvm/Pass.h" |
| 26 | #include "llvm/Support/ScopedPrinter.h" |
| 27 | #include "llvm/Transforms/Utils/Cloning.h" |
| 28 | using namespace llvm; |
| 29 | |
| 30 | namespace { |
| 31 | |
| 32 | // Produce a unique identifier for this module by taking the MD5 sum of the |
| 33 | // names of the module's strong external symbols. This identifier is |
| 34 | // normally guaranteed to be unique, or the program would fail to link due to |
| 35 | // multiply defined symbols. |
| 36 | // |
| 37 | // If the module has no strong external symbols (such a module may still have a |
| 38 | // semantic effect if it performs global initialization), we cannot produce a |
| 39 | // unique identifier for this module, so we return the empty string, which |
| 40 | // causes the entire module to be written as a regular LTO module. |
| 41 | std::string getModuleId(Module *M) { |
| 42 | MD5 Md5; |
| 43 | bool ExportsSymbols = false; |
| 44 | auto AddGlobal = [&](GlobalValue &GV) { |
| 45 | if (GV.isDeclaration() || GV.getName().startswith("llvm.") || |
| 46 | !GV.hasExternalLinkage()) |
| 47 | return; |
| 48 | ExportsSymbols = true; |
| 49 | Md5.update(GV.getName()); |
| 50 | Md5.update(ArrayRef<uint8_t>{0}); |
| 51 | }; |
| 52 | |
| 53 | for (auto &F : *M) |
| 54 | AddGlobal(F); |
| 55 | for (auto &GV : M->globals()) |
| 56 | AddGlobal(GV); |
| 57 | for (auto &GA : M->aliases()) |
| 58 | AddGlobal(GA); |
| 59 | for (auto &IF : M->ifuncs()) |
| 60 | AddGlobal(IF); |
| 61 | |
| 62 | if (!ExportsSymbols) |
| 63 | return ""; |
| 64 | |
| 65 | MD5::MD5Result R; |
| 66 | Md5.final(R); |
| 67 | |
| 68 | SmallString<32> Str; |
| 69 | MD5::stringifyResult(R, Str); |
| 70 | return ("$" + Str).str(); |
| 71 | } |
| 72 | |
| 73 | // Promote each local-linkage entity defined by ExportM and used by ImportM by |
| 74 | // changing visibility and appending the given ModuleId. |
| 75 | void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { |
| 76 | auto PromoteInternal = [&](GlobalValue &ExportGV) { |
| 77 | if (!ExportGV.hasLocalLinkage()) |
| 78 | return; |
| 79 | |
| 80 | GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName()); |
| 81 | if (!ImportGV || ImportGV->use_empty()) |
| 82 | return; |
| 83 | |
| 84 | std::string NewName = (ExportGV.getName() + ModuleId).str(); |
| 85 | |
| 86 | ExportGV.setName(NewName); |
| 87 | ExportGV.setLinkage(GlobalValue::ExternalLinkage); |
| 88 | ExportGV.setVisibility(GlobalValue::HiddenVisibility); |
| 89 | |
| 90 | ImportGV->setName(NewName); |
| 91 | ImportGV->setVisibility(GlobalValue::HiddenVisibility); |
| 92 | }; |
| 93 | |
| 94 | for (auto &F : ExportM) |
| 95 | PromoteInternal(F); |
| 96 | for (auto &GV : ExportM.globals()) |
| 97 | PromoteInternal(GV); |
| 98 | for (auto &GA : ExportM.aliases()) |
| 99 | PromoteInternal(GA); |
| 100 | for (auto &IF : ExportM.ifuncs()) |
| 101 | PromoteInternal(IF); |
| 102 | } |
| 103 | |
| 104 | // Promote all internal (i.e. distinct) type ids used by the module by replacing |
| 105 | // them with external type ids formed using the module id. |
| 106 | // |
| 107 | // Note that this needs to be done before we clone the module because each clone |
| 108 | // will receive its own set of distinct metadata nodes. |
| 109 | void promoteTypeIds(Module &M, StringRef ModuleId) { |
| 110 | DenseMap<Metadata *, Metadata *> LocalToGlobal; |
| 111 | auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) { |
| 112 | Metadata *MD = |
| 113 | cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata(); |
| 114 | |
| 115 | if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) { |
| 116 | Metadata *&GlobalMD = LocalToGlobal[MD]; |
| 117 | if (!GlobalMD) { |
| 118 | std::string NewName = |
| 119 | (to_string(LocalToGlobal.size()) + ModuleId).str(); |
| 120 | GlobalMD = MDString::get(M.getContext(), NewName); |
| 121 | } |
| 122 | |
| 123 | CI->setArgOperand(ArgNo, |
| 124 | MetadataAsValue::get(M.getContext(), GlobalMD)); |
| 125 | } |
| 126 | }; |
| 127 | |
| 128 | if (Function *TypeTestFunc = |
| 129 | M.getFunction(Intrinsic::getName(Intrinsic::type_test))) { |
| 130 | for (const Use &U : TypeTestFunc->uses()) { |
| 131 | auto CI = cast<CallInst>(U.getUser()); |
| 132 | ExternalizeTypeId(CI, 1); |
| 133 | } |
| 134 | } |
| 135 | |
| 136 | if (Function *TypeCheckedLoadFunc = |
| 137 | M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) { |
| 138 | for (const Use &U : TypeCheckedLoadFunc->uses()) { |
| 139 | auto CI = cast<CallInst>(U.getUser()); |
| 140 | ExternalizeTypeId(CI, 2); |
| 141 | } |
| 142 | } |
| 143 | |
| 144 | for (GlobalObject &GO : M.global_objects()) { |
| 145 | SmallVector<MDNode *, 1> MDs; |
| 146 | GO.getMetadata(LLVMContext::MD_type, MDs); |
| 147 | |
| 148 | GO.eraseMetadata(LLVMContext::MD_type); |
| 149 | for (auto MD : MDs) { |
| 150 | auto I = LocalToGlobal.find(MD->getOperand(1)); |
| 151 | if (I == LocalToGlobal.end()) { |
| 152 | GO.addMetadata(LLVMContext::MD_type, *MD); |
| 153 | continue; |
| 154 | } |
| 155 | GO.addMetadata( |
| 156 | LLVMContext::MD_type, |
| 157 | *MDNode::get(M.getContext(), |
| 158 | ArrayRef<Metadata *>{MD->getOperand(0), I->second})); |
| 159 | } |
| 160 | } |
| 161 | } |
| 162 | |
| 163 | // Drop unused globals, and drop type information from function declarations. |
| 164 | // FIXME: If we made functions typeless then there would be no need to do this. |
| 165 | void simplifyExternals(Module &M) { |
| 166 | FunctionType *EmptyFT = |
| 167 | FunctionType::get(Type::getVoidTy(M.getContext()), false); |
| 168 | |
| 169 | for (auto I = M.begin(), E = M.end(); I != E;) { |
| 170 | Function &F = *I++; |
| 171 | if (F.isDeclaration() && F.use_empty()) { |
| 172 | F.eraseFromParent(); |
| 173 | continue; |
| 174 | } |
| 175 | |
| 176 | if (!F.isDeclaration() || F.getFunctionType() == EmptyFT) |
| 177 | continue; |
| 178 | |
| 179 | Function *NewF = |
| 180 | Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M); |
| 181 | NewF->setVisibility(F.getVisibility()); |
| 182 | NewF->takeName(&F); |
| 183 | F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType())); |
| 184 | F.eraseFromParent(); |
| 185 | } |
| 186 | |
| 187 | for (auto I = M.global_begin(), E = M.global_end(); I != E;) { |
| 188 | GlobalVariable &GV = *I++; |
| 189 | if (GV.isDeclaration() && GV.use_empty()) { |
| 190 | GV.eraseFromParent(); |
| 191 | continue; |
| 192 | } |
| 193 | } |
| 194 | } |
| 195 | |
| 196 | void filterModule( |
Benjamin Kramer | 061f4a5 | 2017-01-13 14:39:03 +0000 | [diff] [blame] | 197 | Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) { |
Peter Collingbourne | 1398a32 | 2016-12-16 00:26:30 +0000 | [diff] [blame] | 198 | for (Function &F : *M) { |
| 199 | if (ShouldKeepDefinition(&F)) |
| 200 | continue; |
| 201 | |
| 202 | F.deleteBody(); |
Peter Collingbourne | 20a0093 | 2017-01-18 20:03:02 +0000 | [diff] [blame^] | 203 | F.setComdat(nullptr); |
Peter Collingbourne | 1398a32 | 2016-12-16 00:26:30 +0000 | [diff] [blame] | 204 | F.clearMetadata(); |
| 205 | } |
| 206 | |
| 207 | for (GlobalVariable &GV : M->globals()) { |
| 208 | if (ShouldKeepDefinition(&GV)) |
| 209 | continue; |
| 210 | |
| 211 | GV.setInitializer(nullptr); |
| 212 | GV.setLinkage(GlobalValue::ExternalLinkage); |
Peter Collingbourne | 20a0093 | 2017-01-18 20:03:02 +0000 | [diff] [blame^] | 213 | GV.setComdat(nullptr); |
Peter Collingbourne | 1398a32 | 2016-12-16 00:26:30 +0000 | [diff] [blame] | 214 | GV.clearMetadata(); |
| 215 | } |
| 216 | |
| 217 | for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end(); |
| 218 | I != E;) { |
| 219 | GlobalAlias *GA = &*I++; |
| 220 | if (ShouldKeepDefinition(GA)) |
| 221 | continue; |
| 222 | |
| 223 | GlobalObject *GO; |
| 224 | if (I->getValueType()->isFunctionTy()) |
| 225 | GO = Function::Create(cast<FunctionType>(GA->getValueType()), |
| 226 | GlobalValue::ExternalLinkage, "", M); |
| 227 | else |
| 228 | GO = new GlobalVariable( |
| 229 | *M, GA->getValueType(), false, GlobalValue::ExternalLinkage, |
| 230 | (Constant *)nullptr, "", (GlobalVariable *)nullptr, |
| 231 | GA->getThreadLocalMode(), GA->getType()->getAddressSpace()); |
| 232 | GO->takeName(GA); |
| 233 | GA->replaceAllUsesWith(GO); |
| 234 | GA->eraseFromParent(); |
| 235 | } |
| 236 | } |
| 237 | |
| 238 | // If it's possible to split M into regular and thin LTO parts, do so and write |
| 239 | // a multi-module bitcode file with the two parts to OS. Otherwise, write only a |
| 240 | // regular LTO bitcode file to OS. |
| 241 | void splitAndWriteThinLTOBitcode(raw_ostream &OS, Module &M) { |
| 242 | std::string ModuleId = getModuleId(&M); |
| 243 | if (ModuleId.empty()) { |
| 244 | // We couldn't generate a module ID for this module, just write it out as a |
| 245 | // regular LTO module. |
| 246 | WriteBitcodeToFile(&M, OS); |
| 247 | return; |
| 248 | } |
| 249 | |
| 250 | promoteTypeIds(M, ModuleId); |
| 251 | |
| 252 | auto IsInMergedM = [&](const GlobalValue *GV) { |
| 253 | auto *GVar = dyn_cast<GlobalVariable>(GV->getBaseObject()); |
| 254 | if (!GVar) |
| 255 | return false; |
| 256 | |
| 257 | SmallVector<MDNode *, 1> MDs; |
| 258 | GVar->getMetadata(LLVMContext::MD_type, MDs); |
| 259 | return !MDs.empty(); |
| 260 | }; |
| 261 | |
| 262 | ValueToValueMapTy VMap; |
| 263 | std::unique_ptr<Module> MergedM(CloneModule(&M, VMap, IsInMergedM)); |
| 264 | |
| 265 | filterModule(&M, [&](const GlobalValue *GV) { return !IsInMergedM(GV); }); |
| 266 | |
| 267 | promoteInternals(*MergedM, M, ModuleId); |
| 268 | promoteInternals(M, *MergedM, ModuleId); |
| 269 | |
| 270 | simplifyExternals(*MergedM); |
| 271 | |
| 272 | SmallVector<char, 0> Buffer; |
| 273 | BitcodeWriter W(Buffer); |
| 274 | |
| 275 | // FIXME: Try to re-use BSI and PFI from the original module here. |
| 276 | ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr); |
| 277 | W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, |
| 278 | /*GenerateHash=*/true); |
| 279 | |
| 280 | W.writeModule(MergedM.get()); |
| 281 | |
| 282 | OS << Buffer; |
| 283 | } |
| 284 | |
| 285 | // Returns whether this module needs to be split because it uses type metadata. |
| 286 | bool requiresSplit(Module &M) { |
| 287 | SmallVector<MDNode *, 1> MDs; |
| 288 | for (auto &GO : M.global_objects()) { |
| 289 | GO.getMetadata(LLVMContext::MD_type, MDs); |
| 290 | if (!MDs.empty()) |
| 291 | return true; |
| 292 | } |
| 293 | |
| 294 | return false; |
| 295 | } |
| 296 | |
| 297 | void writeThinLTOBitcode(raw_ostream &OS, Module &M, |
| 298 | const ModuleSummaryIndex *Index) { |
| 299 | // See if this module has any type metadata. If so, we need to split it. |
| 300 | if (requiresSplit(M)) |
| 301 | return splitAndWriteThinLTOBitcode(OS, M); |
| 302 | |
| 303 | // Otherwise we can just write it out as a regular module. |
| 304 | WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index, |
| 305 | /*GenerateHash=*/true); |
| 306 | } |
| 307 | |
| 308 | class WriteThinLTOBitcode : public ModulePass { |
| 309 | raw_ostream &OS; // raw_ostream to print on |
| 310 | |
| 311 | public: |
| 312 | static char ID; // Pass identification, replacement for typeid |
| 313 | WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) { |
| 314 | initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); |
| 315 | } |
| 316 | |
| 317 | explicit WriteThinLTOBitcode(raw_ostream &o) |
| 318 | : ModulePass(ID), OS(o) { |
| 319 | initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); |
| 320 | } |
| 321 | |
| 322 | StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; } |
| 323 | |
| 324 | bool runOnModule(Module &M) override { |
| 325 | const ModuleSummaryIndex *Index = |
| 326 | &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex()); |
| 327 | writeThinLTOBitcode(OS, M, Index); |
| 328 | return true; |
| 329 | } |
| 330 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 331 | AU.setPreservesAll(); |
| 332 | AU.addRequired<ModuleSummaryIndexWrapperPass>(); |
| 333 | } |
| 334 | }; |
| 335 | } // anonymous namespace |
| 336 | |
| 337 | char WriteThinLTOBitcode::ID = 0; |
| 338 | INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode", |
| 339 | "Write ThinLTO Bitcode", false, true) |
| 340 | INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass) |
| 341 | INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode", |
| 342 | "Write ThinLTO Bitcode", false, true) |
| 343 | |
| 344 | ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str) { |
| 345 | return new WriteThinLTOBitcode(Str); |
| 346 | } |