[LTO] Support for embedding bitcode section during LTO
Summary:
This adds support for embedding bitcode in a binary during LTO. The libLTO gains supports the `-lto-embed-bitcode` flag. The option allows users of the LTO library to embed a bitcode section. For example, LLD can pass the option via `ld.lld -mllvm=-lto-embed-bitcode`.
This feature allows doing something comparable to `clang -c -fembed-bitcode`, but on the (LTO) linker level. Having bitcode alongside native code has many use-cases. To give an example, the MacOS linker can create a `-bitcode_bundle` section containing bitcode. Also, having this feature built into LLVM is an alternative to 3rd party tools such as [[ https://github.com/travitch/whole-program-llvm | wllvm ]] or [[ https://github.com/SRI-CSL/gllvm | gllvm ]]. As with these tools, this feature simplifies creating "whole-program" llvm bitcode files, but in contrast to wllvm/gllvm it does not rely on a specific llvm frontend/driver.
Patch by Josef Eisl <josef.eisl@oracle.com>
Reviewers: #llvm, #clang, rsmith, pcc, alexshap, tejohnson
Reviewed By: tejohnson
Subscribers: tejohnson, mehdi_amini, inglorion, hiraditya, aheejin, steven_wu, dexonsmith, dang, cfe-commits, llvm-commits, #llvm, #clang
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D68213
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index c54d15d..7cf6041 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1558,129 +1558,14 @@
}
}
-static const char* getSectionNameForBitcode(const Triple &T) {
- switch (T.getObjectFormat()) {
- case Triple::MachO:
- return "__LLVM,__bitcode";
- case Triple::COFF:
- case Triple::ELF:
- case Triple::Wasm:
- case Triple::UnknownObjectFormat:
- return ".llvmbc";
- case Triple::XCOFF:
- llvm_unreachable("XCOFF is not yet implemented");
- break;
- }
- llvm_unreachable("Unimplemented ObjectFormatType");
-}
-
-static const char* getSectionNameForCommandline(const Triple &T) {
- switch (T.getObjectFormat()) {
- case Triple::MachO:
- return "__LLVM,__cmdline";
- case Triple::COFF:
- case Triple::ELF:
- case Triple::Wasm:
- case Triple::UnknownObjectFormat:
- return ".llvmcmd";
- case Triple::XCOFF:
- llvm_unreachable("XCOFF is not yet implemented");
- break;
- }
- llvm_unreachable("Unimplemented ObjectFormatType");
-}
-
// With -fembed-bitcode, save a copy of the llvm IR as data in the
// __LLVM,__bitcode section.
void clang::EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts,
llvm::MemoryBufferRef Buf) {
if (CGOpts.getEmbedBitcode() == CodeGenOptions::Embed_Off)
return;
-
- // Save llvm.compiler.used and remote it.
- SmallVector<Constant*, 2> UsedArray;
- SmallPtrSet<GlobalValue*, 4> UsedGlobals;
- Type *UsedElementType = Type::getInt8Ty(M->getContext())->getPointerTo(0);
- GlobalVariable *Used = collectUsedGlobalVariables(*M, UsedGlobals, true);
- for (auto *GV : UsedGlobals) {
- if (GV->getName() != "llvm.embedded.module" &&
- GV->getName() != "llvm.cmdline")
- UsedArray.push_back(
- ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
- }
- if (Used)
- Used->eraseFromParent();
-
- // Embed the bitcode for the llvm module.
- std::string Data;
- ArrayRef<uint8_t> ModuleData;
- Triple T(M->getTargetTriple());
- // Create a constant that contains the bitcode.
- // In case of embedding a marker, ignore the input Buf and use the empty
- // ArrayRef. It is also legal to create a bitcode marker even Buf is empty.
- if (CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Marker) {
- if (!isBitcode((const unsigned char *)Buf.getBufferStart(),
- (const unsigned char *)Buf.getBufferEnd())) {
- // If the input is LLVM Assembly, bitcode is produced by serializing
- // the module. Use-lists order need to be perserved in this case.
- llvm::raw_string_ostream OS(Data);
- llvm::WriteBitcodeToFile(*M, OS, /* ShouldPreserveUseListOrder */ true);
- ModuleData =
- ArrayRef<uint8_t>((const uint8_t *)OS.str().data(), OS.str().size());
- } else
- // If the input is LLVM bitcode, write the input byte stream directly.
- ModuleData = ArrayRef<uint8_t>((const uint8_t *)Buf.getBufferStart(),
- Buf.getBufferSize());
- }
- llvm::Constant *ModuleConstant =
- llvm::ConstantDataArray::get(M->getContext(), ModuleData);
- llvm::GlobalVariable *GV = new llvm::GlobalVariable(
- *M, ModuleConstant->getType(), true, llvm::GlobalValue::PrivateLinkage,
- ModuleConstant);
- GV->setSection(getSectionNameForBitcode(T));
- UsedArray.push_back(
- ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
- if (llvm::GlobalVariable *Old =
- M->getGlobalVariable("llvm.embedded.module", true)) {
- assert(Old->hasOneUse() &&
- "llvm.embedded.module can only be used once in llvm.compiler.used");
- GV->takeName(Old);
- Old->eraseFromParent();
- } else {
- GV->setName("llvm.embedded.module");
- }
-
- // Skip if only bitcode needs to be embedded.
- if (CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Bitcode) {
- // Embed command-line options.
- ArrayRef<uint8_t> CmdData(const_cast<uint8_t *>(CGOpts.CmdArgs.data()),
- CGOpts.CmdArgs.size());
- llvm::Constant *CmdConstant =
- llvm::ConstantDataArray::get(M->getContext(), CmdData);
- GV = new llvm::GlobalVariable(*M, CmdConstant->getType(), true,
- llvm::GlobalValue::PrivateLinkage,
- CmdConstant);
- GV->setSection(getSectionNameForCommandline(T));
- UsedArray.push_back(
- ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
- if (llvm::GlobalVariable *Old =
- M->getGlobalVariable("llvm.cmdline", true)) {
- assert(Old->hasOneUse() &&
- "llvm.cmdline can only be used once in llvm.compiler.used");
- GV->takeName(Old);
- Old->eraseFromParent();
- } else {
- GV->setName("llvm.cmdline");
- }
- }
-
- if (UsedArray.empty())
- return;
-
- // Recreate llvm.compiler.used.
- ArrayType *ATy = ArrayType::get(UsedElementType, UsedArray.size());
- auto *NewUsed = new GlobalVariable(
- *M, ATy, false, llvm::GlobalValue::AppendingLinkage,
- llvm::ConstantArray::get(ATy, UsedArray), "llvm.compiler.used");
- NewUsed->setSection("llvm.metadata");
+ llvm::EmbedBitcodeInModule(
+ *M, Buf, CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Marker,
+ CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Bitcode,
+ &CGOpts.CmdArgs);
}