[LTO] Support for embedding bitcode section during LTO

Summary:
This adds support for embedding bitcode in a binary during LTO. The libLTO gains supports the `-lto-embed-bitcode` flag. The option allows users of the LTO library to embed a bitcode section. For example, LLD can pass the option via `ld.lld -mllvm=-lto-embed-bitcode`.

This feature allows doing something comparable to `clang -c -fembed-bitcode`, but on the (LTO) linker level. Having bitcode alongside native code has many use-cases. To give an example, the MacOS linker can create a `-bitcode_bundle` section containing bitcode. Also, having this feature built into LLVM is an alternative to 3rd party tools such as [[ https://github.com/travitch/whole-program-llvm | wllvm ]] or [[ https://github.com/SRI-CSL/gllvm | gllvm ]]. As with these tools, this feature simplifies creating "whole-program" llvm bitcode files, but in contrast to wllvm/gllvm it does not rely on a specific llvm frontend/driver.

Patch by Josef Eisl <josef.eisl@oracle.com>

Reviewers: #llvm, #clang, rsmith, pcc, alexshap, tejohnson

Reviewed By: tejohnson

Subscribers: tejohnson, mehdi_amini, inglorion, hiraditya, aheejin, steven_wu, dexonsmith, dang, cfe-commits, llvm-commits, #llvm, #clang

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D68213
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index c54d15d..7cf6041 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1558,129 +1558,14 @@
   }
 }
 
-static const char* getSectionNameForBitcode(const Triple &T) {
-  switch (T.getObjectFormat()) {
-  case Triple::MachO:
-    return "__LLVM,__bitcode";
-  case Triple::COFF:
-  case Triple::ELF:
-  case Triple::Wasm:
-  case Triple::UnknownObjectFormat:
-    return ".llvmbc";
-  case Triple::XCOFF:
-    llvm_unreachable("XCOFF is not yet implemented");
-    break;
-  }
-  llvm_unreachable("Unimplemented ObjectFormatType");
-}
-
-static const char* getSectionNameForCommandline(const Triple &T) {
-  switch (T.getObjectFormat()) {
-  case Triple::MachO:
-    return "__LLVM,__cmdline";
-  case Triple::COFF:
-  case Triple::ELF:
-  case Triple::Wasm:
-  case Triple::UnknownObjectFormat:
-    return ".llvmcmd";
-  case Triple::XCOFF:
-    llvm_unreachable("XCOFF is not yet implemented");
-    break;
-  }
-  llvm_unreachable("Unimplemented ObjectFormatType");
-}
-
 // With -fembed-bitcode, save a copy of the llvm IR as data in the
 // __LLVM,__bitcode section.
 void clang::EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts,
                          llvm::MemoryBufferRef Buf) {
   if (CGOpts.getEmbedBitcode() == CodeGenOptions::Embed_Off)
     return;
-
-  // Save llvm.compiler.used and remote it.
-  SmallVector<Constant*, 2> UsedArray;
-  SmallPtrSet<GlobalValue*, 4> UsedGlobals;
-  Type *UsedElementType = Type::getInt8Ty(M->getContext())->getPointerTo(0);
-  GlobalVariable *Used = collectUsedGlobalVariables(*M, UsedGlobals, true);
-  for (auto *GV : UsedGlobals) {
-    if (GV->getName() != "llvm.embedded.module" &&
-        GV->getName() != "llvm.cmdline")
-      UsedArray.push_back(
-          ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
-  }
-  if (Used)
-    Used->eraseFromParent();
-
-  // Embed the bitcode for the llvm module.
-  std::string Data;
-  ArrayRef<uint8_t> ModuleData;
-  Triple T(M->getTargetTriple());
-  // Create a constant that contains the bitcode.
-  // In case of embedding a marker, ignore the input Buf and use the empty
-  // ArrayRef. It is also legal to create a bitcode marker even Buf is empty.
-  if (CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Marker) {
-    if (!isBitcode((const unsigned char *)Buf.getBufferStart(),
-                   (const unsigned char *)Buf.getBufferEnd())) {
-      // If the input is LLVM Assembly, bitcode is produced by serializing
-      // the module. Use-lists order need to be perserved in this case.
-      llvm::raw_string_ostream OS(Data);
-      llvm::WriteBitcodeToFile(*M, OS, /* ShouldPreserveUseListOrder */ true);
-      ModuleData =
-          ArrayRef<uint8_t>((const uint8_t *)OS.str().data(), OS.str().size());
-    } else
-      // If the input is LLVM bitcode, write the input byte stream directly.
-      ModuleData = ArrayRef<uint8_t>((const uint8_t *)Buf.getBufferStart(),
-                                     Buf.getBufferSize());
-  }
-  llvm::Constant *ModuleConstant =
-      llvm::ConstantDataArray::get(M->getContext(), ModuleData);
-  llvm::GlobalVariable *GV = new llvm::GlobalVariable(
-      *M, ModuleConstant->getType(), true, llvm::GlobalValue::PrivateLinkage,
-      ModuleConstant);
-  GV->setSection(getSectionNameForBitcode(T));
-  UsedArray.push_back(
-      ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
-  if (llvm::GlobalVariable *Old =
-          M->getGlobalVariable("llvm.embedded.module", true)) {
-    assert(Old->hasOneUse() &&
-           "llvm.embedded.module can only be used once in llvm.compiler.used");
-    GV->takeName(Old);
-    Old->eraseFromParent();
-  } else {
-    GV->setName("llvm.embedded.module");
-  }
-
-  // Skip if only bitcode needs to be embedded.
-  if (CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Bitcode) {
-    // Embed command-line options.
-    ArrayRef<uint8_t> CmdData(const_cast<uint8_t *>(CGOpts.CmdArgs.data()),
-                              CGOpts.CmdArgs.size());
-    llvm::Constant *CmdConstant =
-      llvm::ConstantDataArray::get(M->getContext(), CmdData);
-    GV = new llvm::GlobalVariable(*M, CmdConstant->getType(), true,
-                                  llvm::GlobalValue::PrivateLinkage,
-                                  CmdConstant);
-    GV->setSection(getSectionNameForCommandline(T));
-    UsedArray.push_back(
-        ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
-    if (llvm::GlobalVariable *Old =
-            M->getGlobalVariable("llvm.cmdline", true)) {
-      assert(Old->hasOneUse() &&
-             "llvm.cmdline can only be used once in llvm.compiler.used");
-      GV->takeName(Old);
-      Old->eraseFromParent();
-    } else {
-      GV->setName("llvm.cmdline");
-    }
-  }
-
-  if (UsedArray.empty())
-    return;
-
-  // Recreate llvm.compiler.used.
-  ArrayType *ATy = ArrayType::get(UsedElementType, UsedArray.size());
-  auto *NewUsed = new GlobalVariable(
-      *M, ATy, false, llvm::GlobalValue::AppendingLinkage,
-      llvm::ConstantArray::get(ATy, UsedArray), "llvm.compiler.used");
-  NewUsed->setSection("llvm.metadata");
+  llvm::EmbedBitcodeInModule(
+      *M, Buf, CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Marker,
+      CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Bitcode,
+      &CGOpts.CmdArgs);
 }