[HIP] Support offloading by linker script
To support linking device code in different source files, it is necessary to
embed fat binary at host linking stage.
This patch emits an external symbol for fat binary in host codegen, then
embed the fat binary by lld through a linker script.
Differential Revision: https://reviews.llvm.org/D46472
llvm-svn: 332724
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 9a3ab8d..b7e86cb 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -146,12 +146,14 @@
Args.AddAllArgValues(CmdArgs, options::OPT_Zlinker_input);
for (const auto &II : Inputs) {
- // If the current tool chain refers to an OpenMP offloading host, we should
- // ignore inputs that refer to OpenMP offloading devices - they will be
- // embedded according to a proper linker script.
+ // If the current tool chain refers to an OpenMP or HIP offloading host, we
+ // should ignore inputs that refer to OpenMP or HIP offloading devices -
+ // they will be embedded according to a proper linker script.
if (auto *IA = II.getAction())
- if (JA.isHostOffloading(Action::OFK_OpenMP) &&
- IA->isDeviceOffloading(Action::OFK_OpenMP))
+ if ((JA.isHostOffloading(Action::OFK_OpenMP) &&
+ IA->isDeviceOffloading(Action::OFK_OpenMP)) ||
+ (JA.isHostOffloading(Action::OFK_HIP) &&
+ IA->isDeviceOffloading(Action::OFK_HIP)))
continue;
if (!TC.HasNativeLLVMSupport() && types::isLLVMIR(II.getType()))
@@ -1288,6 +1290,124 @@
Lksf << LksBuffer;
}
+/// Add HIP linker script arguments at the end of the argument list so that
+/// the fat binary is built by embedding the device images into the host. The
+/// linker script also defines a symbol required by the code generation so that
+/// the image can be retrieved at runtime. This should be used only in tool
+/// chains that support linker scripts.
+void tools::AddHIPLinkerScript(const ToolChain &TC, Compilation &C,
+ const InputInfo &Output,
+ const InputInfoList &Inputs, const ArgList &Args,
+ ArgStringList &CmdArgs, const JobAction &JA,
+ const Tool &T) {
+
+ // If this is not a HIP host toolchain, we don't need to do anything.
+ if (!JA.isHostOffloading(Action::OFK_HIP))
+ return;
+
+ // Create temporary linker script. Keep it if save-temps is enabled.
+ const char *LKS;
+ SmallString<256> Name = llvm::sys::path::filename(Output.getFilename());
+ if (C.getDriver().isSaveTempsEnabled()) {
+ llvm::sys::path::replace_extension(Name, "lk");
+ LKS = C.getArgs().MakeArgString(Name.c_str());
+ } else {
+ llvm::sys::path::replace_extension(Name, "");
+ Name = C.getDriver().GetTemporaryPath(Name, "lk");
+ LKS = C.addTempFile(C.getArgs().MakeArgString(Name.c_str()));
+ }
+
+ // Add linker script option to the command.
+ CmdArgs.push_back("-T");
+ CmdArgs.push_back(LKS);
+
+ // Create a buffer to write the contents of the linker script.
+ std::string LksBuffer;
+ llvm::raw_string_ostream LksStream(LksBuffer);
+
+ // Get the HIP offload tool chain.
+ auto *HIPTC = static_cast<const toolchains::CudaToolChain *>(
+ C.getSingleOffloadToolChain<Action::OFK_HIP>());
+ assert(HIPTC->getTriple().getArch() == llvm::Triple::amdgcn &&
+ "Wrong platform");
+
+ // Construct clang-offload-bundler command to bundle object files for
+ // for different GPU archs.
+ ArgStringList BundlerArgs;
+ BundlerArgs.push_back(Args.MakeArgString("-type=o"));
+
+ // ToDo: Remove the dummy host binary entry which is required by
+ // clang-offload-bundler.
+ std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux";
+ std::string BundlerInputArg = "-inputs=/dev/null";
+
+ for (const auto &II : Inputs) {
+ const Action *A = II.getAction();
+ // Is this a device linking action?
+ if (A && isa<LinkJobAction>(A) && A->isDeviceOffloading(Action::OFK_HIP)) {
+ BundlerTargetArg = BundlerTargetArg + ",hip-amdgcn-amd-amdhsa-" +
+ StringRef(A->getOffloadingArch()).str();
+ BundlerInputArg = BundlerInputArg + "," + II.getFilename();
+ }
+ }
+ BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg));
+ BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg));
+
+ std::string BundleFileName = C.getDriver().GetTemporaryPath("BUNDLE", "o");
+ const char *BundleFile =
+ C.addTempFile(C.getArgs().MakeArgString(BundleFileName.c_str()));
+ auto BundlerOutputArg =
+ Args.MakeArgString(std::string("-outputs=").append(BundleFile));
+ BundlerArgs.push_back(BundlerOutputArg);
+
+ SmallString<128> BundlerPath(C.getDriver().Dir);
+ llvm::sys::path::append(BundlerPath, "clang-offload-bundler");
+ const char *Bundler = Args.MakeArgString(BundlerPath);
+ C.addCommand(llvm::make_unique<Command>(JA, T, Bundler, BundlerArgs, Inputs));
+
+ // Add commands to embed target binaries. We ensure that each section and
+ // image is 16-byte aligned. This is not mandatory, but increases the
+ // likelihood of data to be aligned with a cache block in several main host
+ // machines.
+ LksStream << "/*\n";
+ LksStream << " HIP Offload Linker Script\n";
+ LksStream << " *** Automatically generated by Clang ***\n";
+ LksStream << "*/\n";
+ LksStream << "TARGET(binary)\n";
+ LksStream << "INPUT(" << BundleFileName << ")\n";
+ LksStream << "SECTIONS\n";
+ LksStream << "{\n";
+ LksStream << " .hip_fatbin :\n";
+ LksStream << " ALIGN(0x10)\n";
+ LksStream << " {\n";
+ LksStream << " PROVIDE_HIDDEN(__hip_fatbin = .);\n";
+ LksStream << " " << BundleFileName << "\n";
+ LksStream << " }\n";
+ LksStream << "}\n";
+ LksStream << "INSERT BEFORE .data\n";
+ LksStream.flush();
+
+ // Dump the contents of the linker script if the user requested that. We
+ // support this option to enable testing of behavior with -###.
+ if (C.getArgs().hasArg(options::OPT_fhip_dump_offload_linker_script))
+ llvm::errs() << LksBuffer;
+
+ // If this is a dry run, do not create the linker script file.
+ if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH))
+ return;
+
+ // Open script file and write the contents.
+ std::error_code EC;
+ llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::F_None);
+
+ if (EC) {
+ C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message();
+ return;
+ }
+
+ Lksf << LksBuffer;
+}
+
SmallString<128> tools::getStatsFileName(const llvm::opt::ArgList &Args,
const InputInfo &Output,
const InputInfo &Input,