[OPENMP50]Support for declare variant directive for NVPTX target.
NVPTX does not support global aliases. Instead, we have to copy the full
body of the variant function into the original function.
llvm-svn: 374387
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 2ad6d01..7626f7a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1264,9 +1264,10 @@
loadOffloadInfoMetadata();
}
-static bool tryEmitAlias(CodeGenModule &CGM, const GlobalDecl &NewGD,
- const GlobalDecl &OldGD, llvm::GlobalValue *OrigAddr,
- bool IsForDefinition) {
+bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
+ const GlobalDecl &OldGD,
+ llvm::GlobalValue *OrigAddr,
+ bool IsForDefinition) {
// Emit at least a definition for the aliasee if the the address of the
// original function is requested.
if (IsForDefinition || OrigAddr)
@@ -1327,8 +1328,8 @@
StringRef MangledName = CGM.getMangledName(Pair.second.second);
llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
// If not able to emit alias, just emit original declaration.
- (void)tryEmitAlias(CGM, Pair.second.first, Pair.second.second, Addr,
- /*IsForDefinition=*/false);
+ (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr,
+ /*IsForDefinition=*/false);
}
}
@@ -11273,7 +11274,7 @@
if (NewFD == D)
return false;
GlobalDecl NewGD = GD.getWithDecl(NewFD);
- if (tryEmitAlias(CGM, NewGD, GD, Orig, IsForDefinition)) {
+ if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
DeferredVariantFunction.erase(D);
return true;
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index b8137a2..9215bd6 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -291,6 +291,17 @@
/// default location.
virtual unsigned getDefaultLocationReserved2Flags() const { return 0; }
+ /// Tries to emit declare variant function for \p OldGD from \p NewGD.
+ /// \param OrigAddr LLVM IR value for \p OldGD.
+ /// \param IsForDefinition true if emission of the definition of \p OldGD
+ /// was requested.
+ /// \returns true if it was able to emit a definition function for \p OldGD,
+ /// which points to \p NewGD.
+ virtual bool tryEmitDeclareVariant(const GlobalDecl &NewGD,
+ const GlobalDecl &OldGD,
+ llvm::GlobalValue *OrigAddr,
+ bool IsForDefinition);
+
/// Returns default flags for the barriers depending on the directive, for
/// which this barier is going to be emitted.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 9e70a5a..83f74fe 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -1895,6 +1895,19 @@
llvm_unreachable("Unknown flags are requested.");
}
+bool CGOpenMPRuntimeNVPTX::tryEmitDeclareVariant(const GlobalDecl &NewGD,
+ const GlobalDecl &OldGD,
+ llvm::GlobalValue *OrigAddr,
+ bool IsForDefinition) {
+ // Emit the function in OldGD with the body from NewGD, if NewGD is defined.
+ auto *NewFD = cast<FunctionDecl>(NewGD.getDecl());
+ if (NewFD->isDefined()) {
+ CGM.emitOpenMPDeviceFunctionRedefinition(OldGD, NewGD, OrigAddr);
+ return true;
+ }
+ return false;
+}
+
CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
: CGOpenMPRuntime(CGM, "_", "$") {
if (!CGM.getLangOpts().OpenMPIsDevice)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index e7fd458..0f78627 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -193,6 +193,18 @@
/// Full/Lightweight runtime mode. Used for better optimization.
unsigned getDefaultLocationReserved2Flags() const override;
+ /// Tries to emit declare variant function for \p OldGD from \p NewGD.
+ /// \param OrigAddr LLVM IR value for \p OldGD.
+ /// \param IsForDefinition true if emission of the definition of \p OldGD
+ /// was requested.
+ /// \returns true if it was able to emit a definition function for \p OldGD,
+ /// which points to \p NewGD.
+ /// The NVPTX backend does not support global aliases, so \p OldGD is emitted
+ /// with the body of \p NewGD instead of as an alias.
+ bool tryEmitDeclareVariant(const GlobalDecl &NewGD, const GlobalDecl &OldGD,
+ llvm::GlobalValue *OrigAddr,
+ bool IsForDefinition) override;
+
public:
explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
void clear() override;
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 080914a..eab48cc 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2742,6 +2742,50 @@
EmitGlobalFunctionDefinition(GD, GV);
}
+void CodeGenModule::emitOpenMPDeviceFunctionRedefinition(
+ GlobalDecl OldGD, GlobalDecl NewGD, llvm::GlobalValue *GV) {
+ assert(getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice &&
+ OpenMPRuntime && "Expected OpenMP device mode.");
+ const auto *D = cast<FunctionDecl>(OldGD.getDecl());
+
+ // Compute the function info and LLVM type.
+ const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(OldGD);
+ llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
+
+ // Get or create the prototype for the function.
+ if (!GV || (GV->getType()->getElementType() != Ty)) {
+ GV = cast<llvm::GlobalValue>(GetOrCreateLLVMFunction(
+ getMangledName(OldGD), Ty, GlobalDecl(), /*ForVTable=*/false,
+ /*DontDefer=*/true, /*IsThunk=*/false, llvm::AttributeList(),
+ ForDefinition));
+ SetFunctionAttributes(OldGD, cast<llvm::Function>(GV),
+ /*IsIncompleteFunction=*/false,
+ /*IsThunk=*/false);
+ }
+ // We need to set linkage and visibility on the function before
+ // generating code for it because various parts of IR generation
+ // want to propagate this information down (e.g. to local static
+ // declarations).
+ auto *Fn = cast<llvm::Function>(GV);
+ setFunctionLinkage(OldGD, Fn);
+
+ // FIXME: this is redundant with part of
+ // setFunctionDefinitionAttributes
+ setGVProperties(Fn, OldGD);
+
+ MaybeHandleStaticInExternC(D, Fn);
+
+ maybeSetTrivialComdat(*D, *Fn);
+
+ CodeGenFunction(*this).GenerateCode(NewGD, Fn, FI);
+
+ setNonAliasAttributes(OldGD, Fn);
+ SetLLVMFunctionAttributesForDefinition(D, Fn);
+
+ if (D->hasAttr<AnnotateAttr>())
+ AddGlobalAnnotations(D, Fn);
+}
+
void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
const auto *D = cast<ValueDecl>(GD.getDecl());
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 95964af..597b8d7 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1270,6 +1270,11 @@
/// \param D Requires declaration
void EmitOMPRequiresDecl(const OMPRequiresDecl *D);
+ /// Emits the definition of \p OldGD function with body from \p NewGD.
+ /// Required for proper handling of declare variant directive on the GPU.
+ void emitOpenMPDeviceFunctionRedefinition(GlobalDecl OldGD, GlobalDecl NewGD,
+ llvm::GlobalValue *GV);
+
/// Returns whether the given record has hidden LTO visibility and therefore
/// may participate in (single-module) CFI and whole-program vtable
/// optimization.