[OpenMP] Codegen for the 'target parallel' directive on the NVPTX device.
This patch adds codegen for the 'target parallel' directive on the NVPTX
device. We refer to offload OpenMP directives such as 'target parallel' and
'target teams distribute parallel for' as SPMD constructs. SPMD constructs,
in contrast to Generic ones like the plain 'target', can never contain
a serial region.
SPMD constructs can be handled more efficiently on the GPU and do not
require the Warp Loop of the Generic codegen scheme. This patch adds
SPMD codegen support for 'target parallel' on the NVPTX device and can
be reused for other SPMD constructs.
Reviewers: ABataev
Differential Revision: https://reviews.llvm.org/D28755
llvm-svn: 292428
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index a69f051..9647870 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -43,6 +43,8 @@
void createWorkerFunction(CodeGenModule &CGM);
};
+ bool isInSpmdExecutionMode() const;
+
/// \brief Emit the worker function for the current target region.
void emitWorkerFunction(WorkerFunctionState &WST);
@@ -58,6 +60,13 @@
/// function.
void emitGenericEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
+ /// \brief Helper for SPMD mode target directive's entry function.
+ void emitSpmdEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
+ const OMPExecutableDirective &D);
+
+ /// \brief Signal termination of SPMD mode execution.
+ void emitSpmdEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
+
/// \brief Returns specified OpenMP runtime function for the current OpenMP
/// implementation. Specialized for the NVPTX device.
/// \param Function OpenMP runtime function.
@@ -87,6 +96,22 @@
llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen);
+ /// \brief Emit outlined function specialized for the Single Program
+ /// Multiple Data programming model for applicable target directives on the
+ /// NVPTX device.
+ /// \param D Directive to emit.
+ /// \param ParentName Name of the function that encloses the target region.
+ /// \param OutlinedFn Outlined function value to be defined by this call.
+ /// \param OutlinedFnID Outlined function ID value to be defined by this call.
+ /// \param IsOffloadEntry True if the outlined function is an offload entry.
+ /// An outlined function may not be an entry if, e.g. the if clause always
+ /// evaluates to false.
+ /// \param CodeGen Object containing the target statements.
+ void emitSpmdKernel(const OMPExecutableDirective &D, StringRef ParentName,
+ llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen);
+
/// \brief Emit outlined function for 'target' directive on the NVPTX
/// device.
/// \param D Directive to emit.
@@ -118,6 +143,22 @@
ArrayRef<llvm::Value *> CapturedVars,
const Expr *IfCond);
+ /// \brief Emits code for parallel or serial call of the \a OutlinedFn with
+ /// the captured variables supplied in \a CapturedVars.
+ /// This call is for a parallel directive within an SPMD target directive.
+ /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
+ /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
+ /// \param CapturedVars References to the variables used in \a OutlinedFn
+ /// function, passed as an array of llvm::Value pointers.
+ /// \param IfCond Condition in the associated 'if' clause, if it was
+ /// specified, nullptr otherwise.
+ /// NOTE(review): \a CGF and \a Loc are not documented here; presumably the
+ /// current codegen context and the directive's source location -- confirm.
+ void emitSpmdParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
+ llvm::Value *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars,
+ const Expr *IfCond);
+
protected:
/// \brief Get the function name of an outlined region.
// The name can be customized depending on the target.
@@ -192,6 +233,25 @@
llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> CapturedVars,
const Expr *IfCond) override;
+
+public:
+ /// Target codegen is specialized based on two programming models: the
+ /// 'generic' fork-join model of OpenMP, and a more GPU efficient 'spmd'
+ /// model for constructs like 'target parallel' that support it.
+ enum ExecutionMode {
+ /// Single Program Multiple Data.
+ Spmd,
+ /// Generic codegen to support fork-join model.
+ Generic,
+ Unknown, ///< NOTE(review): presumably "mode not yet determined" -- confirm.
+ };
+
+private:
+ // Track the execution mode while generating code for directives within a
+ // target region. The appropriate mode (generic/spmd) is set on entry to the
+ // target region and used by nested directives such as 'parallel' to emit
+ // optimized code.
+ ExecutionMode CurrentExecutionMode;
};
} // CodeGen namespace.