//===----- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation specialized to NVPTX
// targets.
//
//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
16#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
17
18#include "CGOpenMPRuntime.h"
Arpith Chacko Jacob5c309e42016-03-22 01:48:56 +000019#include "CodeGenFunction.h"
20#include "clang/AST/StmtOpenMP.h"
21#include "llvm/IR/CallSite.h"
Samuel Antao45bfe4c2016-02-08 15:59:20 +000022
23namespace clang {
24namespace CodeGen {
25
26class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
Alexey Bataev14fa1c62016-03-29 05:34:15 +000027public:
Alexey Bataev5e87c342016-12-22 19:44:05 +000028 struct EntryFunctionState {
29 llvm::BasicBlock *ExitBB = nullptr;
Alexey Bataev14fa1c62016-03-29 05:34:15 +000030 };
31
32 class WorkerFunctionState {
33 public:
34 llvm::Function *WorkerFn;
35 const CGFunctionInfo *CGFI;
36
37 WorkerFunctionState(CodeGenModule &CGM);
38
39 private:
40 void createWorkerFunction(CodeGenModule &CGM);
41 };
42
43 /// \brief Helper for target entry function. Guide the master and worker
44 /// threads to their respective locations.
45 void emitEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
46 WorkerFunctionState &WST);
47
48 /// \brief Signal termination of OMP execution.
49 void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
50
51private:
Arpith Chacko Jacob5c309e42016-03-22 01:48:56 +000052 //
53 // NVPTX calls.
54 //
55
56 /// \brief Get the GPU warp size.
57 llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF);
58
59 /// \brief Get the id of the current thread on the GPU.
60 llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF);
61
62 // \brief Get the maximum number of threads in a block of the GPU.
63 llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF);
64
65 /// \brief Get barrier to synchronize all threads in a block.
66 void getNVPTXCTABarrier(CodeGenFunction &CGF);
67
68 // \brief Synchronize all GPU threads in a block.
69 void syncCTAThreads(CodeGenFunction &CGF);
70
71 //
72 // OMP calls.
73 //
74
75 /// \brief Get the thread id of the OMP master thread.
76 /// The master thread id is the first thread (lane) of the last warp in the
77 /// GPU block. Warp size is assumed to be some power of 2.
78 /// Thread id is 0 indexed.
79 /// E.g: If NumThreads is 33, master id is 32.
80 /// If NumThreads is 64, master id is 32.
81 /// If NumThreads is 1024, master id is 992.
82 llvm::Value *getMasterThreadID(CodeGenFunction &CGF);
83
84 //
85 // Private state and methods.
86 //
87
88 // Master-worker control state.
89 // Number of requested OMP threads in parallel region.
90 llvm::GlobalVariable *ActiveWorkers;
91 // Outlined function for the workers to execute.
92 llvm::GlobalVariable *WorkID;
93
Arpith Chacko Jacob5c309e42016-03-22 01:48:56 +000094 /// \brief Initialize master-worker control state.
95 void initializeEnvironment();
96
97 /// \brief Emit the worker function for the current target region.
98 void emitWorkerFunction(WorkerFunctionState &WST);
99
100 /// \brief Helper for worker function. Emit body of worker loop.
101 void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST);
102
Arpith Chacko Jacob5c309e42016-03-22 01:48:56 +0000103 /// \brief Returns specified OpenMP runtime function for the current OpenMP
104 /// implementation. Specialized for the NVPTX device.
105 /// \param Function OpenMP runtime function.
106 /// \return Specified function.
107 llvm::Constant *createNVPTXRuntimeFunction(unsigned Function);
108
109 //
110 // Base class overrides.
111 //
112
113 /// \brief Creates offloading entry for the provided entry ID \a ID,
114 /// address \a Addr and size \a Size.
115 void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
116 uint64_t Size) override;
117
118 /// \brief Emit outlined function for 'target' directive on the NVPTX
119 /// device.
120 /// \param D Directive to emit.
121 /// \param ParentName Name of the function that encloses the target region.
122 /// \param OutlinedFn Outlined function value to be defined by this call.
123 /// \param OutlinedFnID Outlined function ID value to be defined by this call.
124 /// \param IsOffloadEntry True if the outlined function is an offload entry.
125 /// An outlined function may not be an entry if, e.g. the if clause always
126 /// evaluates to false.
127 void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
128 StringRef ParentName,
129 llvm::Function *&OutlinedFn,
130 llvm::Constant *&OutlinedFnID,
Alexey Bataev14fa1c62016-03-29 05:34:15 +0000131 bool IsOffloadEntry,
132 const RegionCodeGenTy &CodeGen) override;
Arpith Chacko Jacob5c309e42016-03-22 01:48:56 +0000133
Samuel Antao45bfe4c2016-02-08 15:59:20 +0000134public:
135 explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
Carlo Bertollic6872252016-04-04 15:55:02 +0000136
137 /// \brief This function ought to emit, in the general case, a call to
138 // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed
139 // as these numbers are obtained through the PTX grid and block configuration.
140 /// \param NumTeams An integer expression of teams.
141 /// \param ThreadLimit An integer expression of threads.
142 void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
143 const Expr *ThreadLimit, SourceLocation Loc) override;
144
145 /// \brief Emits inlined function for the specified OpenMP parallel
146 // directive but an inlined function for teams.
147 /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
148 /// kmp_int32 BoundID, struct context_vars*).
149 /// \param D OpenMP directive.
150 /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
151 /// \param InnermostKind Kind of innermost directive (for simple directives it
152 /// is a directive itself, for combined - its innermost directive).
153 /// \param CodeGen Code generation sequence for the \a D directive.
154 llvm::Value *
155 emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective &D,
156 const VarDecl *ThreadIDVar,
157 OpenMPDirectiveKind InnermostKind,
158 const RegionCodeGenTy &CodeGen) override;
159
160 /// \brief Emits code for teams call of the \a OutlinedFn with
161 /// variables captured in a record which address is stored in \a
162 /// CapturedStruct.
163 /// \param OutlinedFn Outlined function to be run by team masters. Type of
164 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
165 /// \param CapturedVars A pointer to the record with the references to
166 /// variables used in \a OutlinedFn function.
167 ///
168 void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
169 SourceLocation Loc, llvm::Value *OutlinedFn,
170 ArrayRef<llvm::Value *> CapturedVars) override;
Samuel Antao45bfe4c2016-02-08 15:59:20 +0000171};
172
173} // CodeGen namespace.
174} // clang namespace.
175
176#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H