blob: a6c64b2f6d67150eecc3b09a7456cc298798c0ed [file] [log] [blame]
Pirama Arumuga Nainar4967a712016-09-19 22:19:55 -07001//===----- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This provides a class for OpenMP runtime code generation specialized to NVPTX
11// targets.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
16#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
17
18#include "CGOpenMPRuntime.h"
19#include "CodeGenFunction.h"
20#include "clang/AST/StmtOpenMP.h"
21#include "llvm/IR/CallSite.h"
22
23namespace clang {
24namespace CodeGen {
25
/// OpenMP runtime code generation specialized to NVPTX targets. Derives from
/// CGOpenMPRuntime and overrides a subset of its hooks (see the "Base class
/// overrides" section and the public section at the end of the class).
26class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
27public:
 /// State for a target entry function, passed to both emitEntryHeader and
 /// emitEntryFooter. Holds the exit basic block pointer, which starts out null.
28 class EntryFunctionState {
29 public:
30 llvm::BasicBlock *ExitBB;
31
 // NOTE(review): the ';' after the constructor body below is redundant.
32 EntryFunctionState() : ExitBB(nullptr){};
33 };
34
 /// State describing the worker function: the llvm::Function itself and its
 /// CGFunctionInfo. Passed to emitEntryHeader, emitWorkerFunction and
 /// emitWorkerLoop.
35 class WorkerFunctionState {
36 public:
37 llvm::Function *WorkerFn;
38 const CGFunctionInfo *CGFI;
39
 /// Presumably populates WorkerFn and CGFI via createWorkerFunction; the
 /// definition is not in this header -- confirm in the implementation file.
40 WorkerFunctionState(CodeGenModule &CGM);
41
42 private:
43 void createWorkerFunction(CodeGenModule &CGM);
44 };
45
46 /// \brief Helper for target entry function. Guide the master and worker
47 /// threads to their respective locations.
48 void emitEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
49 WorkerFunctionState &WST);
50
51 /// \brief Signal termination of OMP execution.
52 void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
53
54private:
55 //
56 // NVPTX calls.
57 //
58
59 /// \brief Get the GPU warp size.
60 llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF);
61
62 /// \brief Get the id of the current thread on the GPU.
63 llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF);
64
65 /// \brief Get the maximum number of threads in a block of the GPU.
66 llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF);
67
68 /// \brief Get barrier to synchronize all threads in a block.
 /// NOTE(review): declared void, so nothing is returned to the caller;
 /// presumably the barrier is emitted through \a CGF -- confirm against the
 /// definition.
69 void getNVPTXCTABarrier(CodeGenFunction &CGF);
70
71 /// \brief Synchronize all GPU threads in a block.
72 void syncCTAThreads(CodeGenFunction &CGF);
73
74 //
75 // OMP calls.
76 //
77
78 /// \brief Get the thread id of the OMP master thread.
79 /// The master thread id is the first thread (lane) of the last warp in the
80 /// GPU block. Warp size is assumed to be some power of 2.
81 /// Thread id is 0 indexed.
82 /// E.g. (values consistent with a warp size of 32): If NumThreads is 33, master id is 32.
83 /// If NumThreads is 64, master id is 32.
84 /// If NumThreads is 1024, master id is 992.
85 llvm::Value *getMasterThreadID(CodeGenFunction &CGF);
86
87 //
88 // Private state and methods.
89 //
90
91 // Master-worker control state.
92 /// Global variable for the number of requested OMP threads in a parallel region.
93 llvm::GlobalVariable *ActiveWorkers;
94 /// Global variable for the outlined function the workers are to execute.
95 llvm::GlobalVariable *WorkID;
96
97 /// \brief Initialize master-worker control state.
98 void initializeEnvironment();
99
100 /// \brief Emit the worker function for the current target region.
101 void emitWorkerFunction(WorkerFunctionState &WST);
102
103 /// \brief Helper for worker function. Emit body of worker loop.
104 void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST);
105
106 /// \brief Returns specified OpenMP runtime function for the current OpenMP
107 /// implementation. Specialized for the NVPTX device.
108 /// \param Function OpenMP runtime function.
109 /// \return Specified function.
110 llvm::Constant *createNVPTXRuntimeFunction(unsigned Function);
111
112 //
113 // Base class overrides.
114 //
115
116 /// \brief Creates offloading entry for the provided entry ID \a ID,
117 /// address \a Addr and size \a Size.
118 void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
119 uint64_t Size) override;
120
121 /// \brief Emit outlined function for 'target' directive on the NVPTX
122 /// device.
123 /// \param D Directive to emit.
124 /// \param ParentName Name of the function that encloses the target region.
125 /// \param OutlinedFn Outlined function value to be defined by this call.
126 /// \param OutlinedFnID Outlined function ID value to be defined by this call.
127 /// \param IsOffloadEntry True if the outlined function is an offload entry.
128 /// An outlined function may not be an entry if, e.g. the if clause always
129 /// evaluates to false.
 /// \param CodeGen Code generation sequence for the \a D directive.
130 void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
131 StringRef ParentName,
132 llvm::Function *&OutlinedFn,
133 llvm::Constant *&OutlinedFnID,
134 bool IsOffloadEntry,
135 const RegionCodeGenTy &CodeGen) override;
136
137public:
138 explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
139
140 /// \brief This function ought to emit, in the general case, a call to
141 /// the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed
142 /// as these numbers are obtained through the PTX grid and block configuration.
143 /// \param NumTeams An integer expression of teams.
144 /// \param ThreadLimit An integer expression of threads.
145 void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
146 const Expr *ThreadLimit, SourceLocation Loc) override;
147
148 /// \brief Emits the outlined function for the specified OpenMP parallel or
149 /// teams directive
150 /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
151 /// kmp_int32 BoundID, struct context_vars*).
 /// NOTE(review): the previous wording read "Emits inlined function for the
 /// specified OpenMP parallel directive but an inlined function for teams",
 /// which suggests the teams variant is meant to be inlined -- confirm against
 /// the definition.
152 /// \param D OpenMP directive.
153 /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
154 /// \param InnermostKind Kind of innermost directive (for simple directives it
155 /// is a directive itself, for combined - its innermost directive).
156 /// \param CodeGen Code generation sequence for the \a D directive.
157 llvm::Value *
158 emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective &D,
159 const VarDecl *ThreadIDVar,
160 OpenMPDirectiveKind InnermostKind,
161 const RegionCodeGenTy &CodeGen) override;
162
163 /// \brief Emits code for teams call of the \a OutlinedFn with
164 /// the captured variables passed in \a
165 /// CapturedVars.
 /// \param D OpenMP directive.
166 /// \param OutlinedFn Outlined function to be run by team masters. Type of
167 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
168 /// \param CapturedVars Values for the captured variables used in
169 /// \a OutlinedFn function.
170 /// NOTE(review): earlier text described a record addressed by a "CapturedStruct" pointer; the parameter is an ArrayRef of values.
171 void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
172 SourceLocation Loc, llvm::Value *OutlinedFn,
173 ArrayRef<llvm::Value *> CapturedVars) override;
174};
175
176} // CodeGen namespace.
177} // clang namespace.
178
179#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H