//===---- CGOpenMPRuntimeNVPTX.cpp - Interface to OpenMP NVPTX Runtimes ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation specialized to NVPTX
// targets.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntimeNVPTX.h"
#include "clang/AST/DeclOpenMP.h"
#include "CodeGenFunction.h"
#include "clang/AST/StmtOpenMP.h"

using namespace clang;
using namespace CodeGen;

namespace {
enum OpenMPRTLFunctionNVPTX {
  /// \brief Call to void __kmpc_kernel_init(kmp_int32 thread_limit);
  OMPRTL_NVPTX__kmpc_kernel_init,
  /// \brief Call to void __kmpc_kernel_deinit();
  OMPRTL_NVPTX__kmpc_kernel_deinit,
  /// \brief Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
  /// short RequiresOMPRuntime, short RequiresDataSharing);
  OMPRTL_NVPTX__kmpc_spmd_kernel_init,
  /// \brief Call to void __kmpc_spmd_kernel_deinit();
  OMPRTL_NVPTX__kmpc_spmd_kernel_deinit,
  /// \brief Call to void __kmpc_kernel_prepare_parallel(void
  /// *outlined_function);
  OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
  /// \brief Call to bool __kmpc_kernel_parallel(void **outlined_function);
  OMPRTL_NVPTX__kmpc_kernel_parallel,
  /// \brief Call to void __kmpc_kernel_end_parallel();
  OMPRTL_NVPTX__kmpc_kernel_end_parallel,
  /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL_NVPTX__kmpc_serialized_parallel,
  /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL_NVPTX__kmpc_end_serialized_parallel,
};

/// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
class NVPTXActionTy final : public PrePostActionTy {
  llvm::Value *EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::Value *ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  NVPTXActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
                llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
                bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};

// A class to track the execution mode when codegening directives within
// a target region. The appropriate mode (generic/spmd) is set on entry
// to the target region and used by containing directives such as 'parallel'
// to emit optimized code.
class ExecutionModeRAII {
private:
  CGOpenMPRuntimeNVPTX::ExecutionMode SavedMode;
  CGOpenMPRuntimeNVPTX::ExecutionMode &Mode;

public:
  ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode,
                    CGOpenMPRuntimeNVPTX::ExecutionMode NewMode)
      : Mode(Mode) {
    SavedMode = Mode;
    Mode = NewMode;
  }
  ~ExecutionModeRAII() { Mode = SavedMode; }
};
} // anonymous namespace

/// Get the GPU warp size.
static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) {
  CGBuilderTy &Bld = CGF.Builder;
  return Bld.CreateCall(
      llvm::Intrinsic::getDeclaration(
          &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
      llvm::None, "nvptx_warp_size");
}

/// Get the id of the current thread on the GPU.
static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) {
  CGBuilderTy &Bld = CGF.Builder;
  return Bld.CreateCall(
      llvm::Intrinsic::getDeclaration(
          &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
      llvm::None, "nvptx_tid");
}

/// Get the maximum number of threads in a block of the GPU.
static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) {
  CGBuilderTy &Bld = CGF.Builder;
  return Bld.CreateCall(
      llvm::Intrinsic::getDeclaration(
          &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
      llvm::None, "nvptx_num_threads");
}

/// Get barrier to synchronize all threads in a block.
static void getNVPTXCTABarrier(CodeGenFunction &CGF) {
  CGBuilderTy &Bld = CGF.Builder;
  Bld.CreateCall(llvm::Intrinsic::getDeclaration(
      &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
}

/// Synchronize all GPU threads in a block.
static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); }

/// Get the value of the thread_limit clause in the teams directive.
/// For the 'generic' execution mode, the runtime encodes thread_limit in
/// the launch parameters, always starting thread_limit+warpSize threads per
/// CTA. The threads in the last warp are reserved for master execution.
/// For the 'spmd' execution mode, all threads in a CTA are part of the team.
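/// E.g., if a generic-mode kernel is launched with 128 threads per CTA and a
/// warp size of 32, the reported thread_limit is 96 (one warp is kept back
/// for the master), while in spmd mode it is the full 128.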
static llvm::Value *getThreadLimit(CodeGenFunction &CGF,
                                   bool IsInSpmdExecutionMode = false) {
  CGBuilderTy &Bld = CGF.Builder;
  return IsInSpmdExecutionMode
             ? getNVPTXNumThreads(CGF)
             : Bld.CreateSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF),
                             "thread_limit");
}

/// Get the thread id of the OMP master thread.
/// The master thread id is the first thread (lane) of the last warp in the
/// GPU block. Warp size is assumed to be some power of 2.
/// Thread id is 0 indexed.
/// E.g: If NumThreads is 33, master id is 32.
///      If NumThreads is 64, master id is 32.
///      If NumThreads is 1024, master id is 992.
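/// In closed form: master_tid = (NumThreads - 1) & ~(WarpSize - 1), i.e. the
/// first lane of the warp that contains thread NumThreads - 1.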
static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) {
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *NumThreads = getNVPTXNumThreads(CGF);

  // We assume that the warp size is a power of 2.
  llvm::Value *Mask = Bld.CreateSub(getNVPTXWarpSize(CGF), Bld.getInt32(1));

  return Bld.CreateAnd(Bld.CreateSub(NumThreads, Bld.getInt32(1)),
                       Bld.CreateNot(Mask), "master_tid");
}

CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
    CodeGenModule &CGM)
    : WorkerFn(nullptr), CGFI(nullptr) {
  createWorkerFunction(CGM);
}

void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction(
    CodeGenModule &CGM) {
  // Create a worker function with no arguments.
  CGFI = &CGM.getTypes().arrangeNullaryFunction();

  WorkerFn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(*CGFI), llvm::GlobalValue::InternalLinkage,
      /* placeholder */ "_worker", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, WorkerFn, *CGFI);
}

bool CGOpenMPRuntimeNVPTX::isInSpmdExecutionMode() const {
  return CurrentExecutionMode == CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd;
}

static CGOpenMPRuntimeNVPTX::ExecutionMode
getExecutionModeForDirective(CodeGenModule &CGM,
                             const OMPExecutableDirective &D) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  switch (DirectiveKind) {
  case OMPD_target:
  case OMPD_target_teams:
    return CGOpenMPRuntimeNVPTX::ExecutionMode::Generic;
  case OMPD_target_parallel:
    return CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd;
  default:
    llvm_unreachable("Unsupported directive on NVPTX device.");
  }
  llvm_unreachable("Unsupported directive on NVPTX device.");
}

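// Emit a kernel for a target region in 'generic' execution mode: the master
// thread in the last warp runs the sequential part of the region, while the
// remaining threads wait for parallel work in the worker loop emitted by
// emitWorkerLoop() below.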
void CGOpenMPRuntimeNVPTX::emitGenericKernel(const OMPExecutableDirective &D,
                                             StringRef ParentName,
                                             llvm::Function *&OutlinedFn,
                                             llvm::Constant *&OutlinedFnID,
                                             bool IsOffloadEntry,
                                             const RegionCodeGenTy &CodeGen) {
  ExecutionModeRAII ModeRAII(CurrentExecutionMode,
                             CGOpenMPRuntimeNVPTX::ExecutionMode::Generic);
  EntryFunctionState EST;
  WorkerFunctionState WST(CGM);
  Work.clear();

  // Emit target region as a standalone region.
  class NVPTXPrePostActionTy : public PrePostActionTy {
    CGOpenMPRuntimeNVPTX &RT;
    CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
    CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST;

  public:
    NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT,
                         CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
                         CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST)
        : RT(RT), EST(EST), WST(WST) {}
    void Enter(CodeGenFunction &CGF) override {
      RT.emitGenericEntryHeader(CGF, EST, WST);
    }
    void Exit(CodeGenFunction &CGF) override {
      RT.emitGenericEntryFooter(CGF, EST);
    }
  } Action(*this, EST, WST);
  CodeGen.setAction(Action);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);

  // Create the worker function
  emitWorkerFunction(WST);

  // Now change the name of the worker function to correspond to this target
  // region's entry function.
  WST.WorkerFn->setName(OutlinedFn->getName() + "_worker");
}

// Setup NVPTX threads for master-worker OpenMP scheme.
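//
// The emitted entry sequence is, schematically:
//
//   if (tid < thread_limit) {          // worker threads
//     worker();                        // spin in the worker loop
//     goto exit;
//   }
//   if (tid != master_tid)             // surplus lanes of the last warp
//     goto exit;
//   __kmpc_kernel_init(thread_limit);  // master continues with the region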
void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF,
                                                  EntryFunctionState &EST,
                                                  WorkerFunctionState &WST) {
  CGBuilderTy &Bld = CGF.Builder;

  llvm::BasicBlock *WorkerBB = CGF.createBasicBlock(".worker");
  llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
  llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
  EST.ExitBB = CGF.createBasicBlock(".exit");

  auto *IsWorker =
      Bld.CreateICmpULT(getNVPTXThreadID(CGF), getThreadLimit(CGF));
  Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);

  CGF.EmitBlock(WorkerBB);
  CGF.EmitCallOrInvoke(WST.WorkerFn, llvm::None);
  CGF.EmitBranch(EST.ExitBB);

  CGF.EmitBlock(MasterCheckBB);
  auto *IsMaster =
      Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF));
  Bld.CreateCondBr(IsMaster, MasterBB, EST.ExitBB);

  CGF.EmitBlock(MasterBB);
  // First action in sequential region:
  // Initialize the state of the OpenMP runtime library on the GPU.
  llvm::Value *Args[] = {getThreadLimit(CGF)};
  CGF.EmitRuntimeCall(
      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args);
}

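// Terminate a generic-mode kernel: the master signals termination through
// __kmpc_kernel_deinit and then hits the barrier that the workers are waiting
// on, so they observe the termination condition and exit their loop as well.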
void CGOpenMPRuntimeNVPTX::emitGenericEntryFooter(CodeGenFunction &CGF,
                                                  EntryFunctionState &EST) {
  if (!EST.ExitBB)
    EST.ExitBB = CGF.createBasicBlock(".exit");

  llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".termination.notifier");
  CGF.EmitBranch(TerminateBB);

  CGF.EmitBlock(TerminateBB);
  // Signal termination condition.
  CGF.EmitRuntimeCall(
      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), None);
  // Barrier to terminate worker threads.
  syncCTAThreads(CGF);
  // Master thread jumps to exit point.
  CGF.EmitBranch(EST.ExitBB);

  CGF.EmitBlock(EST.ExitBB);
  EST.ExitBB = nullptr;
}

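// Emit a kernel for a target region in 'spmd' execution mode: all threads in
// the CTA are part of the team and execute the region, so no master/worker
// split or worker state machine is required.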
void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D,
                                          StringRef ParentName,
                                          llvm::Function *&OutlinedFn,
                                          llvm::Constant *&OutlinedFnID,
                                          bool IsOffloadEntry,
                                          const RegionCodeGenTy &CodeGen) {
  ExecutionModeRAII ModeRAII(CurrentExecutionMode,
                             CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd);
  EntryFunctionState EST;

  // Emit target region as a standalone region.
  class NVPTXPrePostActionTy : public PrePostActionTy {
    CGOpenMPRuntimeNVPTX &RT;
    CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
    const OMPExecutableDirective &D;

  public:
    NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT,
                         CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
                         const OMPExecutableDirective &D)
        : RT(RT), EST(EST), D(D) {}
    void Enter(CodeGenFunction &CGF) override {
      RT.emitSpmdEntryHeader(CGF, EST, D);
    }
    void Exit(CodeGenFunction &CGF) override {
      RT.emitSpmdEntryFooter(CGF, EST);
    }
  } Action(*this, EST, D);
  CodeGen.setAction(Action);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
  return;
}

void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader(
    CodeGenFunction &CGF, EntryFunctionState &EST,
    const OMPExecutableDirective &D) {
  auto &Bld = CGF.Builder;

  // Setup BBs in entry function.
  llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute");
  EST.ExitBB = CGF.createBasicBlock(".exit");

  // Initialize the OMP state in the runtime; called by all active threads.
  // TODO: Set RequiresOMPRuntime and RequiresDataSharing parameters
  // based on code analysis of the target region.
  llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSpmdExecutionMode=*/true),
                         /*RequiresOMPRuntime=*/Bld.getInt16(1),
                         /*RequiresDataSharing=*/Bld.getInt16(1)};
  CGF.EmitRuntimeCall(
      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
  CGF.EmitBranch(ExecuteBB);

  CGF.EmitBlock(ExecuteBB);
}

void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF,
                                               EntryFunctionState &EST) {
  if (!EST.ExitBB)
    EST.ExitBB = CGF.createBasicBlock(".exit");

  llvm::BasicBlock *OMPDeInitBB = CGF.createBasicBlock(".omp.deinit");
  CGF.EmitBranch(OMPDeInitBB);

  CGF.EmitBlock(OMPDeInitBB);
  // DeInitialize the OMP state in the runtime; called by all active threads.
  CGF.EmitRuntimeCall(
      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_deinit), None);
  CGF.EmitBranch(EST.ExitBB);

  CGF.EmitBlock(EST.ExitBB);
  EST.ExitBB = nullptr;
}

// Create a unique global variable to indicate the execution mode of this target
// region. The execution mode is either 'generic', or 'spmd' depending on the
// target directive. This variable is picked up by the offload library to setup
// the device appropriately before kernel launch. If the execution mode is
// 'generic', the runtime reserves one warp for the master, otherwise, all
// warps participate in parallel work.
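// For a target region whose entry function is named <entry>, the emitted
// global looks roughly like:
//   @<entry>_exec_mode = weak constant i8 <Mode>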
static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
                                     CGOpenMPRuntimeNVPTX::ExecutionMode Mode) {
  (void)new llvm::GlobalVariable(
      CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
      llvm::GlobalValue::WeakAnyLinkage,
      llvm::ConstantInt::get(CGM.Int8Ty, Mode), Name + Twine("_exec_mode"));
}

void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) {
  auto &Ctx = CGM.getContext();

  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, *WST.CGFI, {});
  emitWorkerLoop(CGF, WST);
  CGF.FinishFunction();
}

void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
                                          WorkerFunctionState &WST) {
  //
  // The workers enter this loop and wait for parallel work from the master.
  // When the master encounters a parallel region it sets up the work + variable
  // arguments, and wakes up the workers. The workers first check to see if
  // they are required for the parallel region, i.e., within the # of requested
  // parallel threads. The activated workers load the variable arguments and
  // execute the parallel work.
  //
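  // Schematically, each worker executes:
  //
  //   for (;;) {
  //     barrier();                         // wait for work from the master
  //     active = __kmpc_kernel_parallel(&work_fn);
  //     if (!work_fn) break;               // termination signal
  //     if (active) {
  //       <matching outlined function>();  // dispatched from the Work list
  //       __kmpc_kernel_end_parallel();
  //     }
  //     barrier();                         // rejoin the master
  //   }
  //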

  CGBuilderTy &Bld = CGF.Builder;

  llvm::BasicBlock *AwaitBB = CGF.createBasicBlock(".await.work");
  llvm::BasicBlock *SelectWorkersBB = CGF.createBasicBlock(".select.workers");
  llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute.parallel");
  llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".terminate.parallel");
  llvm::BasicBlock *BarrierBB = CGF.createBasicBlock(".barrier.parallel");
  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");

  CGF.EmitBranch(AwaitBB);

  // Workers wait for work from master.
  CGF.EmitBlock(AwaitBB);
  // Wait for parallel work
  syncCTAThreads(CGF);

  Address WorkFn =
      CGF.CreateDefaultAlignTempAlloca(CGF.Int8PtrTy, /*Name=*/"work_fn");
  Address ExecStatus =
      CGF.CreateDefaultAlignTempAlloca(CGF.Int8Ty, /*Name=*/"exec_status");
  CGF.InitTempAlloca(ExecStatus, Bld.getInt8(/*C=*/0));
  CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy));

  llvm::Value *Args[] = {WorkFn.getPointer()};
  llvm::Value *Ret = CGF.EmitRuntimeCall(
      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
  Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus);

  // On termination condition (WorkFn == 0), exit loop.
  llvm::Value *ShouldTerminate =
      Bld.CreateIsNull(Bld.CreateLoad(WorkFn), "should_terminate");
  Bld.CreateCondBr(ShouldTerminate, ExitBB, SelectWorkersBB);

  // Activate requested workers.
  CGF.EmitBlock(SelectWorkersBB);
  llvm::Value *IsActive =
      Bld.CreateIsNotNull(Bld.CreateLoad(ExecStatus), "is_active");
  Bld.CreateCondBr(IsActive, ExecuteBB, BarrierBB);

  // Signal start of parallel region.
  CGF.EmitBlock(ExecuteBB);

  // Process work items: outlined parallel functions.
  for (auto *W : Work) {
    // Try to match this outlined function.
    auto *ID = Bld.CreatePointerBitCastOrAddrSpaceCast(W, CGM.Int8PtrTy);

    llvm::Value *WorkFnMatch =
        Bld.CreateICmpEQ(Bld.CreateLoad(WorkFn), ID, "work_match");

    llvm::BasicBlock *ExecuteFNBB = CGF.createBasicBlock(".execute.fn");
    llvm::BasicBlock *CheckNextBB = CGF.createBasicBlock(".check.next");
    Bld.CreateCondBr(WorkFnMatch, ExecuteFNBB, CheckNextBB);

    // Execute this outlined function.
    CGF.EmitBlock(ExecuteFNBB);

    // Insert call to work function.
    // FIXME: Pass arguments to outlined function from master thread.
    auto *Fn = cast<llvm::Function>(W);
    Address ZeroAddr =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C=*/0));
    llvm::Value *FnArgs[] = {ZeroAddr.getPointer(), ZeroAddr.getPointer()};
    CGF.EmitCallOrInvoke(Fn, FnArgs);

    // Go to end of parallel region.
    CGF.EmitBranch(TerminateBB);

    CGF.EmitBlock(CheckNextBB);
  }

  // Signal end of parallel region.
  CGF.EmitBlock(TerminateBB);
  CGF.EmitRuntimeCall(
      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel),
      llvm::None);
  CGF.EmitBranch(BarrierBB);

  // All active and inactive workers wait at a barrier after parallel region.
  CGF.EmitBlock(BarrierBB);
  // Barrier after parallel region.
  syncCTAThreads(CGF);
  CGF.EmitBranch(AwaitBB);

  // Exit target region.
  CGF.EmitBlock(ExitBB);
}

/// \brief Returns specified OpenMP runtime function for the current OpenMP
/// implementation. Specialized for the NVPTX device.
/// \param Function OpenMP runtime function.
/// \return Specified function.
llvm::Constant *
CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
  llvm::Constant *RTLFn = nullptr;
  switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) {
  case OMPRTL_NVPTX__kmpc_kernel_init: {
    // Build void __kmpc_kernel_init(kmp_int32 thread_limit);
    llvm::Type *TypeParams[] = {CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init");
    break;
  }
  case OMPRTL_NVPTX__kmpc_kernel_deinit: {
    // Build void __kmpc_kernel_deinit();
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit");
    break;
  }
  case OMPRTL_NVPTX__kmpc_spmd_kernel_init: {
    // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
    // short RequiresOMPRuntime, short RequiresDataSharing);
    llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init");
    break;
  }
  case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit: {
    // Build void __kmpc_spmd_kernel_deinit();
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit");
    break;
  }
  case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
    /// Build void __kmpc_kernel_prepare_parallel(
    /// void *outlined_function);
    llvm::Type *TypeParams[] = {CGM.Int8PtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel");
    break;
  }
  case OMPRTL_NVPTX__kmpc_kernel_parallel: {
    /// Build bool __kmpc_kernel_parallel(void **outlined_function);
    llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy};
    llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy);
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_parallel");
    break;
  }
  case OMPRTL_NVPTX__kmpc_kernel_end_parallel: {
    /// Build void __kmpc_kernel_end_parallel();
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_end_parallel");
    break;
  }
  case OMPRTL_NVPTX__kmpc_serialized_parallel: {
    // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
    break;
  }
  case OMPRTL_NVPTX__kmpc_end_serialized_parallel: {
    // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
    break;
  }
  }
  return RTLFn;
}

void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *ID,
                                              llvm::Constant *Addr,
                                              uint64_t Size, int32_t) {
  auto *F = dyn_cast<llvm::Function>(Addr);
  // TODO: Add support for global variables on the device after declare target
  // support.
  if (!F)
    return;
  llvm::Module *M = F->getParent();
  llvm::LLVMContext &Ctx = M->getContext();

  // Get "nvvm.annotations" metadata node
  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

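  // Mark the target entry point as a CUDA kernel via !nvvm.annotations; the
  // node appended below is roughly !{void ()* @<entry>, !"kernel", i32 1}.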
  llvm::Metadata *MDVals[] = {
      llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "kernel"),
      llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
  // Append metadata to nvvm.annotations
  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  if (!IsOffloadEntry) // Nothing to do.
    return;

  assert(!ParentName.empty() && "Invalid target region parent name!");

  CGOpenMPRuntimeNVPTX::ExecutionMode Mode =
      getExecutionModeForDirective(CGM, D);
  switch (Mode) {
  case CGOpenMPRuntimeNVPTX::ExecutionMode::Generic:
    emitGenericKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
                      CodeGen);
    break;
  case CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd:
    emitSpmdKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
                   CodeGen);
    break;
  case CGOpenMPRuntimeNVPTX::ExecutionMode::Unknown:
    llvm_unreachable(
        "Unknown programming model for OpenMP directive on NVPTX target.");
  }

  setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode);
}

CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
    : CGOpenMPRuntime(CGM), CurrentExecutionMode(ExecutionMode::Unknown) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    llvm_unreachable("OpenMP NVPTX can only handle device code.");
}

void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF,
                                              OpenMPProcBindClauseKind ProcBind,
                                              SourceLocation Loc) {
  // Do nothing in case of Spmd mode and L0 parallel.
  // TODO: If in Spmd mode and L1 parallel emit the clause.
  if (isInSpmdExecutionMode())
    return;

  CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
}

void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF,
                                                llvm::Value *NumThreads,
                                                SourceLocation Loc) {
  // Do nothing in case of Spmd mode and L0 parallel.
  // TODO: If in Spmd mode and L1 parallel emit the clause.
  if (isInSpmdExecutionMode())
    return;

  CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc);
}

void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF,
                                              const Expr *NumTeams,
                                              const Expr *ThreadLimit,
                                              SourceLocation Loc) {}

llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  return CGOpenMPRuntime::emitParallelOutlinedFunction(D, ThreadIDVar,
                                                       InnermostKind, CodeGen);
}

llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {

  llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction(
      D, ThreadIDVar, InnermostKind, CodeGen);
  llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
  OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
  OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);

  return OutlinedFun;
}

void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         SourceLocation Loc,
                                         llvm::Value *OutlinedFn,
                                         ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  Address ZeroAddr =
      CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
                           /*Name*/ ".zero.addr");
  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
  CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
}

void CGOpenMPRuntimeNVPTX::emitParallelCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
    ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;

  if (isInSpmdExecutionMode())
    emitSpmdParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
  else
    emitGenericParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
}

void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
    ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
  llvm::Function *Fn = cast<llvm::Function>(OutlinedFn);

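  // Master-side protocol for a level-0 parallel region in generic mode; it
  // mirrors the worker loop in emitWorkerLoop():
  //   __kmpc_kernel_prepare_parallel(fn);  // publish the outlined function
  //   barrier();                           // release the workers
  //   barrier();                           // wait until the workers are done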
  auto &&L0ParallelGen = [this, Fn](CodeGenFunction &CGF, PrePostActionTy &) {
    CGBuilderTy &Bld = CGF.Builder;

    // Prepare for parallel region. Indicate the outlined function.
    llvm::Value *Args[] = {Bld.CreateBitOrPointerCast(Fn, CGM.Int8PtrTy)};
    CGF.EmitRuntimeCall(
        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
        Args);

    // Activate workers. This barrier is used by the master to signal
    // work for the workers.
    syncCTAThreads(CGF);

    // OpenMP [2.5, Parallel Construct, p.49]
    // There is an implied barrier at the end of a parallel region. After the
    // end of a parallel region, only the master thread of the team resumes
    // execution of the enclosing task region.
    //
    // The master waits at this barrier until all workers are done.
    syncCTAThreads(CGF);

    // Remember for post-processing in worker loop.
    Work.push_back(Fn);
  };

  auto *RTLoc = emitUpdateLocation(CGF, Loc);
  auto *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *Args[] = {RTLoc, ThreadID};

  auto &&SeqGen = [this, Fn, &CapturedVars, &Args](CodeGenFunction &CGF,
                                                   PrePostActionTy &) {
    auto &&CodeGen = [this, Fn, &CapturedVars](CodeGenFunction &CGF,
                                               PrePostActionTy &Action) {
      Action.Enter(CGF);

      llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
      OutlinedFnArgs.push_back(
          llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
      OutlinedFnArgs.push_back(
          llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
      OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
      CGF.EmitCallOrInvoke(Fn, OutlinedFnArgs);
    };

    RegionCodeGenTy RCG(CodeGen);
    NVPTXActionTy Action(
        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
        Args,
        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
        Args);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond)
    emitOMPIfClause(CGF, IfCond, L0ParallelGen, SeqGen);
  else {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    RegionCodeGenTy ThenRCG(L0ParallelGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
    ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
  // Just call the outlined function to execute the parallel region.
  // OutlinedFn(&GTid, &zero, CapturedStruct);
  //
  // TODO: Do something with IfCond when support for the 'if' clause
  // is added on Spmd target directives.
  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
  OutlinedFnArgs.push_back(
      llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
  OutlinedFnArgs.push_back(
      llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
  CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
}