//===---- CGOpenMPRuntimeNVPTX.cpp - Interface to OpenMP NVPTX Runtimes ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation specialized to NVPTX
// targets.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntimeNVPTX.h"
#include "clang/AST/DeclOpenMP.h"
#include "CodeGenFunction.h"
#include "clang/AST/StmtOpenMP.h"

using namespace clang;
using namespace CodeGen;

namespace {
enum OpenMPRTLFunctionNVPTX {
  /// \brief Call to void __kmpc_kernel_init(kmp_int32 thread_limit);
  OMPRTL_NVPTX__kmpc_kernel_init,
  /// \brief Call to void __kmpc_kernel_deinit();
  OMPRTL_NVPTX__kmpc_kernel_deinit,
  /// \brief Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
  /// short RequiresOMPRuntime, short RequiresDataSharing);
  OMPRTL_NVPTX__kmpc_spmd_kernel_init,
  /// \brief Call to void __kmpc_spmd_kernel_deinit();
  OMPRTL_NVPTX__kmpc_spmd_kernel_deinit,
  /// \brief Call to void __kmpc_kernel_prepare_parallel(void
  /// *outlined_function);
  OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
  /// \brief Call to bool __kmpc_kernel_parallel(void **outlined_function);
  OMPRTL_NVPTX__kmpc_kernel_parallel,
  /// \brief Call to void __kmpc_kernel_end_parallel();
  OMPRTL_NVPTX__kmpc_kernel_end_parallel,
  /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL_NVPTX__kmpc_serialized_parallel,
  /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL_NVPTX__kmpc_end_serialized_parallel,
};

/// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
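/// Emits an 'enter' runtime call when the region is entered and an 'exit'
/// runtime call when it is left. When \p Conditional is set, the region body
/// is only emitted on a branch guarded by the result of the 'enter' call.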
class NVPTXActionTy final : public PrePostActionTy {
  llvm::Value *EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::Value *ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  NVPTXActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
                llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
                bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};

// A class to track the execution mode when codegening directives within
// a target region. The appropriate mode (generic/spmd) is set on entry
// to the target region and used by containing directives such as 'parallel'
// to emit optimized code.
class ExecutionModeRAII {
private:
  CGOpenMPRuntimeNVPTX::ExecutionMode SavedMode;
  CGOpenMPRuntimeNVPTX::ExecutionMode &Mode;

public:
  ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode,
                    CGOpenMPRuntimeNVPTX::ExecutionMode NewMode)
      : Mode(Mode) {
    SavedMode = Mode;
    Mode = NewMode;
  }
  ~ExecutionModeRAII() { Mode = SavedMode; }
};
} // anonymous namespace

/// Get the GPU warp size.
static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) {
  CGBuilderTy &Bld = CGF.Builder;
  return Bld.CreateCall(
      llvm::Intrinsic::getDeclaration(
          &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
      llvm::None, "nvptx_warp_size");
}

/// Get the id of the current thread on the GPU.
static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) {
  CGBuilderTy &Bld = CGF.Builder;
  return Bld.CreateCall(
      llvm::Intrinsic::getDeclaration(
          &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
      llvm::None, "nvptx_tid");
}

/// Get the maximum number of threads in a block of the GPU.
static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) {
  CGBuilderTy &Bld = CGF.Builder;
  return Bld.CreateCall(
      llvm::Intrinsic::getDeclaration(
          &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
      llvm::None, "nvptx_num_threads");
}

/// Get barrier to synchronize all threads in a block.
static void getNVPTXCTABarrier(CodeGenFunction &CGF) {
  CGBuilderTy &Bld = CGF.Builder;
  Bld.CreateCall(llvm::Intrinsic::getDeclaration(
      &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
}

/// Synchronize all GPU threads in a block.
static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); }

/// Get the value of the thread_limit clause in the teams directive.
/// For the 'generic' execution mode, the runtime encodes thread_limit in
/// the launch parameters, always starting thread_limit+warpSize threads per
/// CTA. The threads in the last warp are reserved for master execution.
/// For the 'spmd' execution mode, all threads in a CTA are part of the team.
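/// E.g., in generic mode a CTA launched with 128 threads and a warp size of
/// 32 has a thread_limit of 96; threads 96-127 form the reserved master warp.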
static llvm::Value *getThreadLimit(CodeGenFunction &CGF,
                                   bool IsInSpmdExecutionMode = false) {
  CGBuilderTy &Bld = CGF.Builder;
  return IsInSpmdExecutionMode
             ? getNVPTXNumThreads(CGF)
             : Bld.CreateSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF),
                             "thread_limit");
}

/// Get the thread id of the OMP master thread.
/// The master thread id is the first thread (lane) of the last warp in the
/// GPU block. Warp size is assumed to be some power of 2.
/// Thread id is 0 indexed.
/// E.g: If NumThreads is 33, master id is 32.
///      If NumThreads is 64, master id is 32.
///      If NumThreads is 1024, master id is 992.
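/// I.e., master_tid = (NumThreads - 1) & ~(WarpSize - 1): the last thread id
/// rounded down to a warp boundary.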
static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) {
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *NumThreads = getNVPTXNumThreads(CGF);

  // We assume that the warp size is a power of 2.
  llvm::Value *Mask = Bld.CreateSub(getNVPTXWarpSize(CGF), Bld.getInt32(1));

  return Bld.CreateAnd(Bld.CreateSub(NumThreads, Bld.getInt32(1)),
                       Bld.CreateNot(Mask), "master_tid");
}

CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
    CodeGenModule &CGM)
    : WorkerFn(nullptr), CGFI(nullptr) {
  createWorkerFunction(CGM);
}

void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction(
    CodeGenModule &CGM) {
  // Create a worker function with no arguments.
  CGFI = &CGM.getTypes().arrangeNullaryFunction();

  WorkerFn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(*CGFI), llvm::GlobalValue::InternalLinkage,
      /* placeholder */ "_worker", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, WorkerFn, *CGFI);
}

bool CGOpenMPRuntimeNVPTX::isInSpmdExecutionMode() const {
  return CurrentExecutionMode == CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd;
}

static CGOpenMPRuntimeNVPTX::ExecutionMode
getExecutionModeForDirective(CodeGenModule &CGM,
                             const OMPExecutableDirective &D) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  switch (DirectiveKind) {
  case OMPD_target:
    return CGOpenMPRuntimeNVPTX::ExecutionMode::Generic;
  case OMPD_target_parallel:
    return CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd;
  default:
    llvm_unreachable("Unsupported directive on NVPTX device.");
  }
  llvm_unreachable("Unsupported directive on NVPTX device.");
}

void CGOpenMPRuntimeNVPTX::emitGenericKernel(const OMPExecutableDirective &D,
                                             StringRef ParentName,
                                             llvm::Function *&OutlinedFn,
                                             llvm::Constant *&OutlinedFnID,
                                             bool IsOffloadEntry,
                                             const RegionCodeGenTy &CodeGen) {
  ExecutionModeRAII ModeRAII(CurrentExecutionMode,
                             CGOpenMPRuntimeNVPTX::ExecutionMode::Generic);
  EntryFunctionState EST;
  WorkerFunctionState WST(CGM);
  Work.clear();

  // Emit target region as a standalone region.
  class NVPTXPrePostActionTy : public PrePostActionTy {
    CGOpenMPRuntimeNVPTX &RT;
    CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
    CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST;

  public:
    NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT,
                         CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
                         CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST)
        : RT(RT), EST(EST), WST(WST) {}
    void Enter(CodeGenFunction &CGF) override {
      RT.emitGenericEntryHeader(CGF, EST, WST);
    }
    void Exit(CodeGenFunction &CGF) override {
      RT.emitGenericEntryFooter(CGF, EST);
    }
  } Action(*this, EST, WST);
  CodeGen.setAction(Action);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);

  // Create the worker function
  emitWorkerFunction(WST);

  // Now change the name of the worker function to correspond to this target
  // region's entry function.
  WST.WorkerFn->setName(OutlinedFn->getName() + "_worker");
}

// Setup NVPTX threads for master-worker OpenMP scheme.
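//
// Roughly, the emitted entry code is:
//
//   if (tid < thread_limit)         // workers: spin in the worker loop
//     { worker(); goto .exit; }
//   else if (tid == master_tid)     // master: init runtime, run target body
//     goto .master;
//   else                            // surplus threads of the master warp
//     goto .exit;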
void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF,
                                                  EntryFunctionState &EST,
                                                  WorkerFunctionState &WST) {
  CGBuilderTy &Bld = CGF.Builder;

  llvm::BasicBlock *WorkerBB = CGF.createBasicBlock(".worker");
  llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
  llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
  EST.ExitBB = CGF.createBasicBlock(".exit");

  auto *IsWorker =
      Bld.CreateICmpULT(getNVPTXThreadID(CGF), getThreadLimit(CGF));
  Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);

  CGF.EmitBlock(WorkerBB);
  CGF.EmitCallOrInvoke(WST.WorkerFn, llvm::None);
  CGF.EmitBranch(EST.ExitBB);

  CGF.EmitBlock(MasterCheckBB);
  auto *IsMaster =
      Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF));
  Bld.CreateCondBr(IsMaster, MasterBB, EST.ExitBB);

  CGF.EmitBlock(MasterBB);
  // First action in sequential region:
  // Initialize the state of the OpenMP runtime library on the GPU.
  llvm::Value *Args[] = {getThreadLimit(CGF)};
  CGF.EmitRuntimeCall(
      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args);
}

void CGOpenMPRuntimeNVPTX::emitGenericEntryFooter(CodeGenFunction &CGF,
                                                  EntryFunctionState &EST) {
  if (!EST.ExitBB)
    EST.ExitBB = CGF.createBasicBlock(".exit");

  llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".termination.notifier");
  CGF.EmitBranch(TerminateBB);

  CGF.EmitBlock(TerminateBB);
  // Signal termination condition.
  CGF.EmitRuntimeCall(
      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), None);
  // Barrier to terminate worker threads.
  syncCTAThreads(CGF);
  // Master thread jumps to exit point.
  CGF.EmitBranch(EST.ExitBB);

  CGF.EmitBlock(EST.ExitBB);
  EST.ExitBB = nullptr;
}

void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D,
                                          StringRef ParentName,
                                          llvm::Function *&OutlinedFn,
                                          llvm::Constant *&OutlinedFnID,
                                          bool IsOffloadEntry,
                                          const RegionCodeGenTy &CodeGen) {
  ExecutionModeRAII ModeRAII(CurrentExecutionMode,
                             CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd);
  EntryFunctionState EST;

  // Emit target region as a standalone region.
  class NVPTXPrePostActionTy : public PrePostActionTy {
    CGOpenMPRuntimeNVPTX &RT;
    CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
    const OMPExecutableDirective &D;

  public:
    NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT,
                         CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
                         const OMPExecutableDirective &D)
        : RT(RT), EST(EST), D(D) {}
    void Enter(CodeGenFunction &CGF) override {
      RT.emitSpmdEntryHeader(CGF, EST, D);
    }
    void Exit(CodeGenFunction &CGF) override {
      RT.emitSpmdEntryFooter(CGF, EST);
    }
  } Action(*this, EST, D);
  CodeGen.setAction(Action);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
  return;
}

void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader(
    CodeGenFunction &CGF, EntryFunctionState &EST,
    const OMPExecutableDirective &D) {
  auto &Bld = CGF.Builder;

  // Setup BBs in entry function.
  llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute");
  EST.ExitBB = CGF.createBasicBlock(".exit");

  // Initialize the OMP state in the runtime; called by all active threads.
  // TODO: Set RequiresOMPRuntime and RequiresDataSharing parameters
  // based on code analysis of the target region.
  llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSpmdExecutionMode=*/true),
                         /*RequiresOMPRuntime=*/Bld.getInt16(1),
                         /*RequiresDataSharing=*/Bld.getInt16(1)};
  CGF.EmitRuntimeCall(
      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
  CGF.EmitBranch(ExecuteBB);

  CGF.EmitBlock(ExecuteBB);
}

void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF,
                                               EntryFunctionState &EST) {
  if (!EST.ExitBB)
    EST.ExitBB = CGF.createBasicBlock(".exit");

  llvm::BasicBlock *OMPDeInitBB = CGF.createBasicBlock(".omp.deinit");
  CGF.EmitBranch(OMPDeInitBB);

  CGF.EmitBlock(OMPDeInitBB);
  // Deinitialize the OMP state in the runtime; called by all active threads.
  CGF.EmitRuntimeCall(
      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_deinit), None);
  CGF.EmitBranch(EST.ExitBB);

  CGF.EmitBlock(EST.ExitBB);
  EST.ExitBB = nullptr;
}

// Create a unique global variable to indicate the execution mode of this
// target region. The execution mode is either 'generic' or 'spmd', depending
// on the target directive. This variable is picked up by the offload library
// to set up the device appropriately before kernel launch. If the execution
// mode is 'generic', the runtime reserves one warp for the master; otherwise,
// all warps participate in parallel work.
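//
// For illustration, a generic-mode target region whose entry point is
// '__omp_offloading_<id>_foo_l10' would get a companion global roughly like
//   @__omp_offloading_<id>_foo_l10_exec_mode = weak constant i8 <mode>
// where <mode> is the numeric value of the ExecutionMode enumerator
// ('<id>' and 'foo_l10' are placeholders, not names produced here).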
static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
                                     CGOpenMPRuntimeNVPTX::ExecutionMode Mode) {
  (void)new llvm::GlobalVariable(
      CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
      llvm::GlobalValue::WeakAnyLinkage,
      llvm::ConstantInt::get(CGM.Int8Ty, Mode), Name + Twine("_exec_mode"));
}

void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) {
  auto &Ctx = CGM.getContext();

  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, *WST.CGFI, {});
  emitWorkerLoop(CGF, WST);
  CGF.FinishFunction();
}

void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
                                          WorkerFunctionState &WST) {
  //
  // The workers enter this loop and wait for parallel work from the master.
  // When the master encounters a parallel region it sets up the work + variable
  // arguments, and wakes up the workers. The workers first check to see if
  // they are required for the parallel region, i.e., within the # of requested
  // parallel threads. The activated workers load the variable arguments and
  // execute the parallel work.
  //

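  // As a rough sketch, each worker executes:
  //
  //   for (;;) {
  //     barrier();                                 // wait for work from master
  //     status = __kmpc_kernel_parallel(&work_fn);
  //     if (work_fn == nullptr) break;             // kernel is terminating
  //     if (status) {                              // this worker was activated
  //       work_fn(...);
  //       __kmpc_kernel_end_parallel();
  //     }
  //     barrier();                                 // rejoin the master
  //   }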
  CGBuilderTy &Bld = CGF.Builder;

  llvm::BasicBlock *AwaitBB = CGF.createBasicBlock(".await.work");
  llvm::BasicBlock *SelectWorkersBB = CGF.createBasicBlock(".select.workers");
  llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute.parallel");
  llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".terminate.parallel");
  llvm::BasicBlock *BarrierBB = CGF.createBasicBlock(".barrier.parallel");
  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");

  CGF.EmitBranch(AwaitBB);

  // Workers wait for work from master.
  CGF.EmitBlock(AwaitBB);
  // Wait for parallel work
  syncCTAThreads(CGF);

  Address WorkFn =
      CGF.CreateDefaultAlignTempAlloca(CGF.Int8PtrTy, /*Name=*/"work_fn");
  Address ExecStatus =
      CGF.CreateDefaultAlignTempAlloca(CGF.Int8Ty, /*Name=*/"exec_status");
  CGF.InitTempAlloca(ExecStatus, Bld.getInt8(/*C=*/0));
  CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy));

  llvm::Value *Args[] = {WorkFn.getPointer()};
  llvm::Value *Ret = CGF.EmitRuntimeCall(
      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
  Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus);

  // On termination condition (WorkFn == 0), exit loop.
  llvm::Value *ShouldTerminate =
      Bld.CreateIsNull(Bld.CreateLoad(WorkFn), "should_terminate");
  Bld.CreateCondBr(ShouldTerminate, ExitBB, SelectWorkersBB);

  // Activate requested workers.
  CGF.EmitBlock(SelectWorkersBB);
  llvm::Value *IsActive =
      Bld.CreateIsNotNull(Bld.CreateLoad(ExecStatus), "is_active");
  Bld.CreateCondBr(IsActive, ExecuteBB, BarrierBB);

  // Signal start of parallel region.
  CGF.EmitBlock(ExecuteBB);

  // Process work items: outlined parallel functions.
  for (auto *W : Work) {
    // Try to match this outlined function.
    auto *ID = Bld.CreatePointerBitCastOrAddrSpaceCast(W, CGM.Int8PtrTy);

    llvm::Value *WorkFnMatch =
        Bld.CreateICmpEQ(Bld.CreateLoad(WorkFn), ID, "work_match");

    llvm::BasicBlock *ExecuteFNBB = CGF.createBasicBlock(".execute.fn");
    llvm::BasicBlock *CheckNextBB = CGF.createBasicBlock(".check.next");
    Bld.CreateCondBr(WorkFnMatch, ExecuteFNBB, CheckNextBB);

    // Execute this outlined function.
    CGF.EmitBlock(ExecuteFNBB);

    // Insert call to work function.
    // FIXME: Pass arguments to outlined function from master thread.
    auto *Fn = cast<llvm::Function>(W);
    Address ZeroAddr =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C=*/0));
    llvm::Value *FnArgs[] = {ZeroAddr.getPointer(), ZeroAddr.getPointer()};
    CGF.EmitCallOrInvoke(Fn, FnArgs);

    // Go to end of parallel region.
    CGF.EmitBranch(TerminateBB);

    CGF.EmitBlock(CheckNextBB);
  }

  // Signal end of parallel region.
  CGF.EmitBlock(TerminateBB);
  CGF.EmitRuntimeCall(
      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel),
      llvm::None);
  CGF.EmitBranch(BarrierBB);

  // All active and inactive workers wait at a barrier after parallel region.
  CGF.EmitBlock(BarrierBB);
  // Barrier after parallel region.
  syncCTAThreads(CGF);
  CGF.EmitBranch(AwaitBB);

  // Exit target region.
  CGF.EmitBlock(ExitBB);
}

/// \brief Returns specified OpenMP runtime function for the current OpenMP
/// implementation. Specialized for the NVPTX device.
/// \param Function OpenMP runtime function.
/// \return Specified function.
llvm::Constant *
CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
  llvm::Constant *RTLFn = nullptr;
  switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) {
  case OMPRTL_NVPTX__kmpc_kernel_init: {
    // Build void __kmpc_kernel_init(kmp_int32 thread_limit);
    llvm::Type *TypeParams[] = {CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init");
    break;
  }
  case OMPRTL_NVPTX__kmpc_kernel_deinit: {
    // Build void __kmpc_kernel_deinit();
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit");
    break;
  }
  case OMPRTL_NVPTX__kmpc_spmd_kernel_init: {
    // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
    // short RequiresOMPRuntime, short RequiresDataSharing);
    llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init");
    break;
  }
  case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit: {
    // Build void __kmpc_spmd_kernel_deinit();
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit");
    break;
  }
  case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
    /// Build void __kmpc_kernel_prepare_parallel(
    /// void *outlined_function);
    llvm::Type *TypeParams[] = {CGM.Int8PtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel");
    break;
  }
  case OMPRTL_NVPTX__kmpc_kernel_parallel: {
    /// Build bool __kmpc_kernel_parallel(void **outlined_function);
    llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy};
    llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy);
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_parallel");
    break;
  }
  case OMPRTL_NVPTX__kmpc_kernel_end_parallel: {
    /// Build void __kmpc_kernel_end_parallel();
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_end_parallel");
    break;
  }
  case OMPRTL_NVPTX__kmpc_serialized_parallel: {
    // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
    break;
  }
  case OMPRTL_NVPTX__kmpc_end_serialized_parallel: {
    // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
    break;
  }
  }
  return RTLFn;
}

void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *ID,
                                              llvm::Constant *Addr,
                                              uint64_t Size, int32_t) {
  auto *F = dyn_cast<llvm::Function>(Addr);
  // TODO: Add support for global variables on the device after declare target
  // support.
  if (!F)
    return;
  llvm::Module *M = F->getParent();
  llvm::LLVMContext &Ctx = M->getContext();

  // Get "nvvm.annotations" metadata node
  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

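  // The annotation added below marks the target entry function as a CUDA
  // kernel; the resulting module-level IR looks roughly like:
  //   !nvvm.annotations = !{!0}
  //   !0 = !{void (...)* @<target_region_entry>, !"kernel", i32 1}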
  llvm::Metadata *MDVals[] = {
      llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "kernel"),
      llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
  // Append metadata to nvvm.annotations
  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  if (!IsOffloadEntry) // Nothing to do.
    return;

  assert(!ParentName.empty() && "Invalid target region parent name!");

  CGOpenMPRuntimeNVPTX::ExecutionMode Mode =
      getExecutionModeForDirective(CGM, D);
  switch (Mode) {
  case CGOpenMPRuntimeNVPTX::ExecutionMode::Generic:
    emitGenericKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
                      CodeGen);
    break;
  case CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd:
    emitSpmdKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
                   CodeGen);
    break;
  case CGOpenMPRuntimeNVPTX::ExecutionMode::Unknown:
    llvm_unreachable(
        "Unknown programming model for OpenMP directive on NVPTX target.");
  }

  setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode);
}

CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
    : CGOpenMPRuntime(CGM), CurrentExecutionMode(ExecutionMode::Unknown) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    llvm_unreachable("OpenMP NVPTX can only handle device code.");
}

void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF,
                                              OpenMPProcBindClauseKind ProcBind,
                                              SourceLocation Loc) {
  // Do nothing in case of Spmd mode and L0 parallel.
  // TODO: If in Spmd mode and L1 parallel emit the clause.
  if (isInSpmdExecutionMode())
    return;

  CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
}

void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF,
                                                llvm::Value *NumThreads,
                                                SourceLocation Loc) {
  // Do nothing in case of Spmd mode and L0 parallel.
  // TODO: If in Spmd mode and L1 parallel emit the clause.
  if (isInSpmdExecutionMode())
    return;

  CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc);
}

void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF,
                                              const Expr *NumTeams,
                                              const Expr *ThreadLimit,
                                              SourceLocation Loc) {}

llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  return CGOpenMPRuntime::emitParallelOutlinedFunction(D, ThreadIDVar,
                                                       InnermostKind, CodeGen);
}

llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {

  llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction(
      D, ThreadIDVar, InnermostKind, CodeGen);
  llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
  OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
  OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);

  return OutlinedFun;
}

void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         SourceLocation Loc,
                                         llvm::Value *OutlinedFn,
                                         ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  Address ZeroAddr =
      CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
                           /*Name*/ ".zero.addr");
  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
  CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
}

void CGOpenMPRuntimeNVPTX::emitParallelCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
    ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;

  if (isInSpmdExecutionMode())
    emitSpmdParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
  else
    emitGenericParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
}

void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
    ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
  llvm::Function *Fn = cast<llvm::Function>(OutlinedFn);

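  // In generic mode the master does not run the outlined function itself; it
  // publishes the function pointer and releases the workers, roughly:
  //   __kmpc_kernel_prepare_parallel(fn);  // advertise the work function
  //   barrier();                           // wake up the workers
  //   barrier();                           // wait until the workers are done
  // The function is also recorded in 'Work' so the worker loop can match it.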
  auto &&L0ParallelGen = [this, Fn](CodeGenFunction &CGF, PrePostActionTy &) {
    CGBuilderTy &Bld = CGF.Builder;

    // Prepare for parallel region. Indicate the outlined function.
    llvm::Value *Args[] = {Bld.CreateBitOrPointerCast(Fn, CGM.Int8PtrTy)};
    CGF.EmitRuntimeCall(
        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
        Args);

    // Activate workers. This barrier is used by the master to signal
    // work for the workers.
    syncCTAThreads(CGF);

    // OpenMP [2.5, Parallel Construct, p.49]
    // There is an implied barrier at the end of a parallel region. After the
    // end of a parallel region, only the master thread of the team resumes
    // execution of the enclosing task region.
    //
    // The master waits at this barrier until all workers are done.
    syncCTAThreads(CGF);

    // Remember for post-processing in worker loop.
    Work.push_back(Fn);
  };

  auto *RTLoc = emitUpdateLocation(CGF, Loc);
  auto *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *Args[] = {RTLoc, ThreadID};

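  // If an if-clause evaluates to false at run time, the region is serialized:
  // the outlined function is invoked directly on the master thread between
  // __kmpc_serialized_parallel and __kmpc_end_serialized_parallel.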
  auto &&SeqGen = [this, Fn, &CapturedVars, &Args](CodeGenFunction &CGF,
                                                   PrePostActionTy &) {
    auto &&CodeGen = [this, Fn, &CapturedVars](CodeGenFunction &CGF,
                                               PrePostActionTy &Action) {
      Action.Enter(CGF);

      llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
      OutlinedFnArgs.push_back(
          llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
      OutlinedFnArgs.push_back(
          llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
      OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
      CGF.EmitCallOrInvoke(Fn, OutlinedFnArgs);
    };

    RegionCodeGenTy RCG(CodeGen);
    NVPTXActionTy Action(
        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
        Args,
        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
        Args);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond)
    emitOMPIfClause(CGF, IfCond, L0ParallelGen, SeqGen);
  else {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    RegionCodeGenTy ThenRCG(L0ParallelGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
    ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
  // Just call the outlined function to execute the parallel region.
  // OutlinedFn(&GTid, &zero, CapturedStruct);
  //
  // TODO: Do something with IfCond when support for the 'if' clause
  // is added on Spmd target directives.
  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
  OutlinedFnArgs.push_back(
      llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
  OutlinedFnArgs.push_back(
      llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
  CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
}