blob: b160f17ccad5823f0e3b60afa955209154c1d54a [file] [log] [blame]
//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "TargetInfo.h"
using namespace clang;
using namespace CodeGen;
//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
/// \brief Emits the code guarded by an OpenMP 'if' clause through the supplied
/// \a CodeGen callback. The generated control flow is equivalent to:
///   if (Cond) {
///     CodeGen(true);
///   } else {
///     CodeGen(false);
///   }
/// When \a Cond folds to a simple constant, only the selected arm is emitted
/// and no branch is generated.
static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                            const std::function<void(bool)> &CodeGen) {
  CodeGenFunction::LexicalScope CondScope(CGF, Cond->getSourceRange());

  // Constant condition: generate just the live arm and stop.
  bool FoldedValue;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, FoldedValue)) {
    CodeGen(FoldedValue);
    return;
  }

  // Non-constant condition: build the then/else/end diamond and branch on
  // the evaluated condition.
  auto ThenBB = CGF.createBasicBlock(/*name*/ "omp_if.then");
  auto ElseBB = CGF.createBasicBlock(/*name*/ "omp_if.else");
  auto EndBB = CGF.createBasicBlock(/*name*/ "omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBB, ElseBB, /*TrueCount*/ 0);

  // 'then' arm.
  CGF.EmitBlock(ThenBB);
  CodeGen(/*ThenBlock*/ true);
  CGF.EmitBranch(EndBB);

  // 'else' arm. Debug locations are suppressed while switching blocks and
  // while emitting the closing unconditional branch, since neither needs a
  // line number.
  {
    SuppressDebugLocation NoDebugLoc(CGF.Builder);
    CGF.EmitBlock(ElseBB);
  }
  CodeGen(/*ThenBlock*/ false);
  {
    SuppressDebugLocation NoDebugLoc(CGF.Builder);
    CGF.EmitBranch(EndBB);
  }

  // Code following the 'if' continues in the end block.
  CGF.EmitBlock(EndBB, /*IsFinished*/ true);
}
/// \brief Emits initialization of the private copy at \a PrivateAddr from the
/// original variable at \a OriginalAddr.
///
/// The emitted code is bracketed by the blocks ".omp.assign.begin." and
/// ".omp.assign.end.". When \a AssignExpr is not a CXXConstructExpr, or is a
/// trivial initializer, a single aggregate assignment is emitted. Otherwise
/// the array is initialized element by element, from the last element down to
/// the first, evaluating \a AssignExpr into each destination element while
/// \a VDInit is temporarily mapped to the matching source element.
///
/// \param OriginalAddr LValue of the original (source) variable.
/// \param PrivateAddr Address of the private (destination) copy.
/// \param AssignExpr Initializer expression used for the copy.
/// \param OriginalType Type of the original variable; treated as an array type
/// on the element-by-element path.
/// \param VDInit Helper variable bound to the current source element while
/// \a AssignExpr is emitted.
void CodeGenFunction::EmitOMPAggregateAssign(LValue OriginalAddr,
llvm::Value *PrivateAddr,
const Expr *AssignExpr,
QualType OriginalType,
const VarDecl *VDInit) {
EmitBlock(createBasicBlock(".omp.assign.begin."));
if (!isa<CXXConstructExpr>(AssignExpr) || isTrivialInitializer(AssignExpr)) {
// Perform simple memcpy.
EmitAggregateAssign(PrivateAddr, OriginalAddr.getAddress(),
AssignExpr->getType());
} else {
// Perform element-by-element initialization.
QualType ElementTy;
auto SrcBegin = OriginalAddr.getAddress();
auto DestBegin = PrivateAddr;
auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
// NOTE(review): emitArrayLength is invoked once per begin pointer;
// presumably it may also adjust the pointer it is handed - confirm before
// trying to merge the two calls.
auto SrcNumElements = emitArrayLength(ArrayTy, ElementTy, SrcBegin);
auto DestNumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
auto SrcEnd = Builder.CreateGEP(SrcBegin, SrcNumElements);
auto DestEnd = Builder.CreateGEP(DestBegin, DestNumElements);
// The loop body is structured as a do-while loop. The zero-element case is
// handled up front by the explicit emptiness check below, which branches
// straight to the done block.
auto BodyBB = createBasicBlock("omp.arraycpy.body");
auto DoneBB = createBasicBlock("omp.arraycpy.done");
auto IsEmpty =
Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
// Enter the loop body, making that address the current address.
auto EntryBB = Builder.GetInsertBlock();
EmitBlock(BodyBB);
// Both PHIs start at the one-past-the-end address when entered from EntryBB.
auto SrcElementPast = Builder.CreatePHI(SrcBegin->getType(), 2,
"omp.arraycpy.srcElementPast");
SrcElementPast->addIncoming(SrcEnd, EntryBB);
auto DestElementPast = Builder.CreatePHI(DestBegin->getType(), 2,
"omp.arraycpy.destElementPast");
DestElementPast->addIncoming(DestEnd, EntryBB);
// Shift the address back by one element.
auto NegativeOne = llvm::ConstantInt::get(SizeTy, -1, true);
auto DestElement = Builder.CreateGEP(DestElementPast, NegativeOne,
"omp.arraycpy.dest.element");
auto SrcElement = Builder.CreateGEP(SrcElementPast, NegativeOne,
"omp.arraycpy.src.element");
{
// Create RunCleanScope to cleanup possible temps.
CodeGenFunction::RunCleanupsScope Init(*this);
// Emit initialization for single element.
// VDInit is mapped to the current source element only for the duration of
// this emission (presumably so that references to VDInit inside AssignExpr
// resolve to that element); the mapping is erased right afterwards.
LocalDeclMap[VDInit] = SrcElement;
EmitAnyExprToMem(AssignExpr, DestElement,
AssignExpr->getType().getQualifiers(),
/*IsInitializer*/ false);
LocalDeclMap.erase(VDInit);
}
// Check whether we've reached the end.
auto Done =
Builder.CreateICmpEQ(DestElement, DestBegin, "omp.arraycpy.done");
Builder.CreateCondBr(Done, DoneBB, BodyBB);
// Back-edge values: the next iteration continues from the elements that
// were just processed. The current insert block is used because emitting
// the element may have moved the insertion point away from BodyBB.
DestElementPast->addIncoming(DestElement, Builder.GetInsertBlock());
SrcElementPast->addIncoming(SrcElement, Builder.GetInsertBlock());
// Done.
EmitBlock(DoneBB, true);
}
EmitBlock(createBasicBlock(".omp.assign.end."));
}
/// \brief Emits private copies for the variables of every 'firstprivate'
/// clause attached to \a D and registers them in \a PrivateScope.
///
/// The clause's variable list, its private copies and its init helpers are
/// walked in lockstep. When an init helper is present, the original variable
/// is read from the captured-statement context and the private copy is
/// initialized through EmitOMPAggregateAssign. Otherwise the private copy's
/// declaration is emitted directly.
///
/// \param D Directive whose clauses are scanned.
/// \param PrivateScope Scope that receives the original-variable -> private
/// copy registrations.
void CodeGenFunction::EmitOMPFirstprivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_firstprivate;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter); I; ++I) {
    auto *C = cast<OMPFirstprivateClause>(*I);
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      // NOTE(review): the generator lambdas below capture locals by
      // reference; this relies on addPrivate invoking the generator before
      // it returns - confirm against OMPPrivateScope.
      bool IsRegistered;
      if (*InitsRef != nullptr) {
        // Emit VarDecl with copy init for arrays.
        auto *FD = CapturedStmtInfo->lookup(OrigVD);
        LValue Base = MakeNaturalAlignAddrLValue(
            CapturedStmtInfo->getContextValue(),
            getContext().getTagDeclType(FD->getParent()));
        auto OriginalAddr = EmitLValueForField(Base, FD);
        auto VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
          auto Emission = EmitAutoVarAlloca(*VD);
          // Emit initialization of aggregate firstprivate vars.
          EmitOMPAggregateAssign(OriginalAddr, Emission.getAllocatedAddress(),
                                 VD->getInit(), (*IRef)->getType(), VDInit);
          EmitAutoVarCleanups(Emission);
          return Emission.getAllocatedAddress();
        });
      } else {
        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
      }
      assert(IsRegistered && "firstprivate var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++IRef;
      ++InitsRef;
    }
  }
}
/// \brief Emits private copies for the variables of every 'private' clause
/// attached to \a D and registers them in \a PrivateScope.
///
/// The clause's variable list and its private copies are walked in lockstep;
/// for each pair the private copy's declaration is emitted and its address is
/// registered as the replacement for the original variable.
///
/// \param D Directive whose clauses are scanned.
/// \param PrivateScope Scope that receives the original-variable -> private
/// copy registrations.
void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_private;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter); I; ++I) {
    auto *C = cast<OMPPrivateClause>(*I);
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      // NOTE(review): the generator lambda captures locals by reference; this
      // relies on addPrivate invoking it before returning - confirm against
      // OMPPrivateScope.
      bool IsRegistered =
          PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
            // Emit the private VarDecl together with whatever initializer is
            // attached to it.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
      assert(IsRegistered && "private var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++IRef;
    }
  }
}
/// \brief Emits the runtime call that launches \a OutlinedFn for the given
/// 'parallel' directive. If the directive carries a 'num_threads' clause, the
/// thread count is evaluated and handed to the runtime first.
static void EmitOMPParallelCall(CodeGenFunction &CGF,
                                const OMPParallelDirective &S,
                                llvm::Value *OutlinedFn,
                                llvm::Value *CapturedStruct) {
  auto &Runtime = CGF.CGM.getOpenMPRuntime();

  if (auto Clause = S.getSingleClause(/*K*/ OMPC_num_threads)) {
    // Cleanups created while evaluating the thread count are run when this
    // scope ends, i.e. before the parallel call itself is emitted.
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto ThreadsClause = cast<OMPNumThreadsClause>(Clause);
    auto ThreadCount = CGF.EmitScalarExpr(ThreadsClause->getNumThreads(),
                                          /*IgnoreResultAssign*/ true);
    Runtime.EmitOMPNumThreadsClause(CGF, ThreadCount,
                                    ThreadsClause->getLocStart());
  }

  Runtime.EmitOMPParallelCall(CGF, S.getLocStart(), OutlinedFn,
                              CapturedStruct);
}
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
auto OutlinedFn = CGM.getOpenMPRuntime().EmitOpenMPOutlinedFunction(
S, *CS->getCapturedDecl()->param_begin());
if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
auto Cond = cast<OMPIfClause>(C)->getCondition();
EmitOMPIfClause(*this, Cond, [&](bool ThenBlock) {
if (ThenBlock)
EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
else
CGM.getOpenMPRuntime().EmitOMPSerialCall(*this, S.getLocStart(),
OutlinedFn, CapturedStruct);
});
} else
EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
}
/// \brief Emits one iteration of the loop associated with \a S: the counter
/// updates, followed by the loop body, followed by the block that a
/// 'continue' in the body jumps to.
///
/// \param SeparateIter Currently has no effect on the emitted code (see the
/// TODO at the end of the function).
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
                                      bool SeparateIter) {
  RunCleanupsScope IterationScope(*this);

  // Bring the loop counters up to date for the current iteration.
  for (auto UpdateExpr : S.updates())
    EmitIgnoredExpr(UpdateExpr);

  // A 'continue' inside the body lands just past the body.
  auto ContinueDest = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(JumpDest(), ContinueDest));

  // The body itself.
  EmitStmt(S.getBody());

  // Landing pad for 'continue' (updates/cleanups follow from here).
  EmitBlock(ContinueDest.getBlock());
  BreakContinueStack.pop_back();

  if (SeparateIter) {
    // TODO: Update lastprivates if the SeparateIter flag is true.
    // This will be implemented in a follow-up OMPLastprivateClause patch, but
    // result should be still correct without it, as we do not make these
    // variables private yet.
  }
}
void CodeGenFunction::EmitOMPInnerLoop(const OMPLoopDirective &S,
OMPPrivateScope &LoopScope,
bool SeparateIter) {
auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
auto Cnt = getPGORegionCounter(&S);
// Start the loop with a block that tests the condition.
auto CondBlock = createBasicBlock("omp.inner.for.cond");
EmitBlock(CondBlock);
LoopStack.push(CondBlock);
// If there are any cleanups between here and the loop-exit scope,
// create a block to stage a loop exit along.
auto ExitBlock = LoopExit.getBlock();
if (LoopScope.requiresCleanups())
ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
auto LoopBody = createBasicBlock("omp.inner.for.body");
// Emit condition: "IV < LastIteration + 1 [ - 1]"
// ("- 1" when lastprivate clause is present - separate one iteration).
llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond(SeparateIter));
Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock,
PGO.createLoopWeights(S.getCond(SeparateIter), Cnt));
if (ExitBlock != LoopExit.getBlock()) {
EmitBlock(ExitBlock);
EmitBranchThroughCleanup(LoopExit);
}
EmitBlock(LoopBody);
Cnt.beginRegion(Builder);
// Create a block for the increment.
auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
EmitOMPLoopBody(S);
EmitStopPoint(&S);
// Emit "IV = IV + 1" and a back-edge to the condition block.
EmitBlock(Continue.getBlock());
EmitIgnoredExpr(S.getInc());
BreakContinueStack.pop_back();
EmitBranch(CondBlock);
LoopStack.pop();
// Emit the fall-through block.
EmitBlock(LoopExit.getBlock());
}
/// \brief Emits the final-value expressions of \a S. Counters and finals are
/// walked in lockstep; a final expression is emitted only when its counter's
/// declaration has an entry in LocalDeclMap.
void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) {
  auto CounterIt = S.counters().begin();
  for (auto FinalExpr : S.finals()) {
    auto CounterDecl = cast<DeclRefExpr>(*CounterIt)->getDecl();
    ++CounterIt;
    if (!LocalDeclMap.lookup(CounterDecl))
      continue;
    EmitIgnoredExpr(FinalExpr);
  }
}
/// \brief Emits an alignment assumption for every variable listed in an
/// 'aligned' clause. The alignment comes from the clause's optional argument,
/// or from the target's default SIMD alignment for the variable's type when
/// the argument is absent or zero. No assumption is emitted for an alignment
/// of zero.
static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM,
                                 const OMPAlignedClause &Clause) {
  unsigned ExplicitAlignment = 0;
  if (auto AlignExpr = Clause.getAlignment()) {
    auto AlignConst = cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignExpr));
    ExplicitAlignment = static_cast<unsigned>(AlignConst->getZExtValue());
  }

  for (auto Var : Clause.varlists()) {
    // OpenMP [2.8.1, Description]
    // If no optional parameter is specified, implementation-defined default
    // alignments for SIMD instructions on the target platforms are assumed.
    unsigned Alignment =
        ExplicitAlignment != 0
            ? ExplicitAlignment
            : CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
                  Var->getType());
    assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
           "alignment is not power of 2");
    if (Alignment == 0)
      continue;
    llvm::Value *Ptr = CGF.EmitScalarExpr(Var);
    CGF.EmitAlignmentAssumption(Ptr, Alignment);
  }
}
/// \brief Registers a private, uninitialized copy of every loop counter in
/// \a LoopScope and then privatizes the scope.
static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters) {
  for (auto *CounterRef : Counters) {
    auto CounterVD = cast<VarDecl>(cast<DeclRefExpr>(CounterRef)->getDecl());

    // Allocates storage for the counter without emitting an initializer.
    auto AllocatePrivateCopy = [&]() -> llvm::Value * {
      auto Emission = CGF.EmitAutoVarAlloca(*CounterVD);
      CGF.EmitAutoVarCleanups(Emission);
      return Emission.getAllocatedAddress();
    };

    bool IsRegistered = LoopScope.addPrivate(CounterVD, AllocatePrivateCopy);
    assert(IsRegistered && "counter already registered as private");
    // Keeps builds without asserts free of an unused-variable warning.
    (void)IsRegistered;
  }
  (void)LoopScope.Privatize();
}
/// \brief Emits code for an OpenMP 'simd' directive.
///
/// The clauses are scanned first: 'safelen' sets the vectorizer width and
/// clears the parallel flag on the loop stack, 'aligned' emits alignment
/// assumptions, and 'lastprivate' requests that the last iteration be emitted
/// separately. Then the iteration variable and (when it is a variable) the
/// iteration count are emitted, followed by the loop itself and the final
/// counter updates.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
// Pragma 'simd' code depends on presence of 'lastprivate'.
// If present, we have to separate last iteration of the loop:
//
// if (LastIteration != 0) {
// for (IV in 0..LastIteration-1) BODY;
// BODY with updates of lastprivate vars;
// <Final counter/linear vars updates>;
// }
//
// otherwise (when there's no lastprivate):
//
// for (IV in 0..LastIteration) BODY;
// <Final counter/linear vars updates>;
//
// Walk clauses and process safelen/lastprivate.
bool SeparateIter = false;
// Start from "parallel, vectorization enabled"; a 'safelen' clause below
// withdraws the parallel marking.
LoopStack.setParallel();
LoopStack.setVectorizerEnable(true);
for (auto C : S.clauses()) {
switch (C->getClauseKind()) {
case OMPC_safelen: {
RValue Len = EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
AggValueSlot::ignored(), true);
// The cast requires the safelen value to have been emitted as a constant
// integer.
llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
LoopStack.setVectorizerWidth(Val->getZExtValue());
// In presence of finite 'safelen', it may be unsafe to mark all
// the memory instructions parallel, because loop-carried
// dependences of 'safelen' iterations are possible.
LoopStack.setParallel(false);
break;
}
case OMPC_aligned:
EmitOMPAlignedClause(*this, CGM, cast<OMPAlignedClause>(*C));
break;
case OMPC_lastprivate:
SeparateIter = true;
break;
default:
// Not handled yet
;
}
}
// Cleanups registered from here on are run when this scope is destroyed at
// the end of the function.
RunCleanupsScope DirectiveScope(*this);
CGDebugInfo *DI = getDebugInfo();
if (DI)
DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin());
// Emit the loop iteration variable.
const Expr *IVExpr = S.getIterationVariable();
const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
EmitVarDecl(*IVDecl);
EmitIgnoredExpr(S.getInit());
// Emit the iterations count variable.
// If it is not a variable, Sema decided to calculate iterations count on each
// iteration (e.g., it is foldable into a constant).
if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
// Emit calculation of the iterations count.
EmitIgnoredExpr(S.getCalcLastIteration());
}
if (SeparateIter) {
// Emit: if (LastIteration != 0) - begin.
// The guard actually emitted is the directive's precondition,
// S.getPreCond().
RegionCounter Cnt = getPGORegionCounter(&S);
auto ThenBlock = createBasicBlock("simd.if.then");
auto ContBlock = createBasicBlock("simd.if.end");
EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
EmitBlock(ThenBlock);
Cnt.beginRegion(Builder);
// Emit 'then' code.
{
// The private counters live only for this scope: the inner loop covers all
// iterations but the last, and the body is then emitted once more for the
// separated last iteration.
OMPPrivateScope LoopScope(*this);
EmitPrivateLoopCounters(*this, LoopScope, S.counters());
EmitOMPInnerLoop(S, LoopScope, /* SeparateIter */ true);
EmitOMPLoopBody(S, /* SeparateIter */ true);
}
// Final updates are emitted after the private scope has been closed.
EmitOMPSimdFinal(S);
// Emit: if (LastIteration != 0) - end.
EmitBranch(ContBlock);
EmitBlock(ContBlock, true);
} else {
{
// No 'lastprivate': a single inner loop covers every iteration.
OMPPrivateScope LoopScope(*this);
EmitPrivateLoopCounters(*this, LoopScope, S.counters());
EmitOMPInnerLoop(S, LoopScope);
}
// Final updates are emitted after the private scope has been closed.
EmitOMPSimdFinal(S);
}
if (DI)
DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
}
/// \brief Placeholder for the 'omp for' directive: code generation is not
/// implemented, so reaching this function hits llvm_unreachable.
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &) {
llvm_unreachable("CodeGen for 'omp for' is not supported yet.");
}
/// \brief Placeholder for the 'omp for simd' directive: code generation is
/// not implemented, so reaching this function hits llvm_unreachable.
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
}
/// \brief Placeholder for the 'omp sections' directive: code generation is
/// not implemented, so reaching this function hits llvm_unreachable.
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &) {
llvm_unreachable("CodeGen for 'omp sections' is not supported yet.");
}
/// \brief Placeholder for the 'omp section' directive: code generation is
/// not implemented, so reaching this function hits llvm_unreachable.
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &) {
llvm_unreachable("CodeGen for 'omp section' is not supported yet.");
}
/// \brief Placeholder for the 'omp single' directive: code generation is
/// not implemented, so reaching this function hits llvm_unreachable.
void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &) {
llvm_unreachable("CodeGen for 'omp single' is not supported yet.");
}
/// \brief Placeholder for the 'omp master' directive: code generation is
/// not implemented, so reaching this function hits llvm_unreachable.
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &) {
llvm_unreachable("CodeGen for 'omp master' is not supported yet.");
}
/// \brief Emits code for an OpenMP 'critical' directive. The captured body is
/// wrapped between the runtime's region-start and region-end calls:
///   __kmpc_critical();
///   <captured_body>
///   __kmpc_end_critical();
/// The lock is looked up by the directive's name.
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  auto &Runtime = CGM.getOpenMPRuntime();
  auto RegionLock =
      Runtime.GetCriticalRegionLock(S.getDirectiveName().getAsString());

  Runtime.EmitOMPCriticalRegionStart(*this, RegionLock, S.getLocStart());
  {
    // Cleanups created by the body are run before the region-end call.
    RunCleanupsScope BodyScope(*this);
    EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    EnsureInsertPoint();
  }
  Runtime.EmitOMPCriticalRegionEnd(*this, RegionLock, S.getLocEnd());
}
/// \brief Placeholder for the 'omp parallel for' directive: code generation
/// is not implemented, so reaching this function hits llvm_unreachable.
void
CodeGenFunction::EmitOMPParallelForDirective(const OMPParallelForDirective &) {
llvm_unreachable("CodeGen for 'omp parallel for' is not supported yet.");
}
/// \brief Placeholder for the 'omp parallel for simd' directive: code
/// generation is not implemented, so reaching this function hits
/// llvm_unreachable.
void CodeGenFunction::EmitOMPParallelForSimdDirective(
const OMPParallelForSimdDirective &) {
llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet.");
}
/// \brief Placeholder for the 'omp parallel sections' directive: code
/// generation is not implemented, so reaching this function hits
/// llvm_unreachable.
void CodeGenFunction::EmitOMPParallelSectionsDirective(
const OMPParallelSectionsDirective &) {
llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet.");
}
/// \brief Placeholder for the 'omp task' directive: code generation is not
/// implemented, so reaching this function hits llvm_unreachable.
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &) {
llvm_unreachable("CodeGen for 'omp task' is not supported yet.");
}
/// \brief Placeholder for the 'omp taskyield' directive: code generation is
/// not implemented, so reaching this function hits llvm_unreachable.
void CodeGenFunction::EmitOMPTaskyieldDirective(const OMPTaskyieldDirective &) {
llvm_unreachable("CodeGen for 'omp taskyield' is not supported yet.");
}
/// \brief Placeholder for the 'omp barrier' directive: code generation is
/// not implemented, so reaching this function hits llvm_unreachable.
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &) {
llvm_unreachable("CodeGen for 'omp barrier' is not supported yet.");
}
/// \brief Placeholder for the 'omp taskwait' directive: code generation is
/// not implemented, so reaching this function hits llvm_unreachable.
void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) {
llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet.");
}
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
CGM.getOpenMPRuntime().EmitOMPFlush(
*this, [&]() -> ArrayRef<const Expr *> {
if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) {
auto FlushClause = cast<OMPFlushClause>(C);
return llvm::makeArrayRef(FlushClause->varlist_begin(),
FlushClause->varlist_end());
}
return llvm::None;
}(),
S.getLocStart());
}
/// \brief Placeholder for the 'omp ordered' directive: code generation is
/// not implemented, so reaching this function hits llvm_unreachable.
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) {
llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
}
/// \brief Placeholder for the 'omp atomic' directive: code generation is
/// not implemented, so reaching this function hits llvm_unreachable.
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &) {
llvm_unreachable("CodeGen for 'omp atomic' is not supported yet.");
}
/// \brief Placeholder for the 'omp target' directive: code generation is
/// not implemented, so reaching this function hits llvm_unreachable.
void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
llvm_unreachable("CodeGen for 'omp target' is not supported yet.");
}
/// \brief Placeholder for the 'omp teams' directive: code generation is not
/// implemented, so reaching this function hits llvm_unreachable.
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
}