Allow annotating alias scopes in the new SCoP.
The command line flag -polly-annotate-alias-scopes controls whether or not
Polly annotates alias scopes in the new SCoP (default ON). This can improve
later optimizations, since the new SCoP is essentially an alias-free
environment for them.
llvm-svn: 218877
diff --git a/polly/include/polly/CodeGen/CodeGeneration.h b/polly/include/polly/CodeGen/CodeGeneration.h
index f8273c3..1627987 100644
--- a/polly/include/polly/CodeGen/CodeGeneration.h
+++ b/polly/include/polly/CodeGen/CodeGeneration.h
@@ -36,6 +36,9 @@
};
extern CodeGenChoice PollyCodeGenChoice;
+/// @brief Flag to turn on/off annotation of alias scopes.
+extern bool PollyAnnotateAliasScopes;
+
static inline int getNumberOfIterations(__isl_take isl_set *Domain) {
int Dim = isl_set_dim(Domain, isl_dim_set);
diff --git a/polly/include/polly/CodeGen/IRBuilder.h b/polly/include/polly/CodeGen/IRBuilder.h
index 463f393..3c9218f 100644
--- a/polly/include/polly/CodeGen/IRBuilder.h
+++ b/polly/include/polly/CodeGen/IRBuilder.h
@@ -18,14 +18,34 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/Analysis/LoopInfo.h"
-namespace polly {
+namespace llvm {
+class ScalarEvolution;
+}
-/// @brief Helper class to annotate newly generated loops with metadata.
+namespace polly {
+class Scop;
+
+/// @brief Helper class to annotate newly generated SCoPs with metadata.
///
-/// This stack-like structure will keep track of all loops, and annotate
-/// memory instructions and loop headers according to all parallel loops.
+/// The annotations are twofold:
+/// 1) Loops are stored in a stack-like structure in the order they are
+/// constructed and the LoopID metadata node is added to the backedge.
+/// Contained memory instructions and loop headers are annotated according
+/// to all parallel surrounding loops.
+/// 2) The new SCoP is assumed alias-free (either due to the result of
+/// AliasAnalysis queries or runtime alias checks). We therefore annotate
+/// all memory instructions with alias scopes to indicate that fact to
+/// later optimizations.
+/// These alias scopes live in a new alias domain only used in this SCoP.
+/// Each base pointer has its own alias scope and is annotated to not
+/// alias with any access to different base pointers.
class LoopAnnotator {
public:
+ LoopAnnotator();
+
+ /// @brief Build all alias scopes for the given SCoP.
+ void buildAliasScopes(Scop &S);
+
/// @brief Add a new loop @p L which is parallel if @p IsParallel is true.
void pushLoop(llvm::Loop *L, bool IsParallel);
@@ -40,11 +60,23 @@
bool IsParallel) const;
private:
+ /// @brief The ScalarEvolution analysis we use to find base pointers.
+ llvm::ScalarEvolution *SE;
+
/// @brief All loops currently under construction.
llvm::SmallVector<llvm::Loop *, 8> ActiveLoops;
/// @brief Metadata pointing to parallel loops currently under construction.
llvm::SmallVector<llvm::MDNode *, 8> ParallelLoops;
+
+ /// @brief The alias scope domain for the current SCoP.
+ llvm::MDNode *AliasScopeDomain;
+
+ /// @brief A map from each base pointer to its alias scope.
+ llvm::DenseMap<llvm::Value *, llvm::MDNode *> AliasScopeMap;
+
+ /// @brief A map from a base pointer to the alias scopes of all others.
+ llvm::DenseMap<llvm::Value *, llvm::MDNode *> OtherAliasScopeListMap;
};
/// @brief Add Polly specifics when running IRBuilder.
diff --git a/polly/lib/CodeGen/IRBuilder.cpp b/polly/lib/CodeGen/IRBuilder.cpp
index 2fc8419..d01dca0 100644
--- a/polly/lib/CodeGen/IRBuilder.cpp
+++ b/polly/lib/CodeGen/IRBuilder.cpp
@@ -14,35 +14,83 @@
#include "polly/CodeGen/IRBuilder.h"
+#include "polly/ScopInfo.h"
+#include "polly/Support/ScopHelper.h"
+
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
using namespace polly;
-/// @brief Get the loop id metadata node.
+/// @brief Get a self referencing id metadata node.
///
-/// Each loop is identified by a self referencing metadata node of the form:
+/// The MDNode looks like this (if arg0/arg1 are not null):
///
-/// '!n = metadata !{metadata !n}'
+/// '!n = metadata !{metadata !n, arg0, arg1}'
///
-/// This functions creates such metadata on demand if not yet available.
-///
-/// @return The loop id metadata node.
-static MDNode *getLoopID(Loop *L) {
- Value *Args[] = {0};
- MDNode *LoopID = MDNode::get(L->getHeader()->getContext(), Args);
- LoopID->replaceOperandWith(0, LoopID);
- return LoopID;
+/// @return The self referencing id metadata node.
+static MDNode *getID(LLVMContext &Ctx, Value *arg0 = nullptr,
+ Value *arg1 = nullptr) {
+ MDNode *ID;
+ SmallVector<Value *, 3> Args;
+ Args.push_back(nullptr);
+
+ if (arg0)
+ Args.push_back(arg0);
+ if (arg1)
+ Args.push_back(arg1);
+
+ ID = MDNode::get(Ctx, Args);
+ ID->replaceOperandWith(0, ID);
+ return ID;
+}
+
+LoopAnnotator::LoopAnnotator() : SE(nullptr), AliasScopeDomain(nullptr) {}
+
+void LoopAnnotator::buildAliasScopes(Scop &S) {
+ SE = S.getSE();
+
+ LLVMContext &Ctx = SE->getContext();
+ AliasScopeDomain = getID(Ctx, MDString::get(Ctx, "polly.alias.scope.domain"));
+
+ AliasScopeMap.clear();
+ OtherAliasScopeListMap.clear();
+
+ SetVector<Value *> BasePtrs;
+ for (ScopStmt *Stmt : S)
+ for (MemoryAccess *MA : *Stmt)
+ BasePtrs.insert(MA->getBaseAddr());
+
+ std::string AliasScopeStr = "polly.alias.scope.";
+ for (Value *BasePtr : BasePtrs)
+ AliasScopeMap[BasePtr] = getID(
+ Ctx, AliasScopeDomain,
+ MDString::get(Ctx, (AliasScopeStr + BasePtr->getName()).str().c_str()));
+
+ for (Value *BasePtr : BasePtrs) {
+ MDNode *AliasScopeList = MDNode::get(Ctx, {});
+ for (const auto &AliasScopePair : AliasScopeMap) {
+ if (BasePtr == AliasScopePair.first)
+ continue;
+
+ Value *Args = {AliasScopePair.second};
+ AliasScopeList =
+ MDNode::concatenate(AliasScopeList, MDNode::get(Ctx, Args));
+ }
+
+ OtherAliasScopeListMap[BasePtr] = AliasScopeList;
+ }
}
void polly::LoopAnnotator::pushLoop(Loop *L, bool IsParallel) {
+
ActiveLoops.push_back(L);
if (!IsParallel)
return;
BasicBlock *Header = L->getHeader();
- MDNode *Id = getLoopID(L);
+ MDNode *Id = getID(Header->getContext());
Value *Args[] = {Id};
MDNode *Ids = ParallelLoops.empty()
? MDNode::get(Header->getContext(), Args)
@@ -71,7 +119,26 @@
}
void polly::LoopAnnotator::annotate(Instruction *Inst) {
- if (!Inst->mayReadOrWriteMemory() || ParallelLoops.empty())
+ if (!Inst->mayReadOrWriteMemory())
+ return;
+
+ // TODO: Use the ScopArrayInfo once available here.
+ if (AliasScopeDomain) {
+ Value *BasePtr = nullptr;
+ if (isa<StoreInst>(Inst) || isa<LoadInst>(Inst)) {
+ const SCEV *PtrSCEV = SE->getSCEV(getPointerOperand(*Inst));
+ const SCEV *BaseSCEV = SE->getPointerBase(PtrSCEV);
+ if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(BaseSCEV))
+ BasePtr = SU->getValue();
+ }
+
+ if (BasePtr) {
+ Inst->setMetadata("alias.scope", AliasScopeMap[BasePtr]);
+ Inst->setMetadata("noalias", OtherAliasScopeListMap[BasePtr]);
+ }
+ }
+
+ if (ParallelLoops.empty())
return;
Inst->setMetadata("llvm.mem.parallel_loop_access", ParallelLoops.back());
diff --git a/polly/lib/CodeGen/IslCodeGeneration.cpp b/polly/lib/CodeGen/IslCodeGeneration.cpp
index f5599ee..68ed943 100644
--- a/polly/lib/CodeGen/IslCodeGeneration.cpp
+++ b/polly/lib/CodeGen/IslCodeGeneration.cpp
@@ -605,6 +605,10 @@
assert(!S.getRegion().isTopLevelRegion() &&
"Top level regions are not supported");
+ // Build the alias scopes for annotations first.
+ if (PollyAnnotateAliasScopes)
+ Annotator.buildAliasScopes(S);
+
BasicBlock *EnteringBB = simplifyRegion(&S, this);
PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator);
diff --git a/polly/lib/Support/RegisterPasses.cpp b/polly/lib/Support/RegisterPasses.cpp
index 7bc5973..d556885 100644
--- a/polly/lib/Support/RegisterPasses.cpp
+++ b/polly/lib/Support/RegisterPasses.cpp
@@ -141,6 +141,13 @@
cl::desc("Show the Polly CFG right after code generation"),
cl::Hidden, cl::init(false), cl::cat(PollyCategory));
+bool polly::PollyAnnotateAliasScopes;
+static cl::opt<bool, true> XPollyAnnotateAliasScopes(
+ "polly-annotate-alias-scopes",
+ cl::desc("Annotate memory instructions with alias scopes"),
+ cl::location(PollyAnnotateAliasScopes), cl::init(true), cl::ZeroOrMore,
+ cl::cat(PollyCategory));
+
namespace polly {
void initializePollyPasses(PassRegistry &Registry) {
#ifdef CLOOG_FOUND
diff --git a/polly/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll b/polly/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll
index e6b0e73..90a654b 100644
--- a/polly/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll
+++ b/polly/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll
@@ -3,16 +3,16 @@
; Check that we mark multiple parallel loops correctly including the memory instructions.
;
; CHECK-DAG: %polly.loop_cond[[COuter:[0-9]*]] = icmp sle i64 %polly.indvar{{[0-9]*}}, 1022
-; CHECK-DAG: br i1 %polly.loop_cond[[COuter]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop !0
+; CHECK-DAG: br i1 %polly.loop_cond[[COuter]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop ![[IDOuter:[0-9]*]]
;
; CHECK-DAG: %polly.loop_cond[[CInner:[0-9]*]] = icmp sle i64 %polly.indvar{{[0-9]*}}, 510
-; CHECK-DAG: br i1 %polly.loop_cond[[CInner]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop !2
+; CHECK-DAG: br i1 %polly.loop_cond[[CInner]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop ![[IDInner:[0-9]*]]
;
-; CHECK-DAG: store i32 %p_tmp{{[0-9]*}}, i32* %p_arrayidx{{[0-9]*}}, !llvm.mem.parallel_loop_access !1
+; CHECK-DAG: store i32 %p_tmp{{[0-9]*}}, i32* %p_arrayidx{{[0-9]*}}, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access !4
;
-; CHECK: !0 = metadata !{metadata !0}
-; CHECK: !1 = metadata !{metadata !0, metadata !2}
-; CHECK: !2 = metadata !{metadata !2}
+; CHECK-DAG: ![[IDOuter]] = metadata !{metadata ![[IDOuter]]}
+; CHECK-DAG: ![[IDInner]] = metadata !{metadata ![[IDInner]]}
+; CHECK-DAG: !4 = metadata !{metadata ![[IDOuter]], metadata ![[IDInner]]}
;
; void jd(int *A) {
; for (int i = 0; i < 1024; i++)
diff --git a/polly/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll b/polly/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll
index 4ad9158..d1b519c 100644
--- a/polly/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll
+++ b/polly/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll
@@ -38,17 +38,17 @@
; SEQUENTIAL: @test-one
; SEQUENTIAL-NOT: !llvm.mem.parallel_loop_access
-; SEQUENTIAL-NOT: !llvm.loop !0
+; SEQUENTIAL-NOT: !llvm.loop
; SEQUENTIAL-SCEV: @test-one
; SEQUENTIAL-SCEV-NOT: !llvm.mem.parallel_loop_access
; SEQUENTIAL-SCEV-NOT: !llvm.loop
; PARALLEL: @test-one
-; PARALLEL: store i32 1, i32* %p_scevgep, !llvm.mem.parallel_loop_access !0
-; PARALLEL: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop !0
+; PARALLEL: store i32 1, i32* %p_scevgep, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID:[0-9]*]]
+; PARALLEL: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID]]
; PARALLEL-SCEV: @test-one
-; PARALLEL-SCEV: store i32 1, i32* %scevgep1, !llvm.mem.parallel_loop_access !0
-; PARALLEL-SCEV: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop !0
+; PARALLEL-SCEV: store i32 1, i32* %scevgep1, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID:[0-9]*]]
+; PARALLEL-SCEV: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID]]
; This loop has memory dependences that require at least a simple dependence
; analysis to detect the parallelism.
@@ -87,16 +87,16 @@
; SEQUENTIAL: @test-two
; SEQUENTIAL-NOT: !llvm.mem.parallel_loop_access
-; SEQUENTIAL-NOT: !llvm.loop !0
+; SEQUENTIAL-NOT: !llvm.loop
; SEQUENTIAL-SCEV: @test-two
; SEQUENTIAL-SCEV-NOT: !llvm.mem.parallel_loop_access
; SEQUENTIAL-SCEV-NOT: !llvm.loop
; PARALLEL: @test-two
-; PARALLEL: %val_p_scalar_ = load i32* %p_scevgepload, !llvm.mem.parallel_loop_access !1
-; PARALLEL: store i32 %val_p_scalar_, i32* %p_scevgepstore, !llvm.mem.parallel_loop_access !1
-; PARALLEL: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop !1
+; PARALLEL: %val_p_scalar_ = load i32* %p_scevgepload, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID:[0-9]*]]
+; PARALLEL: store i32 %val_p_scalar_, i32* %p_scevgepstore, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID]]
+; PARALLEL: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID]]
; PARALLEL-SCEV: @test-two
-; PARALLEL-SCEV: %val_p_scalar_ = load i32* %scevgep, !llvm.mem.parallel_loop_access !1
-; PARALLEL-SCEV: store i32 %val_p_scalar_, i32* %scevgep1, !llvm.mem.parallel_loop_access !1
-; PARALLEL-SCEV: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop !1
+; PARALLEL-SCEV: %val_p_scalar_ = load i32* %scevgep, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID:[0-9]*]]
+; PARALLEL-SCEV: store i32 %val_p_scalar_, i32* %scevgep1, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID]]
+; PARALLEL-SCEV: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID]]
diff --git a/polly/test/Isl/CodeGen/annotated_alias_scopes.ll b/polly/test/Isl/CodeGen/annotated_alias_scopes.ll
new file mode 100644
index 0000000..05a8ee9
--- /dev/null
+++ b/polly/test/Isl/CodeGen/annotated_alias_scopes.ll
@@ -0,0 +1,78 @@
+; RUN: opt %loadPolly -polly-code-generator=isl -polly-codegen-isl -S < %s | FileCheck %s --check-prefix=SCOPES
+; RUN: opt %loadPolly -polly-code-generator=isl -polly-codegen-isl -polly-annotate-alias-scopes=false -S < %s | FileCheck %s --check-prefix=NOSCOPES
+;
+; Check that we create alias scopes that indicate the accesses to A, B and C cannot alias in any way.
+;
+; SCOPES: %[[BIdx:[._a-zA-Z0-9]*]] = getelementptr inbounds i32* %B, i64 %polly.indvar
+; SCOPES: load i32* %[[BIdx]], !alias.scope ![[AliasScopeB:[0-9]*]], !noalias ![[NoAliasB:[0-9]*]]
+; SCOPES: %[[CIdx:[._a-zA-Z0-9]*]] = getelementptr inbounds float* %C, i64 %polly.indvar
+; SCOPES: load float* %[[CIdx]], !alias.scope ![[AliasScopeC:[0-9]*]], !noalias ![[NoAliasC:[0-9]*]]
+; SCOPES: %[[AIdx:[._a-zA-Z0-9]*]] = getelementptr inbounds i32* %A, i64 %polly.indvar
+; SCOPES: store i32 %{{[._a-zA-Z0-9]*}}, i32* %[[AIdx]], !alias.scope ![[AliasScopeA:[0-9]*]], !noalias ![[NoAliasA:[0-9]*]]
+;
+; SCOPES: ![[AliasScopeB]] = metadata !{metadata ![[AliasScopeB]], metadata !{{[0-9]*}}, metadata !"polly.alias.scope.B"}
+; SCOPES: ![[NoAliasB]] = metadata !{
+; SCOPES-DAG: metadata ![[AliasScopeA]]
+; SCOPES-DAG: metadata ![[AliasScopeC]]
+; SCOPES: }
+; SCOPES-DAG: ![[AliasScopeA]] = metadata !{metadata ![[AliasScopeA]], metadata !{{[0-9]*}}, metadata !"polly.alias.scope.A"}
+; SCOPES-DAG: ![[AliasScopeC]] = metadata !{metadata ![[AliasScopeC]], metadata !{{[0-9]*}}, metadata !"polly.alias.scope.C"}
+; SCOPES: ![[NoAliasC]] = metadata !{
+; SCOPES-DAG: metadata ![[AliasScopeA]]
+; SCOPES-DAG: metadata ![[AliasScopeB]]
+; SCOPES: }
+; SCOPES: ![[NoAliasA]] = metadata !{
+; SCOPES-DAG: metadata ![[AliasScopeB]]
+; SCOPES-DAG: metadata ![[AliasScopeC]]
+; SCOPES: }
+;
+; NOSCOPES: %[[BIdx:[._a-zA-Z0-9]*]] = getelementptr inbounds i32* %B, i64 %polly.indvar
+; NOSCOPES: load i32* %[[BIdx]]
+; NOSCOPES-NOT: alias.scope
+; NOSCOPES-NOT: noalias
+; NOSCOPES: %[[CIdx:[._a-zA-Z0-9]*]] = getelementptr inbounds float* %C, i64 %polly.indvar
+; NOSCOPES: load float* %[[CIdx]]
+; NOSCOPES-NOT: alias.scope
+; NOSCOPES-NOT: noalias
+; NOSCOPES: %[[AIdx:[._a-zA-Z0-9]*]] = getelementptr inbounds i32* %A, i64 %polly.indvar
+; NOSCOPES: store i32 %{{[._a-zA-Z0-9]*}}, i32* %[[AIdx]]
+; NOSCOPES-NOT: alias.scope
+; NOSCOPES-NOT: noalias
+;
+; NOSCOPES-NOT: metadata
+;
+; void jd(int *A, int *B, float *C) {
+; for (int i = 0; i < 1024; i++)
+; A[i] = B[i] + C[i];
+; }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @jd(i32* %A, i32* %B, float* %C) {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %exitcond = icmp ne i64 %indvars.iv, 1024
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv
+ %tmp = load i32* %arrayidx, align 4
+ %conv = sitofp i32 %tmp to float
+ %arrayidx2 = getelementptr inbounds float* %C, i64 %indvars.iv
+ %tmp1 = load float* %arrayidx2, align 4
+ %add = fadd fast float %conv, %tmp1
+ %conv3 = fptosi float %add to i32
+ %arrayidx5 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ store i32 %conv3, i32* %arrayidx5, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}