Introduce a hybrid target to generate code for either the GPU or CPU
Summary:
Introduce a "hybrid" `-polly-target` option to optimise code for either the GPU or CPU.
When this target is selected, PPCGCodeGeneration first attempts to optimise a Scop for the GPU. If PPCGCodeGeneration does not modify the Scop, the Scop is then processed by the passes that form the CPU pipeline, i.e. IslScheduleOptimizerPass, IslAstInfoWrapperPass and CodeGeneration.
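If PPCGCodeGeneration does modify the Scop, the Scop is marked to be skipped, so that the subsequent CPU passes (schedule optimisation, AST generation and code generation) return early without touching it.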
Reviewers: grosser, Meinersbur, bollu
Reviewed By: grosser
Subscribers: kbarton, nemanjai, pollydev
Tags: #polly
Differential Revision: https://reviews.llvm.org/D34054
llvm-svn: 306863
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h
index 431a9c3..cafc377 100644
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -1633,6 +1633,9 @@
/// Number of copy statements.
unsigned CopyStmtsNum;
+ /// Flag to indicate if the Scop is to be skipped.
+ bool SkipScop;
+
typedef std::list<ScopStmt> StmtSet;
/// The statements in this Scop.
StmtSet Stmts;
@@ -2366,6 +2369,12 @@
/// Check if the SCoP has been optimized by the scheduler.
bool isOptimized() const { return IsOptimized; }
+ /// Mark the SCoP to be skipped by ScopPass passes.
+ void markAsToBeSkipped() { SkipScop = true; }
+
+ /// Check if the SCoP is to be skipped by ScopPass passes.
+ bool isToBeSkipped() const { return SkipScop; }
+
/// Get the name of the entry and exit blocks of this Scop.
///
/// These along with the function name can uniquely identify a Scop.
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index fffd344..116445b 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -3497,8 +3497,8 @@
Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI,
ScopDetection::DetectionContext &DC)
: SE(&ScalarEvolution), R(R), name(R.getNameStr()), IsOptimized(false),
- HasSingleExitEdge(R.getExitingBlock()), HasErrorBlock(false),
- MaxLoopDepth(0), CopyStmtsNum(0), DC(DC),
+ SkipScop(false), HasSingleExitEdge(R.getExitingBlock()),
+ HasErrorBlock(false), MaxLoopDepth(0), CopyStmtsNum(0), DC(DC),
IslCtx(isl_ctx_alloc(), isl_ctx_free), Context(nullptr),
Affinator(this, LI), AssumedContext(nullptr), InvalidContext(nullptr),
Schedule(nullptr) {
diff --git a/polly/lib/CodeGen/CodeGeneration.cpp b/polly/lib/CodeGen/CodeGeneration.cpp
index 0e80058..8617e1c 100644
--- a/polly/lib/CodeGen/CodeGeneration.cpp
+++ b/polly/lib/CodeGen/CodeGeneration.cpp
@@ -278,6 +278,10 @@
/// Generate LLVM-IR for the SCoP @p S.
bool runOnScop(Scop &S) override {
+ // Skip SCoPs in case they're already code-generated by PPCGCodeGeneration.
+ if (S.isToBeSkipped())
+ return false;
+
AI = &getAnalysis<IslAstInfoWrapperPass>().getAI();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
diff --git a/polly/lib/CodeGen/IslAst.cpp b/polly/lib/CodeGen/IslAst.cpp
index 1df582f..5b04a9e 100644
--- a/polly/lib/CodeGen/IslAst.cpp
+++ b/polly/lib/CodeGen/IslAst.cpp
@@ -624,6 +624,11 @@
void IslAstInfoWrapperPass::releaseMemory() { Ast.reset(); }
bool IslAstInfoWrapperPass::runOnScop(Scop &Scop) {
+
+ // Skip SCoPs in case they're already handled by PPCGCodeGeneration.
+ if (Scop.isToBeSkipped())
+ return false;
+
const Dependences &D =
getAnalysis<DependenceInfo>().getDependences(Dependences::AL_Statement);
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index 199d741..eb46d45 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -2840,8 +2840,10 @@
auto PPCGProg = createPPCGProg(PPCGScop);
auto PPCGGen = generateGPU(PPCGScop, PPCGProg);
- if (PPCGGen->tree)
+ if (PPCGGen->tree) {
generateCode(isl_ast_node_copy(PPCGGen->tree), PPCGProg);
+ CurrentScop.markAsToBeSkipped();
+ }
freeOptions(PPCGScop);
freePPCGGen(PPCGGen);
diff --git a/polly/lib/Support/RegisterPasses.cpp b/polly/lib/Support/RegisterPasses.cpp
index 6fb8907..d25848f 100644
--- a/polly/lib/Support/RegisterPasses.cpp
+++ b/polly/lib/Support/RegisterPasses.cpp
@@ -92,13 +92,15 @@
clEnumValN(CODEGEN_NONE, "none", "No code generation")),
cl::Hidden, cl::init(CODEGEN_FULL), cl::ZeroOrMore, cl::cat(PollyCategory));
-enum TargetChoice { TARGET_CPU, TARGET_GPU };
+enum TargetChoice { TARGET_CPU, TARGET_GPU, TARGET_HYBRID };
static cl::opt<TargetChoice>
Target("polly-target", cl::desc("The hardware to target"),
cl::values(clEnumValN(TARGET_CPU, "cpu", "generate CPU code")
#ifdef GPU_CODEGEN
,
- clEnumValN(TARGET_GPU, "gpu", "generate GPU code")
+ clEnumValN(TARGET_GPU, "gpu", "generate GPU code"),
+ clEnumValN(TARGET_HYBRID, "hybrid",
+ "generate GPU code (preferably) or CPU code")
#endif
),
cl::init(TARGET_CPU), cl::ZeroOrMore, cl::cat(PollyCategory));
@@ -314,9 +316,12 @@
if (EnablePruneUnprofitable)
PM.add(polly::createPruneUnprofitablePass());
- if (Target == TARGET_GPU) {
- // GPU generation provides its own scheduling optimization strategy.
- } else {
+#ifdef GPU_CODEGEN
+ if (Target == TARGET_HYBRID)
+ PM.add(
+ polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
+#endif
+ if (Target == TARGET_CPU || Target == TARGET_HYBRID)
switch (Optimizer) {
case OPTIMIZER_NONE:
break; /* Do nothing */
@@ -325,17 +330,11 @@
PM.add(polly::createIslScheduleOptimizerPass());
break;
}
- }
if (ExportJScop)
PM.add(polly::createJSONExporterPass());
- if (Target == TARGET_GPU) {
-#ifdef GPU_CODEGEN
- PM.add(
- polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
-#endif
- } else {
+ if (Target == TARGET_CPU || Target == TARGET_HYBRID)
switch (CodeGeneration) {
case CODEGEN_AST:
PM.add(polly::createIslAstInfoWrapperPassPass());
@@ -346,7 +345,11 @@
case CODEGEN_NONE:
break;
}
- }
+#ifdef GPU_CODEGEN
+ else
+ PM.add(
+ polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
+#endif
// FIXME: This dummy ModulePass keeps some programs from miscompiling,
// probably some not correctly preserved analyses. It acts as a barrier to
diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp
index a7b174a..3982ed4 100644
--- a/polly/lib/Transform/ScheduleOptimizer.cpp
+++ b/polly/lib/Transform/ScheduleOptimizer.cpp
@@ -1443,6 +1443,10 @@
bool IslScheduleOptimizer::runOnScop(Scop &S) {
+ // Skip SCoPs in case they're already optimised by PPCGCodeGeneration
+ if (S.isToBeSkipped())
+ return false;
+
// Skip empty SCoPs but still allow code generation as it will delete the
// loops present but not needed.
if (S.getSize() == 0) {