blob: 03bf5cd11dcc326f8371e3bc0a289761a1096b1c [file] [log] [blame]
Tobias Grosser30aa24c2011-05-14 19:02:06 +00001//===- Schedule.cpp - Calculate an optimized schedule ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
Tobias Grosser234a4822015-08-15 09:34:33 +000010// This pass generates an entirey new schedule tree from the data dependences
11// and iteration domains. The new schedule tree is computed in two steps:
Tobias Grosser30aa24c2011-05-14 19:02:06 +000012//
Tobias Grosser234a4822015-08-15 09:34:33 +000013// 1) The isl scheduling optimizer is run
14//
15// The isl scheduling optimizer creates a new schedule tree that maximizes
16// parallelism and tileability and minimizes data-dependence distances. The
17// algorithm used is a modified version of the ``Pluto'' algorithm:
18//
19// U. Bondhugula, A. Hartono, J. Ramanujam, and P. Sadayappan.
20// A Practical Automatic Polyhedral Parallelizer and Locality Optimizer.
21// In Proceedings of the 2008 ACM SIGPLAN Conference On Programming Language
22// Design and Implementation, PLDI ’08, pages 101–113. ACM, 2008.
23//
24// 2) A set of post-scheduling transformations is applied on the schedule tree.
25//
26// These optimizations include:
27//
28// - Tiling of the innermost tilable bands
29// - Prevectorization - The coice of a possible outer loop that is strip-mined
30// to the innermost level to enable inner-loop
31// vectorization.
32// - Some optimizations for spatial locality are also planned.
33//
34// For a detailed description of the schedule tree itself please see section 6
35// of:
36//
37// Polyhedral AST generation is more than scanning polyhedra
38// Tobias Grosser, Sven Verdoolaege, Albert Cohen
39// ACM Transations on Programming Languages and Systems (TOPLAS),
40// 37(4), July 2015
41// http://www.grosser.es/#pub-polyhedral-AST-generation
42//
43// This publication also contains a detailed discussion of the different options
44// for polyhedral loop unrolling, full/partial tile separation and other uses
45// of the schedule tree.
46//
Tobias Grosser30aa24c2011-05-14 19:02:06 +000047//===----------------------------------------------------------------------===//
48
Tobias Grosser967239c2011-10-23 20:59:44 +000049#include "polly/ScheduleOptimizer.h"
Tobias Grosserba0d0922015-05-09 09:13:42 +000050#include "polly/CodeGen/CodeGeneration.h"
51#include "polly/DependenceInfo.h"
52#include "polly/LinkAllPasses.h"
53#include "polly/Options.h"
54#include "polly/ScopInfo.h"
55#include "polly/Support/GICHelper.h"
Roman Gareev42402c92016-06-22 09:52:37 +000056#include "llvm/Analysis/TargetTransformInfo.h"
Tobias Grosserba0d0922015-05-09 09:13:42 +000057#include "llvm/Support/Debug.h"
Tobias Grosser2493e922011-12-07 07:42:57 +000058#include "isl/aff.h"
Tobias Grosserde68cc92011-06-30 20:01:02 +000059#include "isl/band.h"
Tobias Grosser8ad6bc32012-01-31 13:26:29 +000060#include "isl/constraint.h"
61#include "isl/map.h"
Tobias Grosser42152ff2012-01-30 19:38:47 +000062#include "isl/options.h"
Tobias Grosser97d87452015-05-30 06:46:59 +000063#include "isl/printer.h"
Tobias Grosser8ad6bc32012-01-31 13:26:29 +000064#include "isl/schedule.h"
Tobias Grosserbbb4cec2015-03-22 12:06:39 +000065#include "isl/schedule_node.h"
Tobias Grosser8ad6bc32012-01-31 13:26:29 +000066#include "isl/space.h"
Tobias Grossercd524dc2015-05-09 09:36:38 +000067#include "isl/union_map.h"
68#include "isl/union_set.h"
Tobias Grosser30aa24c2011-05-14 19:02:06 +000069
70using namespace llvm;
71using namespace polly;
72
Chandler Carruth95fef942014-04-22 03:30:19 +000073#define DEBUG_TYPE "polly-opt-isl"
74
Tobias Grossera26db472012-01-30 19:38:43 +000075static cl::opt<std::string>
Tobias Grosser483a90d2014-07-09 10:50:10 +000076 OptimizeDeps("polly-opt-optimize-only",
77 cl::desc("Only a certain kind of dependences (all/raw)"),
78 cl::Hidden, cl::init("all"), cl::ZeroOrMore,
79 cl::cat(PollyCategory));
Tobias Grosser1deda292012-02-14 14:02:48 +000080
81static cl::opt<std::string>
Tobias Grosser483a90d2014-07-09 10:50:10 +000082 SimplifyDeps("polly-opt-simplify-deps",
83 cl::desc("Dependences should be simplified (yes/no)"),
84 cl::Hidden, cl::init("yes"), cl::ZeroOrMore,
85 cl::cat(PollyCategory));
Tobias Grossera26db472012-01-30 19:38:43 +000086
Tobias Grosser483a90d2014-07-09 10:50:10 +000087static cl::opt<int> MaxConstantTerm(
88 "polly-opt-max-constant-term",
89 cl::desc("The maximal constant term allowed (-1 is unlimited)"), cl::Hidden,
90 cl::init(20), cl::ZeroOrMore, cl::cat(PollyCategory));
Tobias Grosser992e60c2012-02-20 08:41:15 +000091
Tobias Grosser483a90d2014-07-09 10:50:10 +000092static cl::opt<int> MaxCoefficient(
93 "polly-opt-max-coefficient",
94 cl::desc("The maximal coefficient allowed (-1 is unlimited)"), cl::Hidden,
95 cl::init(20), cl::ZeroOrMore, cl::cat(PollyCategory));
96
97static cl::opt<std::string> FusionStrategy(
98 "polly-opt-fusion", cl::desc("The fusion strategy to choose (min/max)"),
99 cl::Hidden, cl::init("min"), cl::ZeroOrMore, cl::cat(PollyCategory));
Tobias Grosser92f54802012-02-20 08:41:47 +0000100
Tobias Grossere602a072013-05-07 07:30:56 +0000101static cl::opt<std::string>
Tobias Grosser483a90d2014-07-09 10:50:10 +0000102 MaximizeBandDepth("polly-opt-maximize-bands",
103 cl::desc("Maximize the band depth (yes/no)"), cl::Hidden,
104 cl::init("yes"), cl::ZeroOrMore, cl::cat(PollyCategory));
Tobias Grosserb3ad85b2012-01-30 19:38:50 +0000105
Michael Kruse315aa322016-05-02 11:35:27 +0000106static cl::opt<std::string> OuterCoincidence(
107 "polly-opt-outer-coincidence",
108 cl::desc("Try to construct schedules where the outer member of each band "
109 "satisfies the coincidence constraints (yes/no)"),
110 cl::Hidden, cl::init("no"), cl::ZeroOrMore, cl::cat(PollyCategory));
111
Tobias Grosser07c1c2f2015-08-19 08:46:11 +0000112static cl::opt<int> PrevectorWidth(
113 "polly-prevect-width",
114 cl::desc(
115 "The number of loop iterations to strip-mine for pre-vectorization"),
116 cl::Hidden, cl::init(4), cl::ZeroOrMore, cl::cat(PollyCategory));
117
Tobias Grosser04832712015-08-20 13:45:02 +0000118static cl::opt<bool> FirstLevelTiling("polly-tiling",
119 cl::desc("Enable loop tiling"),
120 cl::init(true), cl::ZeroOrMore,
121 cl::cat(PollyCategory));
122
Roman Gareev42402c92016-06-22 09:52:37 +0000123static cl::opt<int> LatencyVectorFma(
124 "polly-target-latency-vector-fma",
125 cl::desc("The minimal number of cycles between issuing two "
126 "dependent consecutive vector fused multiply-add "
127 "instructions."),
128 cl::Hidden, cl::init(8), cl::ZeroOrMore, cl::cat(PollyCategory));
129
130static cl::opt<int> ThrougputVectorFma(
131 "polly-target-througput-vector-fma",
132 cl::desc("A throughput of the processor floating-point arithmetic units "
133 "expressed in the number of vector fused multiply-add "
134 "instructions per clock cycle."),
135 cl::Hidden, cl::init(1), cl::ZeroOrMore, cl::cat(PollyCategory));
136
Tobias Grosser04832712015-08-20 13:45:02 +0000137static cl::opt<int> FirstLevelDefaultTileSize(
Tobias Grosser483a90d2014-07-09 10:50:10 +0000138 "polly-default-tile-size",
139 cl::desc("The default tile size (if not enough were provided by"
140 " --polly-tile-sizes)"),
141 cl::Hidden, cl::init(32), cl::ZeroOrMore, cl::cat(PollyCategory));
Johannes Doerfertc3958b22014-05-28 17:21:02 +0000142
Tobias Grosser04832712015-08-20 13:45:02 +0000143static cl::list<int> FirstLevelTileSizes(
144 "polly-tile-sizes", cl::desc("A tile size for each loop dimension, filled "
145 "with --polly-default-tile-size"),
146 cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));
147
148static cl::opt<bool>
149 SecondLevelTiling("polly-2nd-level-tiling",
150 cl::desc("Enable a 2nd level loop of loop tiling"),
151 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
152
153static cl::opt<int> SecondLevelDefaultTileSize(
154 "polly-2nd-level-default-tile-size",
155 cl::desc("The default 2nd-level tile size (if not enough were provided by"
156 " --polly-2nd-level-tile-sizes)"),
157 cl::Hidden, cl::init(16), cl::ZeroOrMore, cl::cat(PollyCategory));
158
159static cl::list<int>
160 SecondLevelTileSizes("polly-2nd-level-tile-sizes",
161 cl::desc("A tile size for each loop dimension, filled "
162 "with --polly-default-tile-size"),
163 cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated,
164 cl::cat(PollyCategory));
165
Tobias Grosser42e24892015-08-20 13:45:05 +0000166static cl::opt<bool> RegisterTiling("polly-register-tiling",
167 cl::desc("Enable register tiling"),
168 cl::init(false), cl::ZeroOrMore,
169 cl::cat(PollyCategory));
170
171static cl::opt<int> RegisterDefaultTileSize(
172 "polly-register-tiling-default-tile-size",
173 cl::desc("The default register tile size (if not enough were provided by"
174 " --polly-register-tile-sizes)"),
175 cl::Hidden, cl::init(2), cl::ZeroOrMore, cl::cat(PollyCategory));
176
177static cl::list<int>
178 RegisterTileSizes("polly-register-tile-sizes",
179 cl::desc("A tile size for each loop dimension, filled "
180 "with --polly-register-tile-size"),
181 cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated,
182 cl::cat(PollyCategory));
183
Roman Gareev9c3eb592016-05-28 16:17:58 +0000184static cl::opt<bool>
185 PMBasedOpts("polly-pattern-matching-based-opts",
186 cl::desc("Perform optimizations based on pattern matching"),
187 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
188
Tobias Grosserca7f5bb2015-10-20 09:12:21 +0000189/// @brief Create an isl_union_set, which describes the isolate option based
190/// on IsoalteDomain.
191///
192/// @param IsolateDomain An isl_set whose last dimension is the only one that
193/// should belong to the current band node.
194static __isl_give isl_union_set *
195getIsolateOptions(__isl_take isl_set *IsolateDomain) {
196 auto Dims = isl_set_dim(IsolateDomain, isl_dim_set);
197 auto *IsolateRelation = isl_map_from_domain(IsolateDomain);
198 IsolateRelation = isl_map_move_dims(IsolateRelation, isl_dim_out, 0,
199 isl_dim_in, Dims - 1, 1);
200 auto *IsolateOption = isl_map_wrap(IsolateRelation);
201 auto *Id = isl_id_alloc(isl_set_get_ctx(IsolateOption), "isolate", NULL);
202 return isl_union_set_from_set(isl_set_set_tuple_id(IsolateOption, Id));
203}
204
205/// @brief Create an isl_union_set, which describes the atomic option for the
206/// dimension of the current node.
207///
208/// It may help to reduce the size of generated code.
209///
210/// @param Ctx An isl_ctx, which is used to create the isl_union_set.
211static __isl_give isl_union_set *getAtomicOptions(__isl_take isl_ctx *Ctx) {
212 auto *Space = isl_space_set_alloc(Ctx, 0, 1);
213 auto *AtomicOption = isl_set_universe(Space);
214 auto *Id = isl_id_alloc(Ctx, "atomic", NULL);
215 return isl_union_set_from_set(isl_set_set_tuple_id(AtomicOption, Id));
216}
217
218/// @brief Make the last dimension of Set to take values
219/// from 0 to VectorWidth - 1.
220///
221/// @param Set A set, which should be modified.
222/// @param VectorWidth A parameter, which determines the constraint.
223static __isl_give isl_set *addExtentConstraints(__isl_take isl_set *Set,
224 int VectorWidth) {
225 auto Dims = isl_set_dim(Set, isl_dim_set);
226 auto Space = isl_set_get_space(Set);
227 auto *LocalSpace = isl_local_space_from_space(Space);
228 auto *ExtConstr =
229 isl_constraint_alloc_inequality(isl_local_space_copy(LocalSpace));
230 ExtConstr = isl_constraint_set_constant_si(ExtConstr, 0);
231 ExtConstr =
232 isl_constraint_set_coefficient_si(ExtConstr, isl_dim_set, Dims - 1, 1);
233 Set = isl_set_add_constraint(Set, ExtConstr);
234 ExtConstr = isl_constraint_alloc_inequality(LocalSpace);
235 ExtConstr = isl_constraint_set_constant_si(ExtConstr, VectorWidth - 1);
236 ExtConstr =
237 isl_constraint_set_coefficient_si(ExtConstr, isl_dim_set, Dims - 1, -1);
238 return isl_set_add_constraint(Set, ExtConstr);
239}
240
241/// @brief Build the desired set of partial tile prefixes.
242///
243/// We build a set of partial tile prefixes, which are prefixes of the vector
244/// loop that have exactly VectorWidth iterations.
245///
246/// 1. Get all prefixes of the vector loop.
247/// 2. Extend it to a set, which has exactly VectorWidth iterations for
248/// any prefix from the set that was built on the previous step.
249/// 3. Subtract loop domain from it, project out the vector loop dimension and
Roman Gareev76614d32016-05-31 11:22:21 +0000250/// get a set of prefixes, which don't have exactly VectorWidth iterations.
Tobias Grosserca7f5bb2015-10-20 09:12:21 +0000251/// 4. Subtract it from all prefixes of the vector loop and get the desired
252/// set.
253///
254/// @param ScheduleRange A range of a map, which describes a prefix schedule
255/// relation.
256static __isl_give isl_set *
257getPartialTilePrefixes(__isl_take isl_set *ScheduleRange, int VectorWidth) {
258 auto Dims = isl_set_dim(ScheduleRange, isl_dim_set);
259 auto *LoopPrefixes = isl_set_project_out(isl_set_copy(ScheduleRange),
260 isl_dim_set, Dims - 1, 1);
261 auto *ExtentPrefixes =
262 isl_set_add_dims(isl_set_copy(LoopPrefixes), isl_dim_set, 1);
263 ExtentPrefixes = addExtentConstraints(ExtentPrefixes, VectorWidth);
264 auto *BadPrefixes = isl_set_subtract(ExtentPrefixes, ScheduleRange);
265 BadPrefixes = isl_set_project_out(BadPrefixes, isl_dim_set, Dims - 1, 1);
266 return isl_set_subtract(LoopPrefixes, BadPrefixes);
267}
268
269__isl_give isl_schedule_node *ScheduleTreeOptimizer::isolateFullPartialTiles(
270 __isl_take isl_schedule_node *Node, int VectorWidth) {
271 assert(isl_schedule_node_get_type(Node) == isl_schedule_node_band);
272 Node = isl_schedule_node_child(Node, 0);
273 Node = isl_schedule_node_child(Node, 0);
274 auto *SchedRelUMap = isl_schedule_node_get_prefix_schedule_relation(Node);
275 auto *ScheduleRelation = isl_map_from_union_map(SchedRelUMap);
276 auto *ScheduleRange = isl_map_range(ScheduleRelation);
277 auto *IsolateDomain = getPartialTilePrefixes(ScheduleRange, VectorWidth);
278 auto *AtomicOption = getAtomicOptions(isl_set_get_ctx(IsolateDomain));
279 auto *IsolateOption = getIsolateOptions(IsolateDomain);
280 Node = isl_schedule_node_parent(Node);
281 Node = isl_schedule_node_parent(Node);
282 auto *Options = isl_union_set_union(IsolateOption, AtomicOption);
283 Node = isl_schedule_node_band_set_ast_build_options(Node, Options);
284 return Node;
285}
286
Tobias Grosserb241d922015-07-28 18:03:36 +0000287__isl_give isl_schedule_node *
Tobias Grosserfa57e9b2015-08-24 06:01:47 +0000288ScheduleTreeOptimizer::prevectSchedBand(__isl_take isl_schedule_node *Node,
289 unsigned DimToVectorize,
290 int VectorWidth) {
Tobias Grosserb241d922015-07-28 18:03:36 +0000291 assert(isl_schedule_node_get_type(Node) == isl_schedule_node_band);
Tobias Grosserc6699b72011-06-30 20:29:13 +0000292
Tobias Grosserb241d922015-07-28 18:03:36 +0000293 auto Space = isl_schedule_node_band_get_space(Node);
294 auto ScheduleDimensions = isl_space_dim(Space, isl_dim_set);
295 isl_space_free(Space);
296 assert(DimToVectorize < ScheduleDimensions);
Tobias Grosserf5338802011-10-06 00:03:35 +0000297
Tobias Grosserb241d922015-07-28 18:03:36 +0000298 if (DimToVectorize > 0) {
299 Node = isl_schedule_node_band_split(Node, DimToVectorize);
300 Node = isl_schedule_node_child(Node, 0);
301 }
302 if (DimToVectorize < ScheduleDimensions - 1)
303 Node = isl_schedule_node_band_split(Node, 1);
304 Space = isl_schedule_node_band_get_space(Node);
305 auto Sizes = isl_multi_val_zero(Space);
306 auto Ctx = isl_schedule_node_get_ctx(Node);
307 Sizes =
308 isl_multi_val_set_val(Sizes, 0, isl_val_int_from_si(Ctx, VectorWidth));
309 Node = isl_schedule_node_band_tile(Node, Sizes);
Tobias Grosserca7f5bb2015-10-20 09:12:21 +0000310 Node = isolateFullPartialTiles(Node, VectorWidth);
Tobias Grosserb241d922015-07-28 18:03:36 +0000311 Node = isl_schedule_node_child(Node, 0);
Tobias Grosser42e24892015-08-20 13:45:05 +0000312 // Make sure the "trivially vectorizable loop" is not unrolled. Otherwise,
313 // we will have troubles to match it in the backend.
314 Node = isl_schedule_node_band_set_ast_build_options(
Tobias Grosserfc490a92015-08-20 19:08:16 +0000315 Node, isl_union_set_read_from_str(Ctx, "{ unroll[x]: 1 = 0 }"));
316 Node = isl_schedule_node_band_sink(Node);
Tobias Grosserb241d922015-07-28 18:03:36 +0000317 Node = isl_schedule_node_child(Node, 0);
Roman Gareev11001e12016-02-23 09:00:13 +0000318 if (isl_schedule_node_get_type(Node) == isl_schedule_node_leaf)
319 Node = isl_schedule_node_parent(Node);
320 isl_id *LoopMarker = isl_id_alloc(Ctx, "SIMD", nullptr);
321 Node = isl_schedule_node_insert_mark(Node, LoopMarker);
Tobias Grosserb241d922015-07-28 18:03:36 +0000322 return Node;
Tobias Grosserc6699b72011-06-30 20:29:13 +0000323}
324
Tobias Grosserd891b542015-08-20 12:16:23 +0000325__isl_give isl_schedule_node *
Tobias Grosserfa57e9b2015-08-24 06:01:47 +0000326ScheduleTreeOptimizer::tileNode(__isl_take isl_schedule_node *Node,
327 const char *Identifier, ArrayRef<int> TileSizes,
328 int DefaultTileSize) {
Tobias Grosser9bdea572015-08-20 12:22:37 +0000329 auto Ctx = isl_schedule_node_get_ctx(Node);
330 auto Space = isl_schedule_node_band_get_space(Node);
331 auto Dims = isl_space_dim(Space, isl_dim_set);
332 auto Sizes = isl_multi_val_zero(Space);
Tobias Grosser1ac884d2015-08-23 09:11:00 +0000333 std::string IdentifierString(Identifier);
Tobias Grosser9bdea572015-08-20 12:22:37 +0000334 for (unsigned i = 0; i < Dims; i++) {
335 auto tileSize = i < TileSizes.size() ? TileSizes[i] : DefaultTileSize;
336 Sizes = isl_multi_val_set_val(Sizes, i, isl_val_int_from_si(Ctx, tileSize));
337 }
Tobias Grosser1ac884d2015-08-23 09:11:00 +0000338 auto TileLoopMarkerStr = IdentifierString + " - Tiles";
339 isl_id *TileLoopMarker =
340 isl_id_alloc(Ctx, TileLoopMarkerStr.c_str(), nullptr);
341 Node = isl_schedule_node_insert_mark(Node, TileLoopMarker);
342 Node = isl_schedule_node_child(Node, 0);
Tobias Grosser9bdea572015-08-20 12:22:37 +0000343 Node = isl_schedule_node_band_tile(Node, Sizes);
Tobias Grosser1ac884d2015-08-23 09:11:00 +0000344 Node = isl_schedule_node_child(Node, 0);
345 auto PointLoopMarkerStr = IdentifierString + " - Points";
346 isl_id *PointLoopMarker =
347 isl_id_alloc(Ctx, PointLoopMarkerStr.c_str(), nullptr);
348 Node = isl_schedule_node_insert_mark(Node, PointLoopMarker);
349 Node = isl_schedule_node_child(Node, 0);
350 return Node;
Tobias Grosser9bdea572015-08-20 12:22:37 +0000351}
352
Roman Gareevb17b9a82016-06-12 17:20:05 +0000353__isl_give isl_schedule_node *
354ScheduleTreeOptimizer::applyRegisterTiling(__isl_take isl_schedule_node *Node,
355 llvm::ArrayRef<int> TileSizes,
356 int DefaultTileSize) {
357 auto *Ctx = isl_schedule_node_get_ctx(Node);
358 Node = tileNode(Node, "Register tiling", TileSizes, DefaultTileSize);
359 Node = isl_schedule_node_band_set_ast_build_options(
360 Node, isl_union_set_read_from_str(Ctx, "{unroll[x]}"));
361 return Node;
362}
363
Tobias Grosserfa57e9b2015-08-24 06:01:47 +0000364bool ScheduleTreeOptimizer::isTileableBandNode(
Tobias Grosser862b9b52015-08-20 12:32:45 +0000365 __isl_keep isl_schedule_node *Node) {
Tobias Grosserbbb4cec2015-03-22 12:06:39 +0000366 if (isl_schedule_node_get_type(Node) != isl_schedule_node_band)
Tobias Grosser862b9b52015-08-20 12:32:45 +0000367 return false;
Tobias Grosserde68cc92011-06-30 20:01:02 +0000368
Tobias Grosserbbb4cec2015-03-22 12:06:39 +0000369 if (isl_schedule_node_n_children(Node) != 1)
Tobias Grosser862b9b52015-08-20 12:32:45 +0000370 return false;
Tobias Grosserde68cc92011-06-30 20:01:02 +0000371
Tobias Grosserbbb4cec2015-03-22 12:06:39 +0000372 if (!isl_schedule_node_band_get_permutable(Node))
Tobias Grosser862b9b52015-08-20 12:32:45 +0000373 return false;
Tobias Grosser44f19ac2011-07-05 22:15:53 +0000374
Tobias Grosserbbb4cec2015-03-22 12:06:39 +0000375 auto Space = isl_schedule_node_band_get_space(Node);
376 auto Dims = isl_space_dim(Space, isl_dim_set);
Tobias Grosser9bdea572015-08-20 12:22:37 +0000377 isl_space_free(Space);
Tobias Grosserde68cc92011-06-30 20:01:02 +0000378
Tobias Grosser9bdea572015-08-20 12:22:37 +0000379 if (Dims <= 1)
Tobias Grosser862b9b52015-08-20 12:32:45 +0000380 return false;
Tobias Grosserde68cc92011-06-30 20:01:02 +0000381
Tobias Grosserbbb4cec2015-03-22 12:06:39 +0000382 auto Child = isl_schedule_node_get_child(Node, 0);
383 auto Type = isl_schedule_node_get_type(Child);
384 isl_schedule_node_free(Child);
385
Tobias Grosser9bdea572015-08-20 12:22:37 +0000386 if (Type != isl_schedule_node_leaf)
Tobias Grosser862b9b52015-08-20 12:32:45 +0000387 return false;
388
389 return true;
390}
391
392__isl_give isl_schedule_node *
Roman Gareev9c3eb592016-05-28 16:17:58 +0000393ScheduleTreeOptimizer::standardBandOpts(__isl_take isl_schedule_node *Node,
394 void *User) {
Tobias Grosser04832712015-08-20 13:45:02 +0000395 if (FirstLevelTiling)
Tobias Grosser1ac884d2015-08-23 09:11:00 +0000396 Node = tileNode(Node, "1st level tiling", FirstLevelTileSizes,
397 FirstLevelDefaultTileSize);
Tobias Grosser04832712015-08-20 13:45:02 +0000398
399 if (SecondLevelTiling)
Tobias Grosser1ac884d2015-08-23 09:11:00 +0000400 Node = tileNode(Node, "2nd level tiling", SecondLevelTileSizes,
401 SecondLevelDefaultTileSize);
Tobias Grosserbbb4cec2015-03-22 12:06:39 +0000402
Roman Gareevb17b9a82016-06-12 17:20:05 +0000403 if (RegisterTiling)
404 Node =
405 applyRegisterTiling(Node, RegisterTileSizes, RegisterDefaultTileSize);
Tobias Grosser42e24892015-08-20 13:45:05 +0000406
Tobias Grosserbbb4cec2015-03-22 12:06:39 +0000407 if (PollyVectorizerChoice == VECTORIZER_NONE)
Tobias Grosserf10f4632015-08-19 08:03:37 +0000408 return Node;
Tobias Grosserbbb4cec2015-03-22 12:06:39 +0000409
Tobias Grosser862b9b52015-08-20 12:32:45 +0000410 auto Space = isl_schedule_node_band_get_space(Node);
411 auto Dims = isl_space_dim(Space, isl_dim_set);
412 isl_space_free(Space);
413
Tobias Grosserb241d922015-07-28 18:03:36 +0000414 for (int i = Dims - 1; i >= 0; i--)
Tobias Grosserf10f4632015-08-19 08:03:37 +0000415 if (isl_schedule_node_band_member_get_coincident(Node, i)) {
Tobias Grosserfa57e9b2015-08-24 06:01:47 +0000416 Node = prevectSchedBand(Node, i, PrevectorWidth);
Tobias Grosserbbb4cec2015-03-22 12:06:39 +0000417 break;
418 }
Tobias Grosserbbb4cec2015-03-22 12:06:39 +0000419
Tobias Grosserf10f4632015-08-19 08:03:37 +0000420 return Node;
Tobias Grosserde68cc92011-06-30 20:01:02 +0000421}
422
Roman Gareev9c3eb592016-05-28 16:17:58 +0000423/// @brief Check whether output dimensions of the map rely on the specified
424/// input dimension.
425///
426/// @param IslMap The isl map to be considered.
427/// @param DimNum The number of an input dimension to be checked.
428static bool isInputDimUsed(__isl_take isl_map *IslMap, unsigned DimNum) {
429 auto *CheckedAccessRelation =
430 isl_map_project_out(isl_map_copy(IslMap), isl_dim_in, DimNum, 1);
431 CheckedAccessRelation =
432 isl_map_insert_dims(CheckedAccessRelation, isl_dim_in, DimNum, 1);
433 auto *InputDimsId = isl_map_get_tuple_id(IslMap, isl_dim_in);
434 CheckedAccessRelation =
435 isl_map_set_tuple_id(CheckedAccessRelation, isl_dim_in, InputDimsId);
436 InputDimsId = isl_map_get_tuple_id(IslMap, isl_dim_out);
437 CheckedAccessRelation =
438 isl_map_set_tuple_id(CheckedAccessRelation, isl_dim_out, InputDimsId);
439 auto res = !isl_map_is_equal(CheckedAccessRelation, IslMap);
440 isl_map_free(CheckedAccessRelation);
441 isl_map_free(IslMap);
442 return res;
443}
444
445/// @brief Check if the SCoP statement could probably be optimized with
446/// analytical modeling.
447///
448/// containsMatrMult tries to determine whether the following conditions
449/// are true:
450/// 1. all memory accesses of the statement will have stride 0 or 1,
451/// if we interchange loops (switch the variable used in the inner
452/// loop to the outer loop).
453/// 2. all memory accesses of the statement except from the last one, are
454/// read memory access and the last one is write memory access.
Roman Gareev76614d32016-05-31 11:22:21 +0000455/// 3. all subscripts of the last memory access of the statement don't contain
Roman Gareev9c3eb592016-05-28 16:17:58 +0000456/// the variable used in the inner loop.
457///
458/// @param PartialSchedule The PartialSchedule that contains a SCoP statement
459/// to check.
460static bool containsMatrMult(__isl_keep isl_map *PartialSchedule) {
461 auto InputDimsId = isl_map_get_tuple_id(PartialSchedule, isl_dim_in);
462 auto *ScpStmt = static_cast<ScopStmt *>(isl_id_get_user(InputDimsId));
463 isl_id_free(InputDimsId);
464 if (ScpStmt->size() <= 1)
465 return false;
466 auto MemA = ScpStmt->begin();
467 for (unsigned i = 0; i < ScpStmt->size() - 2 && MemA != ScpStmt->end();
468 i++, MemA++)
Roman Gareev76614d32016-05-31 11:22:21 +0000469 if (!(*MemA)->isRead() ||
470 ((*MemA)->isArrayKind() &&
471 !((*MemA)->isStrideOne(isl_map_copy(PartialSchedule)) ||
Roman Gareev9c3eb592016-05-28 16:17:58 +0000472 (*MemA)->isStrideZero(isl_map_copy(PartialSchedule)))))
473 return false;
474 MemA++;
Roman Gareev76614d32016-05-31 11:22:21 +0000475 if (!(*MemA)->isWrite() || !(*MemA)->isArrayKind() ||
476 !((*MemA)->isStrideOne(isl_map_copy(PartialSchedule)) ||
Roman Gareev9c3eb592016-05-28 16:17:58 +0000477 (*MemA)->isStrideZero(isl_map_copy(PartialSchedule))))
478 return false;
479 auto DimNum = isl_map_dim(PartialSchedule, isl_dim_in);
480 return !isInputDimUsed((*MemA)->getAccessRelation(), DimNum - 1);
481}
482
483/// @brief Circular shift of output dimensions of the integer map.
484///
485/// @param IslMap The isl map to be modified.
486static __isl_give isl_map *circularShiftOutputDims(__isl_take isl_map *IslMap) {
Roman Gareev9c3eb592016-05-28 16:17:58 +0000487 auto DimNum = isl_map_dim(IslMap, isl_dim_out);
Roman Gareev4b8c7ae2016-06-03 18:46:29 +0000488 if (DimNum == 0)
489 return IslMap;
490 auto InputDimsId = isl_map_get_tuple_id(IslMap, isl_dim_in);
Roman Gareev9c3eb592016-05-28 16:17:58 +0000491 IslMap = isl_map_move_dims(IslMap, isl_dim_in, 0, isl_dim_out, DimNum - 1, 1);
492 IslMap = isl_map_move_dims(IslMap, isl_dim_out, 0, isl_dim_in, 0, 1);
493 return isl_map_set_tuple_id(IslMap, isl_dim_in, InputDimsId);
494}
495
Roman Gareev42402c92016-06-22 09:52:37 +0000496__isl_give isl_schedule_node *ScheduleTreeOptimizer::optimizeMatMulPattern(
497 __isl_take isl_schedule_node *Node, const llvm::TargetTransformInfo *TTI) {
498 assert(TTI && "The target transform info should be provided.");
499 // Get a micro-kernel.
500 // Nvec - Number of double-precision floating-point numbers that can be hold
501 // by a vector register. Use 2 by default.
502 auto Nvec = TTI->getRegisterBitWidth(true) / 64;
503 if (Nvec == 0)
504 Nvec = 2;
505 int Nr =
506 ceil(sqrt(Nvec * LatencyVectorFma * ThrougputVectorFma) / Nvec) * Nvec;
507 int Mr = ceil(Nvec * LatencyVectorFma * ThrougputVectorFma / Nr);
508 std::vector<int> MicroKernelParams{Mr, Nr};
509 Node = applyRegisterTiling(Node, MicroKernelParams, 1);
510 return Node;
511}
512
Roman Gareev9c3eb592016-05-28 16:17:58 +0000513bool ScheduleTreeOptimizer::isMatrMultPattern(
514 __isl_keep isl_schedule_node *Node) {
515 auto *PartialSchedule =
516 isl_schedule_node_band_get_partial_schedule_union_map(Node);
Roman Gareev397a34a2016-06-22 12:11:30 +0000517 if (isl_schedule_node_band_n_member(Node) != 3 ||
518 isl_union_map_n_map(PartialSchedule) != 1) {
519 isl_union_map_free(PartialSchedule);
Roman Gareev9c3eb592016-05-28 16:17:58 +0000520 return false;
521 }
Roman Gareev397a34a2016-06-22 12:11:30 +0000522 auto *NewPartialSchedule = isl_map_from_union_map(PartialSchedule);
Roman Gareev9c3eb592016-05-28 16:17:58 +0000523 NewPartialSchedule = circularShiftOutputDims(NewPartialSchedule);
524 if (containsMatrMult(NewPartialSchedule)) {
525 isl_map_free(NewPartialSchedule);
526 return true;
527 }
528 isl_map_free(NewPartialSchedule);
529 return false;
530}
531
532__isl_give isl_schedule_node *
533ScheduleTreeOptimizer::optimizeBand(__isl_take isl_schedule_node *Node,
534 void *User) {
535 if (!isTileableBandNode(Node))
536 return Node;
537
Roman Gareev42402c92016-06-22 09:52:37 +0000538 if (PMBasedOpts && User && isMatrMultPattern(Node)) {
Roman Gareev9c3eb592016-05-28 16:17:58 +0000539 DEBUG(dbgs() << "The matrix multiplication pattern was detected\n");
Roman Gareev42402c92016-06-22 09:52:37 +0000540 const llvm::TargetTransformInfo *TTI;
541 TTI = static_cast<const llvm::TargetTransformInfo *>(User);
542 Node = optimizeMatMulPattern(Node, TTI);
543 }
Roman Gareev9c3eb592016-05-28 16:17:58 +0000544
545 return standardBandOpts(Node, User);
546}
547
Tobias Grosser808cd692015-07-14 09:33:13 +0000548__isl_give isl_schedule *
Roman Gareev42402c92016-06-22 09:52:37 +0000549ScheduleTreeOptimizer::optimizeSchedule(__isl_take isl_schedule *Schedule,
550 const llvm::TargetTransformInfo *TTI) {
Tobias Grosserbbb4cec2015-03-22 12:06:39 +0000551 isl_schedule_node *Root = isl_schedule_get_root(Schedule);
Roman Gareev42402c92016-06-22 09:52:37 +0000552 Root = optimizeScheduleNode(Root, TTI);
Tobias Grosser808cd692015-07-14 09:33:13 +0000553 isl_schedule_free(Schedule);
Tobias Grosser808cd692015-07-14 09:33:13 +0000554 auto S = isl_schedule_node_get_schedule(Root);
Tobias Grosserbbb4cec2015-03-22 12:06:39 +0000555 isl_schedule_node_free(Root);
Tobias Grosser808cd692015-07-14 09:33:13 +0000556 return S;
Tobias Grosser30aa24c2011-05-14 19:02:06 +0000557}
558
Tobias Grosserfa57e9b2015-08-24 06:01:47 +0000559__isl_give isl_schedule_node *ScheduleTreeOptimizer::optimizeScheduleNode(
Roman Gareev42402c92016-06-22 09:52:37 +0000560 __isl_take isl_schedule_node *Node, const llvm::TargetTransformInfo *TTI) {
561 Node = isl_schedule_node_map_descendant_bottom_up(
562 Node, optimizeBand, const_cast<void *>(static_cast<const void *>(TTI)));
Tobias Grosserfa57e9b2015-08-24 06:01:47 +0000563 return Node;
564}
565
566bool ScheduleTreeOptimizer::isProfitableSchedule(
Johannes Doerfert7ceb0402015-02-11 17:25:09 +0000567 Scop &S, __isl_keep isl_union_map *NewSchedule) {
568 // To understand if the schedule has been optimized we check if the schedule
569 // has changed at all.
570 // TODO: We can improve this by tracking if any necessarily beneficial
571 // transformations have been performed. This can e.g. be tiling, loop
572 // interchange, or ...) We can track this either at the place where the
573 // transformation has been performed or, in case of automatic ILP based
574 // optimizations, by comparing (yet to be defined) performance metrics
575 // before/after the scheduling optimizer
576 // (e.g., #stride-one accesses)
577 isl_union_map *OldSchedule = S.getSchedule();
578 bool changed = !isl_union_map_is_equal(OldSchedule, NewSchedule);
579 isl_union_map_free(OldSchedule);
580 return changed;
581}
582
Tobias Grosserfa57e9b2015-08-24 06:01:47 +0000583namespace {
584class IslScheduleOptimizer : public ScopPass {
585public:
586 static char ID;
587 explicit IslScheduleOptimizer() : ScopPass(ID) { LastSchedule = nullptr; }
588
589 ~IslScheduleOptimizer() { isl_schedule_free(LastSchedule); }
590
Johannes Doerfert45be6442015-09-27 15:43:29 +0000591 /// @brief Optimize the schedule of the SCoP @p S.
Tobias Grosserfa57e9b2015-08-24 06:01:47 +0000592 bool runOnScop(Scop &S) override;
Tobias Grosserfa57e9b2015-08-24 06:01:47 +0000593
Johannes Doerfert45be6442015-09-27 15:43:29 +0000594 /// @brief Print the new schedule for the SCoP @p S.
595 void printScop(raw_ostream &OS, Scop &S) const override;
596
597 /// @brief Register all analyses and transformation required.
598 void getAnalysisUsage(AnalysisUsage &AU) const override;
Tobias Grosserfa57e9b2015-08-24 06:01:47 +0000599
Johannes Doerfert0f376302015-09-27 15:42:28 +0000600 /// @brief Release the internal memory.
601 void releaseMemory() override {
Tobias Grosserfa57e9b2015-08-24 06:01:47 +0000602 isl_schedule_free(LastSchedule);
603 LastSchedule = nullptr;
Tobias Grosserfa57e9b2015-08-24 06:01:47 +0000604 }
Johannes Doerfert45be6442015-09-27 15:43:29 +0000605
606private:
607 isl_schedule *LastSchedule;
Tobias Grosserfa57e9b2015-08-24 06:01:47 +0000608};
609}
610
611char IslScheduleOptimizer::ID = 0;
612
Tobias Grosser73600b82011-10-08 00:30:40 +0000613bool IslScheduleOptimizer::runOnScop(Scop &S) {
Johannes Doerfert6f7921f2015-02-14 12:02:24 +0000614
615 // Skip empty SCoPs but still allow code generation as it will delete the
616 // loops present but not needed.
617 if (S.getSize() == 0) {
618 S.markAsOptimized();
619 return false;
620 }
621
Hongbin Zheng2a798852016-03-03 08:15:33 +0000622 const Dependences &D =
623 getAnalysis<DependenceInfo>().getDependences(Dependences::AL_Statement);
Tobias Grosser30aa24c2011-05-14 19:02:06 +0000624
Johannes Doerfert7e6424b2015-03-05 00:43:48 +0000625 if (!D.hasValidDependences())
Tobias Grosser38c36ea2014-02-23 15:15:44 +0000626 return false;
627
Tobias Grosser28781422012-10-16 07:29:19 +0000628 isl_schedule_free(LastSchedule);
Tobias Grosser5a56cbf2014-04-16 07:33:47 +0000629 LastSchedule = nullptr;
Tobias Grosser28781422012-10-16 07:29:19 +0000630
Tobias Grosser30aa24c2011-05-14 19:02:06 +0000631 // Build input data.
Johannes Doerfert7e6424b2015-03-05 00:43:48 +0000632 int ValidityKinds =
633 Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW;
Tobias Grosser1deda292012-02-14 14:02:48 +0000634 int ProximityKinds;
635
636 if (OptimizeDeps == "all")
Johannes Doerfert7e6424b2015-03-05 00:43:48 +0000637 ProximityKinds =
638 Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW;
Tobias Grosser1deda292012-02-14 14:02:48 +0000639 else if (OptimizeDeps == "raw")
Johannes Doerfert7e6424b2015-03-05 00:43:48 +0000640 ProximityKinds = Dependences::TYPE_RAW;
Tobias Grosser1deda292012-02-14 14:02:48 +0000641 else {
642 errs() << "Do not know how to optimize for '" << OptimizeDeps << "'"
Tobias Grosser4d96c8d2013-03-23 01:05:07 +0000643 << " Falling back to optimizing all dependences.\n";
Johannes Doerfert7e6424b2015-03-05 00:43:48 +0000644 ProximityKinds =
645 Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW;
Tobias Grosser1deda292012-02-14 14:02:48 +0000646 }
647
Tobias Grosser5f9a7622012-02-14 14:02:40 +0000648 isl_union_set *Domain = S.getDomains();
Tobias Grosser30aa24c2011-05-14 19:02:06 +0000649
Tobias Grosser98610ee2012-02-13 23:31:39 +0000650 if (!Domain)
Tobias Grosser30aa24c2011-05-14 19:02:06 +0000651 return false;
652
Johannes Doerfert7e6424b2015-03-05 00:43:48 +0000653 isl_union_map *Validity = D.getDependences(ValidityKinds);
654 isl_union_map *Proximity = D.getDependences(ProximityKinds);
Tobias Grosser8a507022012-03-16 11:51:41 +0000655
Tobias Grossera26db472012-01-30 19:38:43 +0000656 // Simplify the dependences by removing the constraints introduced by the
657 // domains. This can speed up the scheduling time significantly, as large
658 // constant coefficients will be removed from the dependences. The
659 // introduction of some additional dependences reduces the possible
660 // transformations, but in most cases, such transformation do not seem to be
661 // interesting anyway. In some cases this option may stop the scheduler to
662 // find any schedule.
663 if (SimplifyDeps == "yes") {
Tobias Grosser00383a72012-02-14 14:02:44 +0000664 Validity = isl_union_map_gist_domain(Validity, isl_union_set_copy(Domain));
665 Validity = isl_union_map_gist_range(Validity, isl_union_set_copy(Domain));
Tobias Grosser4d96c8d2013-03-23 01:05:07 +0000666 Proximity =
667 isl_union_map_gist_domain(Proximity, isl_union_set_copy(Domain));
Tobias Grosser00383a72012-02-14 14:02:44 +0000668 Proximity = isl_union_map_gist_range(Proximity, isl_union_set_copy(Domain));
Tobias Grossera26db472012-01-30 19:38:43 +0000669 } else if (SimplifyDeps != "no") {
670 errs() << "warning: Option -polly-opt-simplify-deps should either be 'yes' "
671 "or 'no'. Falling back to default: 'yes'\n";
672 }
673
Tobias Grosser30aa24c2011-05-14 19:02:06 +0000674 DEBUG(dbgs() << "\n\nCompute schedule from: ");
Tobias Grosser01aea582014-10-22 23:16:28 +0000675 DEBUG(dbgs() << "Domain := " << stringFromIslObj(Domain) << ";\n");
676 DEBUG(dbgs() << "Proximity := " << stringFromIslObj(Proximity) << ";\n");
677 DEBUG(dbgs() << "Validity := " << stringFromIslObj(Validity) << ";\n");
Tobias Grosser30aa24c2011-05-14 19:02:06 +0000678
Michael Krusec59f22c2015-06-18 16:45:40 +0000679 unsigned IslSerializeSCCs;
Tobias Grosserb3ad85b2012-01-30 19:38:50 +0000680
681 if (FusionStrategy == "max") {
Michael Krusec59f22c2015-06-18 16:45:40 +0000682 IslSerializeSCCs = 0;
Tobias Grosserb3ad85b2012-01-30 19:38:50 +0000683 } else if (FusionStrategy == "min") {
Michael Krusec59f22c2015-06-18 16:45:40 +0000684 IslSerializeSCCs = 1;
Tobias Grosserb3ad85b2012-01-30 19:38:50 +0000685 } else {
686 errs() << "warning: Unknown fusion strategy. Falling back to maximal "
687 "fusion.\n";
Michael Krusec59f22c2015-06-18 16:45:40 +0000688 IslSerializeSCCs = 0;
Tobias Grosserb3ad85b2012-01-30 19:38:50 +0000689 }
690
Tobias Grosser95e860c2012-01-30 19:38:54 +0000691 int IslMaximizeBands;
692
Tobias Grossera4ea90b2012-01-30 22:43:56 +0000693 if (MaximizeBandDepth == "yes") {
Tobias Grosser95e860c2012-01-30 19:38:54 +0000694 IslMaximizeBands = 1;
Tobias Grossera4ea90b2012-01-30 22:43:56 +0000695 } else if (MaximizeBandDepth == "no") {
Tobias Grosser95e860c2012-01-30 19:38:54 +0000696 IslMaximizeBands = 0;
697 } else {
698 errs() << "warning: Option -polly-opt-maximize-bands should either be 'yes'"
699 " or 'no'. Falling back to default: 'yes'\n";
700 IslMaximizeBands = 1;
701 }
702
Michael Kruse315aa322016-05-02 11:35:27 +0000703 int IslOuterCoincidence;
704
705 if (OuterCoincidence == "yes") {
706 IslOuterCoincidence = 1;
707 } else if (OuterCoincidence == "no") {
708 IslOuterCoincidence = 0;
709 } else {
710 errs() << "warning: Option -polly-opt-outer-coincidence should either be "
711 "'yes' or 'no'. Falling back to default: 'no'\n";
712 IslOuterCoincidence = 0;
713 }
714
715 isl_options_set_schedule_outer_coincidence(S.getIslCtx(),
716 IslOuterCoincidence);
Michael Krusec59f22c2015-06-18 16:45:40 +0000717 isl_options_set_schedule_serialize_sccs(S.getIslCtx(), IslSerializeSCCs);
Tobias Grosser95e860c2012-01-30 19:38:54 +0000718 isl_options_set_schedule_maximize_band_depth(S.getIslCtx(), IslMaximizeBands);
Tobias Grosser992e60c2012-02-20 08:41:15 +0000719 isl_options_set_schedule_max_constant_term(S.getIslCtx(), MaxConstantTerm);
Tobias Grosser92f54802012-02-20 08:41:47 +0000720 isl_options_set_schedule_max_coefficient(S.getIslCtx(), MaxCoefficient);
Tobias Grosser4f6bcef2015-03-31 07:52:36 +0000721 isl_options_set_tile_scale_tile_loops(S.getIslCtx(), 0);
Tobias Grosser42152ff2012-01-30 19:38:47 +0000722
723 isl_options_set_on_error(S.getIslCtx(), ISL_ON_ERROR_CONTINUE);
Tobias Grossera38c9242014-01-26 19:36:28 +0000724
725 isl_schedule_constraints *ScheduleConstraints;
726 ScheduleConstraints = isl_schedule_constraints_on_domain(Domain);
727 ScheduleConstraints =
728 isl_schedule_constraints_set_proximity(ScheduleConstraints, Proximity);
729 ScheduleConstraints = isl_schedule_constraints_set_validity(
730 ScheduleConstraints, isl_union_map_copy(Validity));
731 ScheduleConstraints =
732 isl_schedule_constraints_set_coincidence(ScheduleConstraints, Validity);
Tobias Grosser00383a72012-02-14 14:02:44 +0000733 isl_schedule *Schedule;
Tobias Grossera38c9242014-01-26 19:36:28 +0000734 Schedule = isl_schedule_constraints_compute_schedule(ScheduleConstraints);
Tobias Grosser42152ff2012-01-30 19:38:47 +0000735 isl_options_set_on_error(S.getIslCtx(), ISL_ON_ERROR_ABORT);
736
737 // In cases the scheduler is not able to optimize the code, we just do not
738 // touch the schedule.
Tobias Grosser98610ee2012-02-13 23:31:39 +0000739 if (!Schedule)
Tobias Grosser42152ff2012-01-30 19:38:47 +0000740 return false;
Tobias Grosser30aa24c2011-05-14 19:02:06 +0000741
Tobias Grosser97d87452015-05-30 06:46:59 +0000742 DEBUG({
743 auto *P = isl_printer_to_str(S.getIslCtx());
744 P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK);
745 P = isl_printer_print_schedule(P, Schedule);
746 dbgs() << "NewScheduleTree: \n" << isl_printer_get_str(P) << "\n";
747 isl_printer_free(P);
748 });
Tobias Grosser4d63b9d2012-02-20 08:41:21 +0000749
Roman Gareev42402c92016-06-22 09:52:37 +0000750 Function &F = S.getFunction();
751 auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
752 isl_schedule *NewSchedule =
753 ScheduleTreeOptimizer::optimizeSchedule(Schedule, TTI);
Tobias Grosser808cd692015-07-14 09:33:13 +0000754 isl_union_map *NewScheduleMap = isl_schedule_get_map(NewSchedule);
Johannes Doerfert7ceb0402015-02-11 17:25:09 +0000755
Tobias Grosserfa57e9b2015-08-24 06:01:47 +0000756 if (!ScheduleTreeOptimizer::isProfitableSchedule(S, NewScheduleMap)) {
Tobias Grosser808cd692015-07-14 09:33:13 +0000757 isl_union_map_free(NewScheduleMap);
758 isl_schedule_free(NewSchedule);
Johannes Doerfert7ceb0402015-02-11 17:25:09 +0000759 return false;
760 }
761
Tobias Grosser808cd692015-07-14 09:33:13 +0000762 S.setScheduleTree(NewSchedule);
Johannes Doerfert7ceb0402015-02-11 17:25:09 +0000763 S.markAsOptimized();
Tobias Grosser30aa24c2011-05-14 19:02:06 +0000764
Tobias Grosser808cd692015-07-14 09:33:13 +0000765 isl_union_map_free(NewScheduleMap);
Tobias Grosser30aa24c2011-05-14 19:02:06 +0000766 return false;
767}
768
Johannes Doerfert3fe584d2015-03-01 18:40:25 +0000769void IslScheduleOptimizer::printScop(raw_ostream &OS, Scop &) const {
Tobias Grosser28781422012-10-16 07:29:19 +0000770 isl_printer *p;
771 char *ScheduleStr;
772
773 OS << "Calculated schedule:\n";
774
775 if (!LastSchedule) {
776 OS << "n/a\n";
777 return;
778 }
779
780 p = isl_printer_to_str(isl_schedule_get_ctx(LastSchedule));
781 p = isl_printer_print_schedule(p, LastSchedule);
782 ScheduleStr = isl_printer_get_str(p);
783 isl_printer_free(p);
784
785 OS << ScheduleStr << "\n";
Tobias Grosser30aa24c2011-05-14 19:02:06 +0000786}
787
Tobias Grosser73600b82011-10-08 00:30:40 +0000788void IslScheduleOptimizer::getAnalysisUsage(AnalysisUsage &AU) const {
Tobias Grosser30aa24c2011-05-14 19:02:06 +0000789 ScopPass::getAnalysisUsage(AU);
Johannes Doerfertf6557f92015-03-04 22:43:40 +0000790 AU.addRequired<DependenceInfo>();
Roman Gareev42402c92016-06-22 09:52:37 +0000791 AU.addRequired<TargetTransformInfoWrapperPass>();
Tobias Grosser30aa24c2011-05-14 19:02:06 +0000792}
793
Tobias Grosser4d96c8d2013-03-23 01:05:07 +0000794Pass *polly::createIslScheduleOptimizerPass() {
Tobias Grosser73600b82011-10-08 00:30:40 +0000795 return new IslScheduleOptimizer();
Tobias Grosser30aa24c2011-05-14 19:02:06 +0000796}
Tobias Grosser4d96c8d2013-03-23 01:05:07 +0000797
798INITIALIZE_PASS_BEGIN(IslScheduleOptimizer, "polly-opt-isl",
799 "Polly - Optimize schedule of SCoP", false, false);
Johannes Doerfertf6557f92015-03-04 22:43:40 +0000800INITIALIZE_PASS_DEPENDENCY(DependenceInfo);
Johannes Doerfert99191c72016-05-31 09:41:04 +0000801INITIALIZE_PASS_DEPENDENCY(ScopInfoRegionPass);
Roman Gareev42402c92016-06-22 09:52:37 +0000802INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass);
Tobias Grosser4d96c8d2013-03-23 01:05:07 +0000803INITIALIZE_PASS_END(IslScheduleOptimizer, "polly-opt-isl",
804 "Polly - Optimize schedule of SCoP", false, false)