Rewrite getPrevectorMap using schedule trees operations Schedule trees are a lot easier to work with, for both humans and machines. For humans the more structured schedule representation is easier to reason about. Together with the more abstract isl programming interface this can result in a lot cleaner code (see this changeset). For machines, the structured schedule and the fact that we now use explicit piecewise affine expressions instead of integer maps makes it easier to generate code from this schedule tree. As a result, we can already see a slight compile-time improvement -- for 3mm from 0m0.593s to 0m0.551s seconds (-7 %). More importantly, future optimizations such as full-partial tile separation will most likely result in more streamlined code to be generated. Contributed-by: Roman Gareev <gareevroman@gmail.com> llvm-svn: 243458

commit: b241d928bd884d418445c6c763c4df9ffaa9303f [log] [tgz]
author: Tobias Grosser <tobias@grosser.es> Tue Jul 28 18:03:36 2015 +0000
committer: Tobias Grosser <tobias@grosser.es> Tue Jul 28 18:03:36 2015 +0000
tree: 175917bf8e9200692ac8702f4de1cbcd7d4589b9
parent: 305e8f6312f9d461bb9fd1dd73a894d899b77fcc [diff] [blame]
diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp
index 8c35898..26b9668 100644
--- a/polly/lib/Transform/ScheduleOptimizer.cpp
+++ b/polly/lib/Transform/ScheduleOptimizer.cpp

@@ -118,16 +118,14 @@
   /// @return True, if we believe @p NewSchedule is an improvement for @p S.
   bool isProfitableSchedule(Scop &S, __isl_keep isl_union_map *NewSchedule);
 
-  /// @brief Create a map that pre-vectorizes one scheduling dimension.
+  /// @brief Pre-vectorizes one scheduling dimension of a schedule band.
   ///
-  /// getPrevectorMap creates a map that maps each input dimension to the same
-  /// output dimension, except for the dimension DimToVectorize.
-  /// DimToVectorize is strip mined by 'VectorWidth' and the newly created
-  /// point loop of DimToVectorize is moved to the innermost level.
+  /// prevectSchedBand splits out the dimension DimToVectorize, tiles it and
+  /// sinks the resulting point loop.
   ///
-  /// Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
+  /// Example (DimToVectorize=0, VectorWidth=4):
   ///
-  /// | Before transformation
+  /// | Before transformation:
   /// |
   /// | A[i,j] -> [i,j]
   /// |
@@ -135,17 +133,12 @@
   /// |    for (j = 0; j < 128; j++)
   /// |      A(i,j);
   ///
-  ///   Prevector map:
-  ///   [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
-  ///
   /// | After transformation:
   /// |
-  /// | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
-  /// |
-  /// | for (it = 0; it < 128; it+=4)
+  /// | for (it = 0; it < 32; it+=1)
   /// |    for (j = 0; j < 128; j++)
-  /// |      for (ip = max(0,it); ip < min(128, it + 3); ip++)
-  /// |        A(ip,j);
+  /// |      for (ip = 0; ip <= 3; ip++)
+  /// |        A(4 * it + ip,j);
   ///
   /// The goal of this transformation is to create a trivially vectorizable
   /// loop.  This means a parallel loop at the innermost level that has a
@@ -156,9 +149,9 @@
   /// DimToVectorize can be divided by VectorWidth. The default VectorWidth is
   /// currently constant and not yet target specific. This function does not
   /// reason about parallelism.
-  static __isl_give isl_map *getPrevectorMap(isl_ctx *ctx, int DimToVectorize,
-                                             int ScheduleDimensions,
-                                             int VectorWidth = 4);
+  static __isl_give isl_schedule_node *
+  prevectSchedBand(__isl_take isl_schedule_node *Node, unsigned DimToVectorize,
+                   int VectorWidth = 4);
 
   /// @brief Apply additional optimizations on the bands in the schedule tree.
   ///
@@ -170,7 +163,7 @@
   ///      - if the band has more than one loop dimension
   ///
   ///  - Prevectorize the schedule of the band (or the point loop in case of
-  ///    tiling)
+  ///    tiling).
   ///      - if vectorization is enabled
   ///
   /// @param Node The schedule node to (possibly) optimize.
@@ -204,58 +197,33 @@
 
 char IslScheduleOptimizer::ID = 0;
 
-__isl_give isl_map *
-IslScheduleOptimizer::getPrevectorMap(isl_ctx *ctx, int DimToVectorize,
-                                      int ScheduleDimensions, int VectorWidth) {
-  isl_space *Space;
-  isl_local_space *LocalSpace, *LocalSpaceRange;
-  isl_set *Modulo;
-  isl_map *TilingMap;
-  isl_constraint *c;
-  isl_aff *Aff;
-  int PointDimension; /* ip */
-  int TileDimension;  /* it */
-  isl_val *VectorWidthMP;
+__isl_give isl_schedule_node *
+IslScheduleOptimizer::prevectSchedBand(__isl_take isl_schedule_node *Node,
+                                       unsigned DimToVectorize,
+                                       int VectorWidth) {
+  assert(isl_schedule_node_get_type(Node) == isl_schedule_node_band);
 
-  assert(0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);
+  auto Space = isl_schedule_node_band_get_space(Node);
+  auto ScheduleDimensions = isl_space_dim(Space, isl_dim_set);
+  isl_space_free(Space);
+  assert(DimToVectorize < ScheduleDimensions);
 
-  Space = isl_space_alloc(ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
-  TilingMap = isl_map_universe(isl_space_copy(Space));
-  LocalSpace = isl_local_space_from_space(Space);
-  PointDimension = ScheduleDimensions;
-  TileDimension = DimToVectorize;
-
-  // Create an identity map for everything except DimToVectorize and map
-  // DimToVectorize to the point loop at the innermost dimension.
-  for (int i = 0; i < ScheduleDimensions; i++)
-    if (i == DimToVectorize)
-      TilingMap =
-          isl_map_equate(TilingMap, isl_dim_in, i, isl_dim_out, PointDimension);
-    else
-      TilingMap = isl_map_equate(TilingMap, isl_dim_in, i, isl_dim_out, i);
-
-  // it % 'VectorWidth' = 0
-  LocalSpaceRange = isl_local_space_range(isl_local_space_copy(LocalSpace));
-  Aff = isl_aff_zero_on_domain(LocalSpaceRange);
-  Aff = isl_aff_set_constant_si(Aff, VectorWidth);
-  Aff = isl_aff_set_coefficient_si(Aff, isl_dim_in, TileDimension, 1);
-  VectorWidthMP = isl_val_int_from_si(ctx, VectorWidth);
-  Aff = isl_aff_mod_val(Aff, VectorWidthMP);
-  Modulo = isl_pw_aff_zero_set(isl_pw_aff_from_aff(Aff));
-  TilingMap = isl_map_intersect_range(TilingMap, Modulo);
-
-  // it <= ip
-  TilingMap = isl_map_order_le(TilingMap, isl_dim_out, TileDimension,
-                               isl_dim_out, PointDimension);
-
-  // ip <= it + ('VectorWidth' - 1)
-  c = isl_inequality_alloc(LocalSpace);
-  isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, 1);
-  isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, -1);
-  isl_constraint_set_constant_si(c, VectorWidth - 1);
-  TilingMap = isl_map_add_constraint(TilingMap, c);
-
-  return TilingMap;
+  if (DimToVectorize > 0) {
+    Node = isl_schedule_node_band_split(Node, DimToVectorize);
+    Node = isl_schedule_node_child(Node, 0);
+  }
+  if (DimToVectorize < ScheduleDimensions - 1)
+    Node = isl_schedule_node_band_split(Node, 1);
+  Space = isl_schedule_node_band_get_space(Node);
+  auto Sizes = isl_multi_val_zero(Space);
+  auto Ctx = isl_schedule_node_get_ctx(Node);
+  Sizes =
+      isl_multi_val_set_val(Sizes, 0, isl_val_int_from_si(Ctx, VectorWidth));
+  Node = isl_schedule_node_band_tile(Node, Sizes);
+  Node = isl_schedule_node_child(Node, 0);
+  Node = isl_schedule_node_band_sink(Node);
+  Node = isl_schedule_node_child(Node, 0);
+  return Node;
 }
 
 isl_schedule_node *IslScheduleOptimizer::optimizeBand(isl_schedule_node *Node,
@@ -307,21 +275,12 @@
   if (PollyVectorizerChoice == VECTORIZER_NONE)
     return Res;
 
-  auto Schedule = isl_schedule_node_band_get_partial_schedule(Res);
-
-  for (int i = Dims - 1; i >= 0; i--) {
+  for (int i = Dims - 1; i >= 0; i--)
     if (isl_schedule_node_band_member_get_coincident(Res, i)) {
-      auto TileMap = IslScheduleOptimizer::getPrevectorMap(Ctx, i, Dims);
-      auto TileUMap = isl_union_map_from_map(TileMap);
-      auto Schedule2 = isl_union_map_apply_range(
-          isl_union_map_from_multi_union_pw_aff(Schedule), TileUMap);
-      Schedule = isl_multi_union_pw_aff_from_union_map(Schedule2);
+      Res = IslScheduleOptimizer::prevectSchedBand(Res, i);
       break;
     }
-  }
 
-  Res = isl_schedule_node_delete(Res);
-  Res = isl_schedule_node_insert_partial_schedule(Res, Schedule);
   return Res;
 }
commit	b241d928bd884d418445c6c763c4df9ffaa9303f	[log] [tgz]
author	Tobias Grosser <tobias@grosser.es>	Tue Jul 28 18:03:36 2015 +0000
committer	Tobias Grosser <tobias@grosser.es>	Tue Jul 28 18:03:36 2015 +0000
tree	175917bf8e9200692ac8702f4de1cbcd7d4589b9
parent	305e8f6312f9d461bb9fd1dd73a894d899b77fcc [diff] [blame]