Store the size of the outermost dimension in case of newly created arrays that require memory allocation.

We do not need the size of the outermost dimension in most cases, but if we
allocate memory for newly created arrays, that size is needed.

Reviewed-by: Michael Kruse <llvm@meinersbur.de>

Differential Revision: https://reviews.llvm.org/D23991

llvm-svn: 281234
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h
index cb8c48a..3135802 100644
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -278,29 +278,27 @@
   unsigned getNumberOfDimensions() const {
     if (Kind == MK_PHI || Kind == MK_ExitPHI || Kind == MK_Value)
       return 0;
-    return DimensionSizes.size() + 1;
+    return DimensionSizes.size();
   }
 
   /// Return the size of dimension @p dim as SCEV*.
   //
   //  Scalars do not have array dimensions and the first dimension of
   //  a (possibly multi-dimensional) array also does not carry any size
-  //  information.
+  //  information, in case the array is not newly created.
   const SCEV *getDimensionSize(unsigned Dim) const {
-    assert(Dim > 0 && "Only dimensions larger than zero are sized.");
     assert(Dim < getNumberOfDimensions() && "Invalid dimension");
-    return DimensionSizes[Dim - 1];
+    return DimensionSizes[Dim];
   }
 
   /// Return the size of dimension @p dim as isl_pw_aff.
   //
   //  Scalars do not have array dimensions and the first dimension of
   //  a (possibly multi-dimensional) array also does not carry any size
-  //  information.
+  //  information, in case the array is not newly created.
   __isl_give isl_pw_aff *getDimensionSizePw(unsigned Dim) const {
-    assert(Dim > 0 && "Only dimensions larger than zero are sized.");
     assert(Dim < getNumberOfDimensions() && "Invalid dimension");
-    return isl_pw_aff_copy(DimensionSizesPw[Dim - 1]);
+    return isl_pw_aff_copy(DimensionSizesPw[Dim]);
   }
 
   /// Get the canonical element type of this array.
diff --git a/polly/lib/Analysis/ScopBuilder.cpp b/polly/lib/Analysis/ScopBuilder.cpp
index 13695d0..7a47470 100644
--- a/polly/lib/Analysis/ScopBuilder.cpp
+++ b/polly/lib/Analysis/ScopBuilder.cpp
@@ -182,6 +182,8 @@
   if (Sizes.empty())
     return false;
 
+  SizesSCEV.push_back(nullptr);
+
   for (auto V : Sizes)
     SizesSCEV.push_back(SE.getSCEV(
         ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));
@@ -213,9 +215,10 @@
   if (AccItr == InsnToMemAcc.end())
     return false;
 
-  std::vector<const SCEV *> Sizes(
-      AccItr->second.Shape->DelinearizedSizes.begin(),
-      AccItr->second.Shape->DelinearizedSizes.end());
+  std::vector<const SCEV *> Sizes = {nullptr};
+
+  Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
+               AccItr->second.Shape->DelinearizedSizes.end());
   // Remove the element size. This information is already provided by the
   // ElementSize parameter. In case the element size of this access and the
   // element size used for delinearization differs the delinearization is
@@ -273,7 +276,7 @@
   DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
   addArrayAccess(Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
                  IntegerType::getInt8Ty(DestPtrVal->getContext()), false,
-                 {DestAccFunc, LengthVal}, {}, Inst.getValueOperand());
+                 {DestAccFunc, LengthVal}, {nullptr}, Inst.getValueOperand());
 
   auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
   if (!MemTrans)
@@ -294,7 +297,7 @@
   SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
   addArrayAccess(Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
                  IntegerType::getInt8Ty(SrcPtrVal->getContext()), false,
-                 {SrcAccFunc, LengthVal}, {}, Inst.getValueOperand());
+                 {SrcAccFunc, LengthVal}, {nullptr}, Inst.getValueOperand());
 
   return true;
 }
@@ -336,7 +339,7 @@
 
       auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
       addArrayAccess(Inst, AccType, ArgBasePtr->getValue(),
-                     ArgBasePtr->getType(), false, {AF}, {}, CI);
+                     ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
     }
     return true;
   }
@@ -383,7 +386,7 @@
     AccType = MemoryAccess::MAY_WRITE;
 
   addArrayAccess(Inst, AccType, BasePointer->getValue(), ElementType, IsAffine,
-                 {AccessFunction}, {}, Val);
+                 {AccessFunction}, {nullptr}, Val);
 }
 
 void ScopBuilder::buildMemoryAccess(MemAccInst Inst, Loop *L) {
@@ -662,7 +665,7 @@
   for (auto *GlobalRead : GlobalReads)
     for (auto *BP : ArrayBasePointers)
       addArrayAccess(MemAccInst(GlobalRead), MemoryAccess::READ, BP,
-                     BP->getType(), false, {AF}, {}, GlobalRead);
+                     BP->getType(), false, {AF}, {nullptr}, GlobalRead);
 
   scop->init(AA, AC, DT, LI);
 }
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index 2c0cd42..2140457 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -218,9 +218,13 @@
   int SharedDims = std::min(NewSizes.size(), DimensionSizes.size());
   int ExtraDimsNew = NewSizes.size() - SharedDims;
   int ExtraDimsOld = DimensionSizes.size() - SharedDims;
-  for (int i = 0; i < SharedDims; i++)
-    if (NewSizes[i + ExtraDimsNew] != DimensionSizes[i + ExtraDimsOld])
+
+  for (int i = 0; i < SharedDims; i++) {
+    auto &NewSize = NewSizes[i + ExtraDimsNew];
+    auto &KnownSize = DimensionSizes[i + ExtraDimsOld];
+    if (NewSize && KnownSize && NewSize != KnownSize)
       return false;
+  }
 
   if (DimensionSizes.size() >= NewSizes.size())
     return true;
@@ -232,6 +236,10 @@
     isl_pw_aff_free(Size);
   DimensionSizesPw.clear();
   for (const SCEV *Expr : DimensionSizes) {
+    if (!Expr) {
+      DimensionSizesPw.push_back(nullptr);
+      continue;
+    }
     isl_pw_aff *Size = S.getPwAffOnly(Expr);
     DimensionSizesPw.push_back(Size);
   }
@@ -258,9 +266,12 @@
 
 void ScopArrayInfo::print(raw_ostream &OS, bool SizeAsPwAff) const {
   OS.indent(8) << *getElementType() << " " << getName();
-  if (getNumberOfDimensions() > 0)
+  unsigned u = 0;
+  if (getNumberOfDimensions() > 0 && !getDimensionSize(0)) {
     OS << "[*]";
-  for (unsigned u = 1; u < getNumberOfDimensions(); u++) {
+    u++;
+  }
+  for (; u < getNumberOfDimensions(); u++) {
     OS << "[";
 
     if (SizeAsPwAff) {
@@ -636,7 +647,7 @@
 
 void MemoryAccess::buildMemIntrinsicAccessRelation() {
   assert(isa<MemIntrinsic>(getAccessInstruction()));
-  assert(Subscripts.size() == 2 && Sizes.size() == 0);
+  assert(Subscripts.size() == 2 && Sizes.size() == 1);
 
   auto *SubscriptPWA = getPwAff(Subscripts[0]);
   auto *SubscriptMap = isl_map_from_pw_aff(SubscriptPWA);
@@ -704,7 +715,7 @@
   for (int i = Size - 2; i >= 0; --i) {
     isl_space *Space;
     isl_map *MapOne, *MapTwo;
-    isl_pw_aff *DimSize = getPwAff(Sizes[i]);
+    isl_pw_aff *DimSize = getPwAff(Sizes[i + 1]);
 
     isl_space *SpaceSize = isl_pw_aff_get_space(DimSize);
     isl_pw_aff_free(DimSize);
@@ -813,7 +824,7 @@
     AccessRelation = isl_map_flat_range_product(AccessRelation, SubscriptMap);
   }
 
-  if (Sizes.size() >= 1 && !isa<SCEVConstant>(Sizes[0]))
+  if (Sizes.size() >= 2 && !isa<SCEVConstant>(Sizes[1]))
     AccessRelation = foldAccess(AccessRelation, Statement);
 
   Space = Statement->getDomainSpace();
@@ -3499,7 +3510,10 @@
   std::vector<const SCEV *> SCEVSizes;
 
   for (auto size : Sizes)
-    SCEVSizes.push_back(getSE()->getConstant(DimSizeType, size, false));
+    if (size)
+      SCEVSizes.push_back(getSE()->getConstant(DimSizeType, size, false));
+    else
+      SCEVSizes.push_back(nullptr);
 
   auto *SAI =
       getOrCreateScopArrayInfo(nullptr, ElementType, SCEVSizes,
diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp
index 60fbdb9..0134323 100644
--- a/polly/lib/CodeGen/IslNodeBuilder.cpp
+++ b/polly/lib/CodeGen/IslNodeBuilder.cpp
@@ -1155,8 +1155,12 @@
     if (SAI->getBasePtr())
       continue;
 
+    assert(SAI->getNumberOfDimensions() > 0 && SAI->getDimensionSize(0) &&
+           "The size of the outermost dimension is used to declare newly "
+           "created arrays that require memory allocation.");
+
     Type *NewArrayType = nullptr;
-    for (unsigned i = SAI->getNumberOfDimensions() - 1; i >= 1; i--) {
+    for (int i = SAI->getNumberOfDimensions() - 1; i >= 0; i--) {
       auto *DimSize = SAI->getDimensionSize(i);
       unsigned UnsignedDimSize = static_cast<const SCEVConstant *>(DimSize)
                                      ->getAPInt()
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index 783d256..654bf5b 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -1206,6 +1206,7 @@
     SmallVector<const SCEV *, 4> Sizes;
     isl_ast_build *Build =
         isl_ast_build_from_context(isl_set_copy(Prog->context));
+    Sizes.push_back(nullptr);
     for (long j = 1; j < Kernel->array[i].array->n_index; j++) {
       isl_ast_expr *DimSize = isl_ast_build_expr_from_pw_aff(
           Build, isl_pw_aff_copy(Kernel->array[i].array->bound[j]));
@@ -1315,6 +1316,7 @@
     Type *ArrayTy = EleTy;
     SmallVector<const SCEV *, 4> Sizes;
 
+    Sizes.push_back(nullptr);
     for (unsigned int j = 1; j < Var.array->n_index; ++j) {
       isl_val *Val = isl_vec_get_element_val(Var.size, j);
       long Bound = isl_val_get_num_si(Val);
diff --git a/polly/lib/Exchange/JSONExporter.cpp b/polly/lib/Exchange/JSONExporter.cpp
index 01ebbba..915c4b4 100644
--- a/polly/lib/Exchange/JSONExporter.cpp
+++ b/polly/lib/Exchange/JSONExporter.cpp
@@ -152,7 +152,12 @@
 
     Json::Value Array;
     Array["name"] = SAI->getName();
-    for (unsigned i = 1; i < SAI->getNumberOfDimensions(); i++) {
+    unsigned i = 0;
+    if (!SAI->getDimensionSize(i)) {
+      Array["sizes"].append("*");
+      i++;
+    }
+    for (; i < SAI->getNumberOfDimensions(); i++) {
       SAI->getDimensionSize(i)->print(RawStringOstream);
       Array["sizes"].append(RawStringOstream.str());
       Buffer.clear();
@@ -477,11 +482,11 @@
   if (SAI->getName() != Array["name"].asCString())
     return false;
 
-  if (SAI->getNumberOfDimensions() != Array["sizes"].size() + 1)
+  if (SAI->getNumberOfDimensions() != Array["sizes"].size())
     return false;
 
-  for (unsigned i = 0; i < Array["sizes"].size(); i++) {
-    SAI->getDimensionSize(i + 1)->print(RawStringOstream);
+  for (unsigned i = 1; i < Array["sizes"].size(); i++) {
+    SAI->getDimensionSize(i)->print(RawStringOstream);
     if (RawStringOstream.str() != Array["sizes"][i].asCString())
       return false;
     Buffer.clear();
diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp
index c825892..b89374d 100644
--- a/polly/lib/Transform/ScheduleOptimizer.cpp
+++ b/polly/lib/Transform/ScheduleOptimizer.cpp
@@ -717,7 +717,7 @@
 ///        the matrix multiplication pattern.
 ///
 /// Create an access relation of the following form:
-/// [O0, O1, O2, O3, O4, O5, O6, O7, O8] -> [0, O5 + K * OI, OJ],
+/// [O0, O1, O2, O3, O4, O5, O6, O7, O8] -> [O5 + K * OI, OJ],
 /// where K is @p Coeff, I is @p FirstDim, J is @p SecondDim.
 ///
 /// It can be used, for example, to create relations that helps to consequently
@@ -745,17 +745,16 @@
                                     unsigned Coeff, unsigned FirstDim,
                                     unsigned SecondDim) {
   auto *Ctx = isl_map_get_ctx(MapOldIndVar);
-  auto *AccessRelSpace = isl_space_alloc(Ctx, 0, 9, 3);
+  auto *AccessRelSpace = isl_space_alloc(Ctx, 0, 9, 2);
   auto *AccessRel = isl_map_universe(isl_space_copy(AccessRelSpace));
   auto *ConstrSpace = isl_local_space_from_space(AccessRelSpace);
   auto *Constr = isl_constraint_alloc_equality(ConstrSpace);
-  Constr = isl_constraint_set_coefficient_si(Constr, isl_dim_out, 1, -1);
+  Constr = isl_constraint_set_coefficient_si(Constr, isl_dim_out, 0, -1);
   Constr = isl_constraint_set_coefficient_si(Constr, isl_dim_in, 5, 1);
   Constr =
       isl_constraint_set_coefficient_si(Constr, isl_dim_in, FirstDim, Coeff);
   AccessRel = isl_map_add_constraint(AccessRel, Constr);
-  AccessRel = isl_map_fix_si(AccessRel, isl_dim_out, 0, 0);
-  AccessRel = isl_map_equate(AccessRel, isl_dim_in, SecondDim, isl_dim_out, 2);
+  AccessRel = isl_map_equate(AccessRel, isl_dim_in, SecondDim, isl_dim_out, 1);
   return isl_map_apply_range(MapOldIndVar, AccessRel);
 }
 
diff --git a/polly/test/Isl/CodeGen/MemAccess/create_arrays.ll b/polly/test/Isl/CodeGen/MemAccess/create_arrays.ll
index c63952b..1ee647a 100644
--- a/polly/test/Isl/CodeGen/MemAccess/create_arrays.ll
+++ b/polly/test/Isl/CodeGen/MemAccess/create_arrays.ll
@@ -11,11 +11,11 @@
 ; CHECK:        double MemRef_B[*][1024]; // Element size 8
 ; CHECK:        double MemRef_beta; // Element size 8
 ; CHECK:        double MemRef_A[*][1056]; // Element size 8
-; CHECK:        double D[*][270336]; // Element size 8
-; CHECK:        double E[*][270336][200000]; // Element size 8
-; CHECK:        i64 F[*][270336]; // Element size 8
+; CHECK:        double D[270336]; // Element size 8
+; CHECK:        double E[270336][200000]; // Element size 8
+; CHECK:        i64 F[270336]; // Element size 8
 ;
-; CHECK:New access function '{ Stmt_bb12[i0, i1, i2] -> E[0, i2, i0] }' detected in JSCOP file
+; CHECK:New access function '{ Stmt_bb12[i0, i1, i2] -> E[i2, i0] }' detected in JSCOP file
 ;
 ; CODEGEN:define internal void @create_arrays(i32 %arg, i32 %arg1, i32 %arg2, double %arg3, double %beta, [1056 x double]* %A, [1024 x double]* %B, [1056 x double]* %arg7) #0 {
 ; CODEGEN:bb:
@@ -27,9 +27,8 @@
 ;
 ; CODEGEN:  %beta.s2a.reload = load double, double* %beta.s2a
 ; CODEGEN:  %polly.access.cast.E = bitcast [270336 x [200000 x double]]* %E to double*
-; CODEGEN:  %polly.access.add.E = add nsw i64 0, %polly.indvar33
-; CODEGEN:  %polly.access.mul.E = mul nsw i64 %polly.access.add.E, 200000
-; CODEGEN:  %polly.access.add.E36 = add nsw i64 %polly.access.mul.E, %polly.indvar
+; CODEGEN:  %polly.access.mul.E = mul nsw i64 %polly.indvar33, 200000
+; CODEGEN:  %polly.access.add.E = add nsw i64 %polly.access.mul.E, %polly.indvar
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-unknown"
diff --git a/polly/test/Isl/CodeGen/MemAccess/create_arrays___%bb9---%bb26.jscop b/polly/test/Isl/CodeGen/MemAccess/create_arrays___%bb9---%bb26.jscop
index 4ceaa41..82f3e7a 100644
--- a/polly/test/Isl/CodeGen/MemAccess/create_arrays___%bb9---%bb26.jscop
+++ b/polly/test/Isl/CodeGen/MemAccess/create_arrays___%bb9---%bb26.jscop
@@ -1,13 +1,13 @@
 {
    "arrays" : [
-     {
+      {
          "name" : "MemRef_B",
-         "sizes" : [ "1024" ],
+         "sizes" : [ "*", "1024" ],
          "type" : "double"
       },
       {
          "name" : "MemRef_A",
-         "sizes" : [ "1056" ],
+         "sizes" : [ "*", "1056" ],
          "type" : "double"
       }
    ],
diff --git a/polly/test/Isl/CodeGen/MemAccess/create_arrays___%bb9---%bb26.jscop.transformed b/polly/test/Isl/CodeGen/MemAccess/create_arrays___%bb9---%bb26.jscop.transformed
index 47c3ab9..0ed3e59 100644
--- a/polly/test/Isl/CodeGen/MemAccess/create_arrays___%bb9---%bb26.jscop.transformed
+++ b/polly/test/Isl/CodeGen/MemAccess/create_arrays___%bb9---%bb26.jscop.transformed
@@ -2,12 +2,12 @@
    "arrays" : [
        {
          "name" : "MemRef_B",
-         "sizes" : [ "1024" ],
+         "sizes" : [ "*", "1024" ],
          "type" : "double"
       },
       {
          "name" : "MemRef_A",
-         "sizes" : [ "1056" ],
+         "sizes" : [ "*", "1056" ],
          "type" : "double"
       },
       {
@@ -33,7 +33,7 @@
          "accesses" : [
             {
                "kind" : "read",
-               "relation" : "{ Stmt_bb12[i0, i1, i2] -> E[0, i2, i0] }"
+               "relation" : "{ Stmt_bb12[i0, i1, i2] -> E[i2, i0] }"
             },
             {
                "kind" : "read",
diff --git a/polly/test/Isl/CodeGen/MemAccess/map_scalar_access___%outer.for---%return.jscop b/polly/test/Isl/CodeGen/MemAccess/map_scalar_access___%outer.for---%return.jscop
index 0021fa0..3e99198 100644
--- a/polly/test/Isl/CodeGen/MemAccess/map_scalar_access___%outer.for---%return.jscop
+++ b/polly/test/Isl/CodeGen/MemAccess/map_scalar_access___%outer.for---%return.jscop
@@ -2,6 +2,7 @@
    "arrays" : [
       {
          "name" : "MemRef_A",
+         "sizes" : [ "*" ],
          "type" : "double"
       }
    ],
diff --git a/polly/test/Isl/CodeGen/MemAccess/map_scalar_access___%outer.for---%return.jscop.transformed b/polly/test/Isl/CodeGen/MemAccess/map_scalar_access___%outer.for---%return.jscop.transformed
index e533752..d495694 100644
--- a/polly/test/Isl/CodeGen/MemAccess/map_scalar_access___%outer.for---%return.jscop.transformed
+++ b/polly/test/Isl/CodeGen/MemAccess/map_scalar_access___%outer.for---%return.jscop.transformed
@@ -2,6 +2,7 @@
    "arrays" : [
       {
          "name" : "MemRef_A",
+         "sizes" : [ "*" ],
          "type" : "double"
       }
    ],
diff --git a/polly/test/ScheduleOptimizer/mat_mul_pattern_data_layout.ll b/polly/test/ScheduleOptimizer/mat_mul_pattern_data_layout.ll
index db793bd..1f38a0e 100644
--- a/polly/test/ScheduleOptimizer/mat_mul_pattern_data_layout.ll
+++ b/polly/test/ScheduleOptimizer/mat_mul_pattern_data_layout.ll
@@ -9,14 +9,14 @@
 ;	     C[i][j] += alpha * A[i][k] * B[k][j];
 ;        }
 ;
-; CHECK:        double Packed_A[*][ { [] -> [(1024)] } ][ { [] -> [(4)] } ]; // Element size 8
-; CHECK:        double Packed_B[*][ { [] -> [(3072)] } ][ { [] -> [(8)] } ]; // Element size 8
+; CHECK:        double Packed_A[ { [] -> [(1024)] } ][ { [] -> [(4)] } ]; // Element size 8
+; CHECK:        double Packed_B[ { [] -> [(3072)] } ][ { [] -> [(8)] } ]; // Element size 8
 ;
 ; CHECK:                { Stmt_bb14[i0, i1, i2] -> MemRef_arg6[i0, i2] };
-; CHECK:           new: { Stmt_bb14[i0, i1, i2] -> Packed_A[0, o1, o2] : 256*floor((-i2 + o1)/256) = -i2 + o1 and 4*floor((-i0 + o2)/4) = -i0 + o2 and 0 <= o2 <= 3 and -3 + i0 - 16*floor((i0)/16) <= 4*floor((o1)/256) <= i0 - 16*floor((i0)/16) };
+; CHECK:           new: { Stmt_bb14[i0, i1, i2] -> Packed_A[o0, o1] : 256*floor((-i2 + o0)/256) = -i2 + o0 and 4*floor((-i0 + o1)/4) = -i0 + o1 and 0 <= o1 <= 3 and -3 + i0 - 16*floor((i0)/16) <= 4*floor((o0)/256) <= i0 - 16*floor((i0)/16) };
 ;
 ; CHECK:                { Stmt_bb14[i0, i1, i2] -> MemRef_arg7[i2, i1] };
-; CHECK:           new: { Stmt_bb14[i0, i1, i2] -> Packed_B[0, o1, o2] : 256*floor((-i2 + o1)/256) = -i2 + o1 and 8*floor((-i1 + o2)/8) = -i1 + o2 and 0 <= o2 <= 7 and -7 + i1 - 96*floor((i1)/96) <= 8*floor((o1)/256) <= i1 - 96*floor((i1)/96) };
+; CHECK:           new: { Stmt_bb14[i0, i1, i2] -> Packed_B[o0, o1] : 256*floor((-i2 + o0)/256) = -i2 + o0 and 8*floor((-i1 + o1)/8) = -i1 + o1 and 0 <= o1 <= 7 and -7 + i1 - 96*floor((i1)/96) <= 8*floor((o0)/256) <= i1 - 96*floor((i1)/96) };
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-unknown"