AVX-512 Loop Vectorizer: Cost calculation for interleave load/store patterns.

X86 target does not provide any target specific cost calculation for interleave patterns.It uses the common target-independent calculation, which gives very high numbers. As a result, the scalar version is chosen in many cases. The situation on AVX-512 is even worse, since we have 3-src shuffles that significantly reduce the cost.

In this patch I calculate the cost on AVX-512. It will allow to compare interleave pattern with gather/scatter and choose a better solution (PR31426).

* Shiffle-broadcast cost will be changed in Simon's upcoming patch.

Differential Revision: https://reviews.llvm.org/D28118

llvm-svn: 290810
diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp
index 870c446..67d1773 100644
--- a/llvm/lib/Analysis/CostModel.cpp
+++ b/llvm/lib/Analysis/CostModel.cpp
@@ -97,6 +97,27 @@
   return true;
 }
 
+static bool isSingleSourceVectorMask(ArrayRef<int> Mask) {
+  bool Vec0 = false;
+  bool Vec1 = false;
+  for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) {
+    if (Mask[i] >= 0) {
+      if ((unsigned)Mask[i] >= NumVecElts)
+        Vec1 = true;
+      else
+        Vec0 = true;
+    }
+  }
+  return !(Vec0 && Vec1);
+}
+
+static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) {
+  for (unsigned i = 0; i < Mask.size(); ++i)
+    if (Mask[i] > 0)
+      return false;
+  return true;
+}
+
 static bool isAlternateVectorMask(ArrayRef<int> Mask) {
   bool isAlternate = true;
   unsigned MaskSize = Mask.size();
@@ -501,6 +522,17 @@
       if (isAlternateVectorMask(Mask))
         return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate,
                                    VecTypOp0, 0, nullptr);
+
+      if (isZeroEltBroadcastVectorMask(Mask))
+        return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast,
+                                   VecTypOp0, 0, nullptr);
+
+      if (isSingleSourceVectorMask(Mask))
+        return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
+                                   VecTypOp0, 0, nullptr);
+
+      return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
+                                 VecTypOp0, 0, nullptr);
     }
 
     return -1;