[SLP] Support for horizontal min/max reduction.

SLP vectorizer supports horizontal reductions for Add/FAdd binary
operations. Patch adds support for horizontal min/max reductions.
Function getReductionCost() is split to getArithmeticReductionCost() for
binary operation reductions and getMinMaxReductionCost() for min/max
reductions.
Patch fixes PR26956.

Differential revision: https://reviews.llvm.org/D27846

llvm-svn: 312791
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 871a38d..79f192c 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1999,6 +1999,152 @@
   return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise);
 }
 
+int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
+                                       bool IsPairwise, bool IsUnsigned) {
+  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+
+  MVT MTy = LT.second;
+
+  int ISD;
+  if (ValTy->isIntOrIntVectorTy()) {
+    ISD = IsUnsigned ? ISD::UMIN : ISD::SMIN;
+  } else {
+    assert(ValTy->isFPOrFPVectorTy() &&
+           "Expected float point or integer vector type.");
+    ISD = ISD::FMINNUM;
+  }
+
+  // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
+  // and make it as the cost.
+
+  static const CostTblEntry SSE42CostTblPairWise[] = {
+      {ISD::FMINNUM, MVT::v2f64, 3},
+      {ISD::FMINNUM, MVT::v4f32, 2},
+      {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8"
+      {ISD::UMIN, MVT::v2i64, 8}, // The data reported by the IACA is "8.6"
+      {ISD::SMIN, MVT::v4i32, 1}, // The data reported by the IACA is "1.5"
+      {ISD::UMIN, MVT::v4i32, 2}, // The data reported by the IACA is "1.8"
+      {ISD::SMIN, MVT::v8i16, 2},
+      {ISD::UMIN, MVT::v8i16, 2},
+  };
+
+  static const CostTblEntry AVX1CostTblPairWise[] = {
+      {ISD::FMINNUM, MVT::v4f32, 1},
+      {ISD::FMINNUM, MVT::v4f64, 1},
+      {ISD::FMINNUM, MVT::v8f32, 2},
+      {ISD::SMIN, MVT::v2i64, 3},
+      {ISD::UMIN, MVT::v2i64, 3},
+      {ISD::SMIN, MVT::v4i32, 1},
+      {ISD::UMIN, MVT::v4i32, 1},
+      {ISD::SMIN, MVT::v8i16, 1},
+      {ISD::UMIN, MVT::v8i16, 1},
+      {ISD::SMIN, MVT::v8i32, 3},
+      {ISD::UMIN, MVT::v8i32, 3},
+  };
+
+  static const CostTblEntry AVX2CostTblPairWise[] = {
+      {ISD::SMIN, MVT::v4i64, 2},
+      {ISD::UMIN, MVT::v4i64, 2},
+      {ISD::SMIN, MVT::v8i32, 1},
+      {ISD::UMIN, MVT::v8i32, 1},
+      {ISD::SMIN, MVT::v16i16, 1},
+      {ISD::UMIN, MVT::v16i16, 1},
+      {ISD::SMIN, MVT::v32i8, 2},
+      {ISD::UMIN, MVT::v32i8, 2},
+  };
+
+  static const CostTblEntry AVX512CostTblPairWise[] = {
+      {ISD::FMINNUM, MVT::v8f64, 1},
+      {ISD::FMINNUM, MVT::v16f32, 2},
+      {ISD::SMIN, MVT::v8i64, 2},
+      {ISD::UMIN, MVT::v8i64, 2},
+      {ISD::SMIN, MVT::v16i32, 1},
+      {ISD::UMIN, MVT::v16i32, 1},
+  };
+
+  static const CostTblEntry SSE42CostTblNoPairWise[] = {
+      {ISD::FMINNUM, MVT::v2f64, 3},
+      {ISD::FMINNUM, MVT::v4f32, 3},
+      {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8"
+      {ISD::UMIN, MVT::v2i64, 9}, // The data reported by the IACA is "8.6"
+      {ISD::SMIN, MVT::v4i32, 1}, // The data reported by the IACA is "1.5"
+      {ISD::UMIN, MVT::v4i32, 2}, // The data reported by the IACA is "1.8"
+      {ISD::SMIN, MVT::v8i16, 1}, // The data reported by the IACA is "1.5"
+      {ISD::UMIN, MVT::v8i16, 2}, // The data reported by the IACA is "1.8"
+  };
+
+  static const CostTblEntry AVX1CostTblNoPairWise[] = {
+      {ISD::FMINNUM, MVT::v4f32, 1},
+      {ISD::FMINNUM, MVT::v4f64, 1},
+      {ISD::FMINNUM, MVT::v8f32, 1},
+      {ISD::SMIN, MVT::v2i64, 3},
+      {ISD::UMIN, MVT::v2i64, 3},
+      {ISD::SMIN, MVT::v4i32, 1},
+      {ISD::UMIN, MVT::v4i32, 1},
+      {ISD::SMIN, MVT::v8i16, 1},
+      {ISD::UMIN, MVT::v8i16, 1},
+      {ISD::SMIN, MVT::v8i32, 2},
+      {ISD::UMIN, MVT::v8i32, 2},
+  };
+
+  static const CostTblEntry AVX2CostTblNoPairWise[] = {
+      {ISD::SMIN, MVT::v4i64, 1},
+      {ISD::UMIN, MVT::v4i64, 1},
+      {ISD::SMIN, MVT::v8i32, 1},
+      {ISD::UMIN, MVT::v8i32, 1},
+      {ISD::SMIN, MVT::v16i16, 1},
+      {ISD::UMIN, MVT::v16i16, 1},
+      {ISD::SMIN, MVT::v32i8, 1},
+      {ISD::UMIN, MVT::v32i8, 1},
+  };
+
+  static const CostTblEntry AVX512CostTblNoPairWise[] = {
+      {ISD::FMINNUM, MVT::v8f64, 1},
+      {ISD::FMINNUM, MVT::v16f32, 2},
+      {ISD::SMIN, MVT::v8i64, 1},
+      {ISD::UMIN, MVT::v8i64, 1},
+      {ISD::SMIN, MVT::v16i32, 1},
+      {ISD::UMIN, MVT::v16i32, 1},
+  };
+
+  if (IsPairwise) {
+    if (ST->hasAVX512())
+      if (const auto *Entry = CostTableLookup(AVX512CostTblPairWise, ISD, MTy))
+        return LT.first * Entry->Cost;
+
+    if (ST->hasAVX2())
+      if (const auto *Entry = CostTableLookup(AVX2CostTblPairWise, ISD, MTy))
+        return LT.first * Entry->Cost;
+
+    if (ST->hasAVX())
+      if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy))
+        return LT.first * Entry->Cost;
+
+    if (ST->hasSSE42())
+      if (const auto *Entry = CostTableLookup(SSE42CostTblPairWise, ISD, MTy))
+        return LT.first * Entry->Cost;
+  } else {
+    if (ST->hasAVX512())
+      if (const auto *Entry =
+              CostTableLookup(AVX512CostTblNoPairWise, ISD, MTy))
+        return LT.first * Entry->Cost;
+
+    if (ST->hasAVX2())
+      if (const auto *Entry = CostTableLookup(AVX2CostTblNoPairWise, ISD, MTy))
+        return LT.first * Entry->Cost;
+
+    if (ST->hasAVX())
+      if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
+        return LT.first * Entry->Cost;
+
+    if (ST->hasSSE42())
+      if (const auto *Entry = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy))
+        return LT.first * Entry->Cost;
+  }
+
+  return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned);
+}
+
 /// \brief Calculate the cost of materializing a 64-bit value. This helper
 /// method might only calculate a fraction of a larger immediate. Therefore it
 /// is valid to return a cost of ZERO.