[CostModel] Replace ShuffleKind::SK_Alternate with ShuffleKind::SK_Select (PR33744)
As discussed on PR33744, this patch relaxes ShuffleKind::SK_Alternate which requires shuffle masks to only match an alternating pattern from its 2 sources:
e.g. v4f32: <0,5,2,7> or <4,1,6,3>
This seems far too restrictive as most SIMD hardware which will implement it using a general blend/bit-select instruction, so replaces it with SK_Select, permitting elements from either source as long as they are inline:
e.g. v4f32: <0,5,2,7>, <4,1,6,3>, <0,1,6,7>, <4,1,2,3> etc.
This initial patch just updates the name and cost model shuffle mask analysis, later patch reviews will update SLP to better utilise this - it still limits itself to SK_Alternate style patterns.
Differential Revision: https://reviews.llvm.org/D47985
llvm-svn: 334513
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 71fa26f..2fbba6a 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -912,8 +912,8 @@
{ TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb
{ TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb
- { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
- { TTI::SK_Alternate, MVT::v32i8, 1 }, // vpblendvb
+ { TTI::SK_Select, MVT::v16i16, 1 }, // vpblendvb
+ { TTI::SK_Select, MVT::v32i8, 1 }, // vpblendvb
{ TTI::SK_PermuteSingleSrc, MVT::v4f64, 1 }, // vpermpd
{ TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps
@@ -977,12 +977,12 @@
{ TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb
// + vinsertf128
- { TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd
- { TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd
- { TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps
- { TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps
- { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor
- { TTI::SK_Alternate, MVT::v32i8, 3 }, // vpand + vpandn + vpor
+ { TTI::SK_Select, MVT::v4i64, 1 }, // vblendpd
+ { TTI::SK_Select, MVT::v4f64, 1 }, // vblendpd
+ { TTI::SK_Select, MVT::v8i32, 1 }, // vblendps
+ { TTI::SK_Select, MVT::v8f32, 1 }, // vblendps
+ { TTI::SK_Select, MVT::v16i16, 3 }, // vpand + vpandn + vpor
+ { TTI::SK_Select, MVT::v32i8, 3 }, // vpand + vpandn + vpor
{ TTI::SK_PermuteSingleSrc, MVT::v4f64, 3 }, // 2*vperm2f128 + vshufpd
{ TTI::SK_PermuteSingleSrc, MVT::v4i64, 3 }, // 2*vperm2f128 + vshufpd
@@ -1008,12 +1008,12 @@
return LT.first * Entry->Cost;
static const CostTblEntry SSE41ShuffleTbl[] = {
- { TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw
- { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
- { TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw
- { TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps
- { TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw
- { TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb
+ { TTI::SK_Select, MVT::v2i64, 1 }, // pblendw
+ { TTI::SK_Select, MVT::v2f64, 1 }, // movsd
+ { TTI::SK_Select, MVT::v4i32, 1 }, // pblendw
+ { TTI::SK_Select, MVT::v4f32, 1 }, // blendps
+ { TTI::SK_Select, MVT::v8i16, 1 }, // pblendw
+ { TTI::SK_Select, MVT::v16i8, 1 } // pblendvb
};
if (ST->hasSSE41())
@@ -1027,8 +1027,8 @@
{ TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb
{ TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
- { TTI::SK_Alternate, MVT::v8i16, 3 }, // 2*pshufb + por
- { TTI::SK_Alternate, MVT::v16i8, 3 }, // 2*pshufb + por
+ { TTI::SK_Select, MVT::v8i16, 3 }, // 2*pshufb + por
+ { TTI::SK_Select, MVT::v16i8, 3 }, // 2*pshufb + por
{ TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // pshufb
{ TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb
@@ -1055,11 +1055,11 @@
{ TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw
// + 2*pshufd + 2*unpck + packus
- { TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd
- { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
- { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps
- { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por
- { TTI::SK_Alternate, MVT::v16i8, 3 }, // pand + pandn + por
+ { TTI::SK_Select, MVT::v2i64, 1 }, // movsd
+ { TTI::SK_Select, MVT::v2f64, 1 }, // movsd
+ { TTI::SK_Select, MVT::v4i32, 2 }, // 2*shufps
+ { TTI::SK_Select, MVT::v8i16, 3 }, // pand + pandn + por
+ { TTI::SK_Select, MVT::v16i8, 3 }, // pand + pandn + por
{ TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // shufpd
{ TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // pshufd
@@ -1083,7 +1083,7 @@
static const CostTblEntry SSE1ShuffleTbl[] = {
{ TTI::SK_Broadcast, MVT::v4f32, 1 }, // shufps
{ TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps
- { TTI::SK_Alternate, MVT::v4f32, 2 }, // 2*shufps
+ { TTI::SK_Select, MVT::v4f32, 2 }, // 2*shufps
{ TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // shufps
{ TTI::SK_PermuteTwoSrc, MVT::v4f32, 2 }, // 2*shufps
};
@@ -1941,8 +1941,8 @@
if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
LT.second.getVectorNumElements() == NumElem)
// Promotion requires expand/truncate for data and a shuffle for mask.
- Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, nullptr) +
- getShuffleCost(TTI::SK_Alternate, MaskTy, 0, nullptr);
+ Cost += getShuffleCost(TTI::SK_Select, SrcVTy, 0, nullptr) +
+ getShuffleCost(TTI::SK_Select, MaskTy, 0, nullptr);
else if (LT.second.getVectorNumElements() > NumElem) {
VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(),