[CostModel][X86][XOP] Improve costs for XOP shuffles
VPPERM/VPERMIL2PD/VPERMIL2PS all provide more effective 2-input shuffles than regular AVX instructions
llvm-svn: 311005
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index ed56e45..2f44d61 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -861,6 +861,28 @@
if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
+ static const CostTblEntry XOPShuffleTbl[] = {
+ { TTI::SK_PermuteSingleSrc, MVT::v4f64, 2 }, // vperm2f128 + vpermil2pd
+ { TTI::SK_PermuteSingleSrc, MVT::v8f32, 2 }, // vperm2f128 + vpermil2ps
+ { TTI::SK_PermuteSingleSrc, MVT::v4i64, 2 }, // vperm2f128 + vpermil2pd
+ { TTI::SK_PermuteSingleSrc, MVT::v8i32, 2 }, // vperm2f128 + vpermil2ps
+ { TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 }, // vextractf128 + 2*vpperm
+ // + vinsertf128
+ { TTI::SK_PermuteSingleSrc, MVT::v32i8, 4 }, // vextractf128 + 2*vpperm
+ // + vinsertf128
+
+ { TTI::SK_PermuteTwoSrc, MVT::v16i16, 9 }, // 2*vextractf128 + 6*vpperm
+ // + vinsertf128
+ { TTI::SK_PermuteTwoSrc, MVT::v8i16, 1 }, // vpperm
+ { TTI::SK_PermuteTwoSrc, MVT::v32i8, 9 }, // 2*vextractf128 + 6*vpperm
+ // + vinsertf128
+ { TTI::SK_PermuteTwoSrc, MVT::v16i8, 1 }, // vpperm
+ };
+
+ if (ST->hasXOP())
+ if (const auto *Entry = CostTableLookup(XOPShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
+
static const CostTblEntry AVX1ShuffleTbl[] = {
{ TTI::SK_Broadcast, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
{ TTI::SK_Broadcast, MVT::v8f32, 2 }, // vperm2f128 + vpermilps