[X86] Make some cast costs more precise
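Make some AVX and AVX512 cast costs more precise:
- AVX512: add UINT_TO_FP costs for v8i64 -> v8f32/v8f64 (26), v2i32 -> v2f64 (1)
  and v2i64 -> v2f32 (5), plus a TODO noting that AVX512DQ + AVX512VL also
  provide cheap casts for 128-bit and 256-bit vectors.
- AVX: add UINT_TO_FP v2i32 -> v2f64 (6), lower UINT_TO_FP v2i64 -> v2f64 and
  v4i64 -> v4f64 from 2*10 and 4*10 to 10 and 20, add SINT_TO_FP
  v4i64 -> v4f64 (13), and add FP_EXTEND/FP_ROUND between v4f32 and v4f64 (1).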
Based on part of a patch by Elena Demikhovsky (D15604).
Differential Revision: http://reviews.llvm.org/D22064
llvm-svn: 275106
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 8fb72be..fbe9568 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -547,6 +547,9 @@
{ ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f64, 1 },
};
+ // TODO: For AVX512DQ + AVX512VL, we also have cheap casts for 128-bit and
+ // 256-bit wide vectors.
+
static const TypeConversionCostTblEntry AVX512FConversionTbl[] = {
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 },
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
@@ -577,6 +580,8 @@
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 26 },
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
@@ -591,11 +596,13 @@
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 5 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 12 },
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 26 },
@@ -685,6 +692,7 @@
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 6 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 },
@@ -693,9 +701,11 @@
// here. We have roughly 10 instructions per scalar element.
// Multiply that by the vector width.
// FIXME: remove that when PR19268 is fixed.
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
- { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 4*10 },
-
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 10 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 20 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
+
{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 7 },
// This node is expanded into scalarized operations but BasicTTI is overly
@@ -705,6 +715,9 @@
// should be factored in too. Inflating the cost per element by 1.
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 8*4 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 4*4 },
+
+ { ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 1 },
+ { ISD::FP_ROUND, MVT::v4f32, MVT::v4f64, 1 },
};
static const TypeConversionCostTblEntry SSE41ConversionTbl[] = {