[X86][SSE] Add lowering to cvttpd2dq/cvttps2dq for sitofp v2f64/2f32 to 2i32
As discussed on PR28461 we currently miss the chance to lower "fptosi <2 x double> %arg to <2 x i32>" to cvttpd2dq due to its use of illegal types.
This patch adds support for fptosi to 2i32 from both 2f64 and 2f32.
It also recognises that cvttpd2dq zeroes the upper 64-bits of the xmm result (similar to D23797) - we still don't do this for the cvttpd2dq/cvttps2dq intrinsics - this can be done in a future patch.
Differential Revision: https://reviews.llvm.org/D23808
llvm-svn: 284459
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index eaa2c5a..c1b6a22 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -855,8 +855,9 @@
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
@@ -22261,6 +22262,31 @@
case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+ if (IsSigned && N->getValueType(0) == MVT::v2i32) {
+ assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
+ SDValue Src = N->getOperand(0);
+ if (Src.getValueType() == MVT::v2f64) {
+ SDValue Idx = DAG.getIntPtrConstant(0, dl);
+ SDValue Res = DAG.getNode(X86ISD::CVTTPD2DQ, dl, MVT::v4i32, Src);
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
+ Results.push_back(Res);
+ return;
+ }
+ if (Src.getValueType() == MVT::v2f32) {
+ SDValue Idx = DAG.getIntPtrConstant(0, dl);
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
+ DAG.getUNDEF(MVT::v2f32));
+ Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, Res);
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
+ Results.push_back(Res);
+ return;
+ }
+
+ // The FP_TO_INTHelper below only handles f32/f64/f80 scalar inputs,
+ // so early out here.
+ return;
+ }
+
std::pair<SDValue,SDValue> Vals =
FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
@@ -22579,6 +22605,7 @@
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";
case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND";
+ case X86ISD::CVTTPD2DQ: return "X86ISD::CVTTPD2DQ";
case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD";
case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD";
case X86ISD::CVT2MASK: return "X86ISD::CVT2MASK";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index e1429b8..09d6ee4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -302,6 +302,9 @@
// Vector FP round.
VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
+ // Vector double to signed integer (truncated).
+ CVTTPD2DQ,
+
// Vector signed/unsigned integer to double.
CVTDQ2PD, CVTUDQ2PD,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 144c44a..de0c76f 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -6309,6 +6309,16 @@
VR128X:$src1, sub_xmm)))), sub_ymm)>;
}
+let Predicates = [HasAVX512, HasVLX] in {
+ def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvttpd2dq (v2f64 VR128X:$src)))))))),
+ (VCVTTPD2DQZ128rr VR128:$src)>;
+ def : Pat<(v4i32 (X86cvttpd2dq (v2f64 VR128X:$src))),
+ (VCVTTPD2DQZ128rr VR128X:$src)>;
+ def : Pat<(v4i32 (X86cvttpd2dq (loadv2f64 addr:$src))),
+ (VCVTTPD2DQZ128rm addr:$src)>;
+}
+
let Predicates = [HasAVX512] in {
def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
(VCVTPD2PSZrm addr:$src)>;
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index f1b9475..594af33 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -67,6 +67,9 @@
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
+def X86cvttpd2dq: SDNode<"X86ISD::CVTTPD2DQ",
+ SDTypeProfile<1, 1, [SDTCisVT<0, v4i32>,
+ SDTCisVT<1, v2f64>]>>;
def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD",
SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>,
SDTCisVT<1, v4i32>]>>;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 837e0d3..8be5b34 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -2111,6 +2111,14 @@
(VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;
let Predicates = [HasAVX, NoVLX] in {
+ def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvttpd2dq (v2f64 VR128:$src)))))))),
+ (VCVTTPD2DQrr VR128:$src)>;
+ def : Pat<(v4i32 (X86cvttpd2dq (v2f64 VR128:$src))),
+ (VCVTTPD2DQrr VR128:$src)>;
+ def : Pat<(v4i32 (X86cvttpd2dq (loadv2f64 addr:$src))),
+ (VCVTTPD2DQXrm addr:$src)>;
+
def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
(VCVTTPD2DQYrr VR256:$src)>;
def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
@@ -2128,6 +2136,16 @@
IIC_SSE_CVT_PD_RM>,
Sched<[WriteCvtF2ILd]>;
+let Predicates = [UseSSE2] in {
+ def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvttpd2dq (v2f64 VR128:$src)))))))),
+ (CVTTPD2DQrr VR128:$src)>;
+ def : Pat<(v4i32 (X86cvttpd2dq (v2f64 VR128:$src))),
+ (CVTTPD2DQrr VR128:$src)>;
+ def : Pat<(v4i32 (X86cvttpd2dq (memopv2f64 addr:$src))),
+ (CVTTPD2DQrm addr:$src)>;
+} // Predicates = [UseSSE2]
+
// Convert packed single to packed double
let Predicates = [HasAVX] in {
// SSE2 instructions without OpSize prefix
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 525575e..a37ecd5 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -805,6 +805,8 @@
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
+ { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 3 },
+
{ ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },