[X86][AVX512] Fix DAG lowering and add intrinsics for fixupimm
Cover all widths and types (pd/ps/sd/ss) of the fixupimm instructions and intrinsics.
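
For context, a minimal usage sketch of the C-level intrinsics these lower to, assuming the standard Intel _mm512_fixupimm_* / _mm_fixupimm_* names from <immintrin.h>; the helper functions, the fixup-table operand 'tbl' and the imm8 value 0 below are illustrative placeholders, not part of this patch:

  /* Sketch only: packed, merge-masked, zero-masked and scalar forms that this
     change teaches the backend to select. Build with -mavx512f; the 128/256-bit
     packed forms additionally need -mavx512vl. */
  #include <immintrin.h>

  __m512d fixup_pd(__m512d a, __m512d b, __m512i tbl, __mmask8 k) {
    __m512d r0 = _mm512_fixupimm_pd(a, b, tbl, 0);          /* vfixupimmpd   */
    __m512d r1 = _mm512_mask_fixupimm_pd(a, k, b, tbl, 0);  /* merge-masking */
    __m512d r2 = _mm512_maskz_fixupimm_pd(k, a, b, tbl, 0); /* zero-masking  */
    return _mm512_add_pd(r0, _mm512_add_pd(r1, r2));
  }

  __m128 fixup_ss(__m128 a, __m128 b, __m128i tbl) {
    return _mm_fixupimm_ss(a, b, tbl, 0);                   /* vfixupimmss   */
  }

The 512-bit and scalar forms only require AVX512F; the 128/256-bit packed forms are gated on AVX512VL, matching the Predicates in the TableGen changes below.
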
Differential Revision: http://reviews.llvm.org/D16313
llvm-svn: 258124
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 322a013..e63c156 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -17024,6 +17024,35 @@
Src2, Src1);
return DAG.getBitcast(VT, Res);
}
+ case FIXUPIMMS:
+ case FIXUPIMMS_MASKZ:
+ case FIXUPIMM:
+ case FIXUPIMM_MASKZ: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue Src3 = Op.getOperand(3);
+ SDValue Imm = Op.getOperand(4);
+ SDValue Mask = Op.getOperand(5);
+ SDValue Passthru = (IntrData->Type == FIXUPIMM || IntrData->Type == FIXUPIMMS) ?
+ Src1 : getZeroVector(VT, Subtarget, DAG, dl);
+ // We support two forms of these intrinsics: with and without a rounding
+ // mode.
+ // First, we check whether the intrinsic has a rounding mode (7 operands);
+ // if not, we set the rounding mode to "current".
+ SDValue Rnd;
+ if (Op.getNumOperands() == 7)
+ Rnd = Op.getOperand(6);
+ else
+ Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
+ if (IntrData->Type == FIXUPIMM || IntrData->Type == FIXUPIMM_MASKZ)
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+ Src1, Src2, Src3, Imm, Rnd),
+ Mask, Passthru, Subtarget, DAG);
+ else // Scalar - FIXUPIMMS, FIXUPIMMS_MASKZ
+ return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+ Src1, Src2, Src3, Imm, Rnd),
+ Mask, Passthru, Subtarget, DAG);
+ }
case CONVERT_TO_MASK: {
MVT SrcVT = Op.getOperand(1).getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
@@ -20934,6 +20963,7 @@
case X86ISD::VPERMI: return "X86ISD::VPERMI";
case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG";
case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM";
+ case X86ISD::VFIXUPIMMS: return "X86ISD::VFIXUPIMMS";
case X86ISD::VRANGE: return "X86ISD::VRANGE";
case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
case X86ISD::PMULDQ: return "X86ISD::PMULDQ";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index f14073b..1820286 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -402,6 +402,7 @@
VPTERNLOG,
// Fix Up Special Packed Float32/64 values
VFIXUPIMM,
+ VFIXUPIMMS,
// Range Restriction Calculation For Packed Pairs of Float32/64 values
VRANGE,
// Reduce - Perform Reduction Transformation on scalar\packed FP
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 568cc5c..1281576 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -95,6 +95,12 @@
"v" # NumElts # "f" # EltSize,
VTName)));
+ ValueType IntVT = !cast<ValueType>(
+ !if (!eq (!srl(EltSize,5),0),
+ VTName,
+ !if (!eq(TypeVariantName, "f"),
+ "v" # NumElts # "i" # EltSize,
+ VTName)));
// The string to specify embedded broadcast in assembly.
string BroadcastStr = "{1to" # NumElts # "}";
@@ -301,7 +307,7 @@
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
- (X86select _.KRCWM:$mask, RHS, _.RC:$src1)>;
+ (X86select _.KRCWM:$mask, RHS, _.RC:$src1), X86select>;
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins,
@@ -6913,19 +6919,6 @@
opcPd, OpNode, prd>, EVEX_CD8<64, CD8VF>, VEX_W;
}
-defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd",
- avx512vl_f64_info, 0x54, X86VFixupimm, HasAVX512>,
- AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
-defm VFIXUPIMMPS : avx512_common_fp_sae_packed_imm<"vfixupimmps",
- avx512vl_f32_info, 0x54, X86VFixupimm, HasAVX512>,
- AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
-
-defm VFIXUPIMMSD: avx512_common_fp_sae_scalar_imm<"vfixupimmsd", f64x_info,
- 0x55, X86VFixupimm, HasAVX512>,
- AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
-defm VFIXUPIMMSS: avx512_common_fp_sae_scalar_imm<"vfixupimmss", f32x_info,
- 0x55, X86VFixupimm, HasAVX512>,
- AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
X86VReduce, HasDQI>, AVX512AIi8Base, EVEX;
@@ -7458,3 +7451,112 @@
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", avx512vl_i64_info>, VEX_W;
+//===----------------------------------------------------------------------===//
+// AVX-512 - FixupImm
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _>{
+ let Constraints = "$src1 = $dst" in {
+ defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
+ OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (_.IntVT _.RC:$src3),
+ (i32 imm:$src4),
+ (i32 FROUND_CURRENT))>;
+ let mayLoad = 1 in {
+ defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
+ OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (_.IntVT (bitconvert (_.LdFrag addr:$src3))),
+ (i32 imm:$src4),
+ (i32 FROUND_CURRENT))>;
+ defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
+ OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
+ "$src2, ${src3}"##_.BroadcastStr##", $src4",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (_.IntVT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
+ (i32 imm:$src4),
+ (i32 FROUND_CURRENT))>, EVEX_B;
+ }
+ } // Constraints = "$src1 = $dst"
+}
+
+multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, X86VectorVTInfo _>{
+let Constraints = "$src1 = $dst" in {
+ defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
+ OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
+ "$src2, $src3, {sae}, $src4",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (_.IntVT _.RC:$src3),
+ (i32 imm:$src4),
+ (i32 FROUND_NO_EXC))>, EVEX_B;
+ }
+}
+
+multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _, X86VectorVTInfo _src3VT> {
+ let Constraints = "$src1 = $dst" , Predicates = [HasAVX512] in {
+ defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
+ OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (_src3VT.VT _src3VT.RC:$src3),
+ (i32 imm:$src4),
+ (i32 FROUND_CURRENT))>;
+
+ defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
+ OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
+ "$src2, $src3, {sae}, $src4",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (_src3VT.VT _src3VT.RC:$src3),
+ (i32 imm:$src4),
+ (i32 FROUND_NO_EXC))>, EVEX_B;
+ let mayLoad = 1 in
+ defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
+ OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (_src3VT.VT (scalar_to_vector
+ (_src3VT.ScalarLdFrag addr:$src3))),
+ (i32 imm:$src4),
+ (i32 FROUND_CURRENT))>;
+ }
+}
+
+multiclass avx512_fixupimm_packed_all<AVX512VLVectorVTInfo _Vec>{
+ let Predicates = [HasAVX512] in
+ defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info512>,
+ avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, _Vec.info512>,
+ AVX512AIi8Base, EVEX_4V, EVEX_V512;
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info128>,
+ AVX512AIi8Base, EVEX_4V, EVEX_V128;
+ defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info256>,
+ AVX512AIi8Base, EVEX_4V, EVEX_V256;
+ }
+}
+
+defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
+ f32x_info, v4i32x_info>,
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
+ f64x_info, v2i64x_info>,
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
+defm VFIXUPIMMPS : avx512_fixupimm_packed_all<avx512vl_f32_info>,
+ EVEX_CD8<32, CD8VF>;
+defm VFIXUPIMMPD : avx512_fixupimm_packed_all<avx512vl_f64_info>,
+ EVEX_CD8<64, CD8VF>, VEX_W;
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 6432863..176cf56 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -309,6 +309,10 @@
SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>;
def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>, SDTCisInt<4>]>;
+def SDTFPTernaryOpImmRound: SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisVec<3>, SDTCisInt<4>, SDTCisInt<5>]>;
+def SDTFPTernaryOpImmRounds: SDTypeProfile<1, 5, [SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisInt<3>, SDTCisInt<4>, SDTCisInt<5>]>;
def SDTFPUnaryOpImmRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisInt<2>, SDTCisInt<3>]>;
@@ -405,7 +409,8 @@
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
-def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPBinOpImmRound>;
+def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPTernaryOpImmRound>;
+def X86VFixupimmScalar : SDNode<"X86ISD::VFIXUPIMMS", SDTFPTernaryOpImmRounds>;
def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImmRound>;
def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImmRound>;
def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImmRound>;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 0ce341a1..0bedb66 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -30,8 +30,8 @@
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
EXPAND_FROM_MEM, LOADA, LOADU, STOREA, STOREU, BLEND, INSERT_SUBVEC,
- TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK,
- CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
+ TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ,
+ FIXUPIMMS, FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
};
struct IntrinsicData {
@@ -810,6 +810,14 @@
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_128, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_256, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_512, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fixupimm_ps_128, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fixupimm_ps_256, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fixupimm_ps_512, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fixupimm_sd, FIXUPIMMS, X86ISD::VFIXUPIMMS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fixupimm_ss, FIXUPIMMS, X86ISD::VFIXUPIMMS, 0),
X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_128, FPCLASS, X86ISD::VFPCLASS, 0),
X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_256, FPCLASS, X86ISD::VFPCLASS, 0),
X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_512, FPCLASS, X86ISD::VFPCLASS, 0),
@@ -1842,6 +1850,22 @@
X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_128, FIXUPIMM_MASKZ,
+ X86ISD::VFIXUPIMM, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_256, FIXUPIMM_MASKZ,
+ X86ISD::VFIXUPIMM, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_512, FIXUPIMM_MASKZ,
+ X86ISD::VFIXUPIMM, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ps_128, FIXUPIMM_MASKZ,
+ X86ISD::VFIXUPIMM, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ps_256, FIXUPIMM_MASKZ,
+ X86ISD::VFIXUPIMM, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ps_512, FIXUPIMM_MASKZ,
+ X86ISD::VFIXUPIMM, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_fixupimm_sd, FIXUPIMMS_MASKZ,
+ X86ISD::VFIXUPIMMS, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ss, FIXUPIMMS_MASKZ,
+ X86ISD::VFIXUPIMMS, 0),
X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_128, TERLOG_OP_MASKZ,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_256, TERLOG_OP_MASKZ,