[X86] Remove mask arguments from permvar builtins/intrinsics. Use a select in IR instead.
Someday maybe we'll use selects for all intrinsics.
llvm-svn: 332824
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index d477f46..7db891f 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -182,8 +182,8 @@
Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
Name == "avx512.cvtusi2sd" || // Added in 7.0
- Name == "avx512.mask.permvar.sf.256" || // Added in 7.0
- Name == "avx512.mask.permvar.si.256" || // Added in 7.0
+ Name.startswith("avx512.mask.permvar.") || // Added in 7.0
+ Name.startswith("avx512.mask.permvar.") || // Added in 7.0
Name == "sse2.pmulu.dq" || // Added in 7.0
Name == "sse41.pmuldq" || // Added in 7.0
Name == "avx2.pmulu.dq" || // Added in 7.0
@@ -1207,10 +1207,38 @@
IID = Intrinsic::x86_sse2_cvttps2dq;
} else if (Name == "cvttps2dq.256") {
IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
- } else if (Name == "permvar.sf.256") {
- IID = Intrinsic::x86_avx2_permps;
- } else if (Name == "permvar.si.256") {
- IID = Intrinsic::x86_avx2_permd;
+ } else if (Name.startswith("permvar.")) {
+ bool IsFloat = CI.getType()->isFPOrFPVectorTy();
+ if (VecWidth == 256 && EltWidth == 32 && IsFloat)
+ IID = Intrinsic::x86_avx2_permps;
+ else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
+ IID = Intrinsic::x86_avx2_permd;
+ else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
+ IID = Intrinsic::x86_avx512_permvar_df_256;
+ else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
+ IID = Intrinsic::x86_avx512_permvar_di_256;
+ else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
+ IID = Intrinsic::x86_avx512_permvar_sf_512;
+ else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
+ IID = Intrinsic::x86_avx512_permvar_si_512;
+ else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
+ IID = Intrinsic::x86_avx512_permvar_df_512;
+ else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
+ IID = Intrinsic::x86_avx512_permvar_di_512;
+ else if (VecWidth == 128 && EltWidth == 16)
+ IID = Intrinsic::x86_avx512_permvar_hi_128;
+ else if (VecWidth == 256 && EltWidth == 16)
+ IID = Intrinsic::x86_avx512_permvar_hi_256;
+ else if (VecWidth == 512 && EltWidth == 16)
+ IID = Intrinsic::x86_avx512_permvar_hi_512;
+ else if (VecWidth == 128 && EltWidth == 8)
+ IID = Intrinsic::x86_avx512_permvar_qi_128;
+ else if (VecWidth == 256 && EltWidth == 8)
+ IID = Intrinsic::x86_avx512_permvar_qi_256;
+ else if (VecWidth == 512 && EltWidth == 8)
+ IID = Intrinsic::x86_avx512_permvar_qi_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
} else
return false;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1dfa6f7..442280a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20508,15 +20508,12 @@
Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
- case VPERM_2OP_MASK : {
+ case VPERM_2OP : {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
- SDValue PassThru = Op.getOperand(3);
- SDValue Mask = Op.getOperand(4);
// Swap Src1 and Src2 in the node creation
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1),
- Mask, PassThru, Subtarget, DAG);
+ return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1);
}
case VPERM_3OP_MASKZ:
case VPERM_3OP_MASK:{
@@ -20914,13 +20911,6 @@
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
- case Intrinsic::x86_avx2_permd:
- case Intrinsic::x86_avx2_permps:
- // Operands intentionally swapped. Mask is last operand to intrinsic,
- // but second operand for node/instruction.
- return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(1));
-
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
// or testp pattern and a setcc for the result.
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 2078e1f..46a0efd 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -31,7 +31,7 @@
FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3,
FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3,
IFMA_OP_MASK, IFMA_OP_MASKZ,
- VPERM_2OP_MASK, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
+ VPERM_2OP, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
@@ -406,6 +406,8 @@
X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx2_paddus_b, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(avx2_paddus_w, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
+ X86_INTRINSIC_DATA(avx2_permd, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx2_permps, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
@@ -797,30 +799,6 @@
X86_INTRINSIC_DATA(avx512_mask_paddus_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(avx512_mask_paddus_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(avx512_mask_paddus_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_df_256, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_df_512, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_di_256, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_di_512, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_hi_128, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_hi_256, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_hi_512, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_qi_128, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_qi_256, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_qi_512, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_sf_512, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_si_512, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, INTR_TYPE_1OP_MASK,
@@ -1420,10 +1398,20 @@
X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx512_pmaddubs_w_512, INTR_TYPE_2OP,
- X86ISD::VPMADDUBSW, 0),
- X86_INTRINSIC_DATA(avx512_pmaddw_d_512, INTR_TYPE_2OP,
- X86ISD::VPMADDWD, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_df_256, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_df_512, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_di_256, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_di_512, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_hi_128, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_hi_256, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_hi_512, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_qi_128, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_qi_256, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_qi_512, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_sf_512, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_si_512, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_pmaddubs_w_512, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0),
+ X86_INTRINSIC_DATA(avx512_pmaddw_d_512, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(avx512_pmul_hr_sw_512, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
X86_INTRINSIC_DATA(avx512_pmulh_w_512, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(avx512_pmulhu_w_512, INTR_TYPE_2OP, ISD::MULHU, 0),
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 4a01c11..2516179 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2785,30 +2785,22 @@
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
+ case Intrinsic::x86_avx512_permvar_df_256:
+ case Intrinsic::x86_avx512_permvar_df_512:
+ case Intrinsic::x86_avx512_permvar_di_256:
+ case Intrinsic::x86_avx512_permvar_di_512:
+ case Intrinsic::x86_avx512_permvar_hi_128:
+ case Intrinsic::x86_avx512_permvar_hi_256:
+ case Intrinsic::x86_avx512_permvar_hi_512:
+ case Intrinsic::x86_avx512_permvar_qi_128:
+ case Intrinsic::x86_avx512_permvar_qi_256:
+ case Intrinsic::x86_avx512_permvar_qi_512:
+ case Intrinsic::x86_avx512_permvar_sf_512:
+ case Intrinsic::x86_avx512_permvar_si_512:
if (Value *V = simplifyX86vpermv(*II, Builder))
return replaceInstUsesWith(*II, V);
break;
- case Intrinsic::x86_avx512_mask_permvar_df_256:
- case Intrinsic::x86_avx512_mask_permvar_df_512:
- case Intrinsic::x86_avx512_mask_permvar_di_256:
- case Intrinsic::x86_avx512_mask_permvar_di_512:
- case Intrinsic::x86_avx512_mask_permvar_hi_128:
- case Intrinsic::x86_avx512_mask_permvar_hi_256:
- case Intrinsic::x86_avx512_mask_permvar_hi_512:
- case Intrinsic::x86_avx512_mask_permvar_qi_128:
- case Intrinsic::x86_avx512_mask_permvar_qi_256:
- case Intrinsic::x86_avx512_mask_permvar_qi_512:
- case Intrinsic::x86_avx512_mask_permvar_sf_512:
- case Intrinsic::x86_avx512_mask_permvar_si_512:
- if (Value *V = simplifyX86vpermv(*II, Builder)) {
- // We simplified the permuting, now create a select for the masking.
- V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
- Builder);
- return replaceInstUsesWith(*II, V);
- }
- break;
-
case Intrinsic::x86_avx_maskload_ps:
case Intrinsic::x86_avx_maskload_pd:
case Intrinsic::x86_avx_maskload_ps_256: