[X86] Remove AVX512VBMI2 concat and shift intrinsics. Replace with target-independent funnel shift intrinsics.
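For example, the immediate forms now upgrade roughly as follows (illustrative sketch, values not taken from the regression tests; the splatted shift amount is produced by the new upgradeX86ConcatShift helper):

  %r = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %a, <4 x i32> %b, i32 7)
    =>
  %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 7, i32 7, i32 7>)

The vpshrd forms map to llvm.fshr with their first two operands swapped; the variable vpshldv/vpshrdv forms already take a vector amount and pass it through unchanged.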
Differential Revision: https://reviews.llvm.org/D56377
llvm-svn: 350554
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 51fef27..bb0d4ab 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -285,6 +285,12 @@
Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
+ Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
+ Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
+ Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
+ Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
+ Name.startswith("avx512.vpshld.") || // Added in 8.0
+ Name.startswith("avx512.vpshrd.") || // Added in 8.0
Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
@@ -980,6 +986,40 @@
return Res;
}
+static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
+                                    bool IsShiftRight, bool ZeroMask) {
+  Type *Ty = CI.getType();
+  Value *Op0 = CI.getArgOperand(0);
+  Value *Op1 = CI.getArgOperand(1);
+  Value *Amt = CI.getArgOperand(2);
+
+  if (IsShiftRight)
+    std::swap(Op0, Op1);
+
+  // The amount may be a scalar immediate, in which case create a splat vector.
+  // Funnel shift amounts are treated as modulo, and the types are all
+  // power-of-2, so we only care about the lowest log2 bits anyway.
+  if (Amt->getType() != Ty) {
+    unsigned NumElts = Ty->getVectorNumElements();
+    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
+    Amt = Builder.CreateVectorSplat(NumElts, Amt);
+  }
+
+  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
+  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
+  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
+
+  unsigned NumArgs = CI.getNumArgOperands();
+  if (NumArgs >= 4) { // For masked intrinsics.
+    Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
+                    ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
+                               CI.getArgOperand(0);
+    Value *Mask = CI.getOperand(NumArgs - 1);
+    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
+  }
+  return Res;
+}
+
static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
Value *Ptr, Value *Data, Value *Mask,
bool Aligned) {
@@ -1346,48 +1386,6 @@
IID = Intrinsic::x86_avx512_dbpsadbw_512;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("vpshld.")) {
- if (VecWidth == 128 && Name[7] == 'q')
- IID = Intrinsic::x86_avx512_vpshld_q_128;
- else if (VecWidth == 128 && Name[7] == 'd')
- IID = Intrinsic::x86_avx512_vpshld_d_128;
- else if (VecWidth == 128 && Name[7] == 'w')
- IID = Intrinsic::x86_avx512_vpshld_w_128;
- else if (VecWidth == 256 && Name[7] == 'q')
- IID = Intrinsic::x86_avx512_vpshld_q_256;
- else if (VecWidth == 256 && Name[7] == 'd')
- IID = Intrinsic::x86_avx512_vpshld_d_256;
- else if (VecWidth == 256 && Name[7] == 'w')
- IID = Intrinsic::x86_avx512_vpshld_w_256;
- else if (VecWidth == 512 && Name[7] == 'q')
- IID = Intrinsic::x86_avx512_vpshld_q_512;
- else if (VecWidth == 512 && Name[7] == 'd')
- IID = Intrinsic::x86_avx512_vpshld_d_512;
- else if (VecWidth == 512 && Name[7] == 'w')
- IID = Intrinsic::x86_avx512_vpshld_w_512;
- else
- llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("vpshrd.")) {
- if (VecWidth == 128 && Name[7] == 'q')
- IID = Intrinsic::x86_avx512_vpshrd_q_128;
- else if (VecWidth == 128 && Name[7] == 'd')
- IID = Intrinsic::x86_avx512_vpshrd_d_128;
- else if (VecWidth == 128 && Name[7] == 'w')
- IID = Intrinsic::x86_avx512_vpshrd_w_128;
- else if (VecWidth == 256 && Name[7] == 'q')
- IID = Intrinsic::x86_avx512_vpshrd_q_256;
- else if (VecWidth == 256 && Name[7] == 'd')
- IID = Intrinsic::x86_avx512_vpshrd_d_256;
- else if (VecWidth == 256 && Name[7] == 'w')
- IID = Intrinsic::x86_avx512_vpshrd_w_256;
- else if (VecWidth == 512 && Name[7] == 'q')
- IID = Intrinsic::x86_avx512_vpshrd_q_512;
- else if (VecWidth == 512 && Name[7] == 'd')
- IID = Intrinsic::x86_avx512_vpshrd_d_512;
- else if (VecWidth == 512 && Name[7] == 'w')
- IID = Intrinsic::x86_avx512_vpshrd_w_512;
- else
- llvm_unreachable("Unexpected intrinsic");
} else
return false;
@@ -1960,6 +1958,16 @@
} else if (IsX86 && (Name.startswith("avx512.pror") ||
Name.startswith("avx512.mask.pror"))) {
Rep = upgradeX86Rotate(Builder, *CI, true);
+ } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
+ Name.startswith("avx512.mask.vpshld") ||
+ Name.startswith("avx512.maskz.vpshld"))) {
+ bool ZeroMask = Name[11] == 'z';
+ Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
+ } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
+ Name.startswith("avx512.mask.vpshrd") ||
+ Name.startswith("avx512.maskz.vpshrd"))) {
+ bool ZeroMask = Name[11] == 'z';
+ Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
} else if (IsX86 && Name == "sse42.crc32.64.8") {
Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_sse42_crc32_32_8);
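For the masked vpshldv/vpshrdv forms handled above, the mask is reapplied with a vector select through EmitX86Select. Schematically (a sketch with illustrative value names, not copied from the tests):

  %r = call <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %amt, i8 %k)
    =>
  %fsh = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %amt)
  %m   = bitcast i8 %k to <8 x i1>
  %r   = select <8 x i1> %m, <8 x i64> %fsh, <8 x i64> %a

The maskz variants select against zeroinitializer instead of the first source operand.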