[AVX512] Remove masked palignr intrinsics and auto-upgrade them to native IR of vector shuffle and select.
llvm-svn: 271872
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 99aa237..85ef8e1 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -218,6 +218,7 @@
Name.startswith("x86.avx512.mask.load.q.") ||
Name == "x86.sse42.crc32.64.8" ||
Name.startswith("x86.avx.vbroadcast.s") ||
+ Name.startswith("x86.avx512.mask.palignr.") ||
Name.startswith("x86.sse2.psll.dq") ||
Name.startswith("x86.sse2.psrl.dq") ||
Name.startswith("x86.avx2.psll.dq") ||
@@ -366,6 +367,53 @@
return Builder.CreateBitCast(Res, ResultTy, "cast");
}
+/// Upgrades a masked AVX-512 palignr intrinsic call to a byte shuffle
+/// followed by a select against the passthru value.
+///
+/// Within every 16-element lane, result element i is element (Shift + i) of
+/// the 32-element concatenation whose low half is the matching lane of \p Op1
+/// and whose high half is the matching lane of \p Op0. Positions shifted past
+/// the concatenation read as zero.
+///
+/// \param Builder  Builder used to emit the replacement instructions.
+/// \param C        Not used by this function; kept for the existing callers.
+/// \param Op0      Vector supplying the high half of each lane pair. Its
+///                 element count must be a multiple of 16 and at most 64.
+/// \param Op1      Vector supplying the low half of each lane pair.
+/// \param Shift    Shift amount in elements; must be a ConstantInt.
+/// \param Passthru Value selected for elements whose mask bit is clear.
+/// \param Mask     Integer mask, bitcast to a vector of i1 with one bit per
+///                 element unless it is a constant all-ones value.
+/// \returns The shuffle result when the mask is constant all-ones, otherwise
+///          a select between that result and \p Passthru.
+static Value *UpgradeX86PALIGNRIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
+                                          Value *Op0, Value *Op1, Value *Shift,
+                                          Value *Passthru, Value *Mask) {
+  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
+
+  unsigned NumElts = Op0->getType()->getVectorNumElements();
+  // Indices below holds at most 64 entries, so bound NumElts as well as
+  // requiring whole 16-element lanes.
+  assert(NumElts % 16 == 0 && NumElts <= 64 &&
+         "palignr upgrade expects whole 16-element lanes, at most 64 elements");
+
+  Value *Align;
+  if (ShiftVal >= 32) {
+    // Shifting by two full lanes or more leaves nothing but zeroes. Do not
+    // return here: the mask still has to be applied so that masked-off
+    // elements come from Passthru.
+    Align = llvm::Constant::getNullValue(Op0->getType());
+  } else {
+    // If palignr is shifting the pair of input vectors more than one lane,
+    // but less than two lanes, convert to shifting in zeroes.
+    if (ShiftVal > 16) {
+      ShiftVal -= 16;
+      Op1 = Op0;
+      Op0 = llvm::Constant::getNullValue(Op0->getType());
+    }
+
+    int Indices[64];
+    // palignr operates on each 16-element lane independently, so build the
+    // shuffle mask one lane at a time.
+    for (unsigned l = 0; l != NumElts; l += 16) {
+      for (unsigned i = 0; i != 16; ++i) {
+        unsigned Idx = ShiftVal + i;
+        if (Idx >= 16)
+          Idx += NumElts - 16; // End of lane, switch operand.
+        Indices[l + i] = Idx + l;
+      }
+    }
+
+    Align = Builder.CreateShuffleVector(Op1, Op0,
+                                        makeArrayRef(Indices, NumElts),
+                                        "palignr");
+  }
+
+  // If the mask is all ones just emit the align operation. The local is
+  // named MaskC so it does not shadow the LLVMContext parameter C.
+  if (const auto *MaskC = dyn_cast<Constant>(Mask))
+    if (MaskC->isAllOnesValue())
+      return Align;
+
+  llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
+                                                   NumElts);
+  Mask = Builder.CreateBitCast(Mask, MaskTy, "cast");
+  return Builder.CreateSelect(Mask, Align, Passthru);
+}
+
// Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
@@ -725,6 +773,12 @@
Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
Constant::getNullValue(MaskTy));
+ } else if (Name.startswith("llvm.x86.avx512.mask.palignr.")) {
+ Rep = UpgradeX86PALIGNRIntrinsics(Builder, C, CI->getArgOperand(0),
+ CI->getArgOperand(1),
+ CI->getArgOperand(2),
+ CI->getArgOperand(3),
+ CI->getArgOperand(4));
} else if (Name == "llvm.x86.sse2.psll.dq" ||
Name == "llvm.x86.avx2.psll.dq") {
// 128/256-bit shift left specified in bits.