[X86][AVX] Added support for lowering to VBROADCASTF128/VBROADCASTI128 (reapplied)
As reported on PR26235, we don't currently make use of the VBROADCASTF128/VBROADCASTI128 instructions (or the AVX512 equivalents) to load+splat a 128-bit vector to both lanes of a 256-bit vector.
This patch enables lowering from subvector insertion/concatenation patterns and auto-upgrades the llvm.x86.avx.vbroadcastf128.pd.256 / llvm.x86.avx.vbroadcastf128.ps.256 intrinsics to match.
We could possibly investigate using VBROADCASTF128/VBROADCASTI128 to load repeated constants as well (similar to how we already do for scalar broadcasts).
Reapplied with fix for PR28657 - removed intrinsic definitions (clang companion patch to be be submitted shortly).
Differential Revision: https://reviews.llvm.org/D22460
llvm-svn: 276416
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 2e4a2f8..a8145b6 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -296,6 +296,7 @@
Name.startswith("avx.blend.p") ||
Name == "avx2.pblendw" ||
Name.startswith("avx2.pblendd.") ||
+ Name.startswith("avx.vbroadcastf128") ||
Name == "avx2.vbroadcasti128" ||
Name == "xop.vpcmov" ||
(Name.startswith("xop.vpcom") && F->arg_size() == 2))) {
@@ -886,7 +887,7 @@
Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
Rep = Builder.CreateZExt(Rep, CI->getType(), "");
- } else if (IsX86 && Name.startswith("avx.vbroadcast")) {
+ } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
// Replace broadcasts with a series of insertelements.
Type *VecTy = CI->getType();
Type *EltTy = VecTy->getVectorElementType();
@@ -918,15 +919,21 @@
bool DoSext = (StringRef::npos != Name.find("pmovsx"));
Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
: Builder.CreateZExt(SV, DstTy);
- } else if (IsX86 && Name == "avx2.vbroadcasti128") {
- // Replace vbroadcasts with a vector shuffle.
- Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
+ } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
+ Name == "avx2.vbroadcasti128")) {
+ // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
+ Type *EltTy = CI->getType()->getVectorElementType();
+ unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
+ Type *VT = VectorType::get(EltTy, NumSrcElts);
Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
PointerType::getUnqual(VT));
Value *Load = Builder.CreateLoad(VT, Op);
- uint32_t Idxs[4] = { 0, 1, 0, 1 };
- Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
- Idxs);
+ if (NumSrcElts == 2)
+ Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
+ { 0, 1, 0, 1 });
+ else
+ Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
+ { 0, 1, 2, 3, 0, 1, 2, 3 });
} else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
Name.startswith("avx2.vbroadcast") ||
Name.startswith("avx512.pbroadcast") ||