[X86] Fold masking into subvector extract builtins.
I'm looking into making the select builtins require avx512f, avx512bw, or avx512vl since masking operations generally require those features.
The extract builtins are a special case because the 512-bit versions return a 128-bit or 256-bit vector with masking even when avx512vl is not supported, so the masking is folded into the extract builtins themselves.
llvm-svn: 334330
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index ac9f46c..012428d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9239,18 +9239,18 @@
case X86::BI__builtin_ia32_vextractf128_ps256:
case X86::BI__builtin_ia32_vextractf128_si256:
case X86::BI__builtin_ia32_extract128i256:
- case X86::BI__builtin_ia32_extractf64x4:
- case X86::BI__builtin_ia32_extractf32x4:
- case X86::BI__builtin_ia32_extracti64x4:
- case X86::BI__builtin_ia32_extracti32x4:
- case X86::BI__builtin_ia32_extractf32x8:
- case X86::BI__builtin_ia32_extracti32x8:
- case X86::BI__builtin_ia32_extractf32x4_256:
- case X86::BI__builtin_ia32_extracti32x4_256:
- case X86::BI__builtin_ia32_extractf64x2_256:
- case X86::BI__builtin_ia32_extracti64x2_256:
- case X86::BI__builtin_ia32_extractf64x2_512:
- case X86::BI__builtin_ia32_extracti64x2_512: {
+ case X86::BI__builtin_ia32_extractf64x4_mask:
+ case X86::BI__builtin_ia32_extractf32x4_mask:
+ case X86::BI__builtin_ia32_extracti64x4_mask:
+ case X86::BI__builtin_ia32_extracti32x4_mask:
+ case X86::BI__builtin_ia32_extractf32x8_mask:
+ case X86::BI__builtin_ia32_extracti32x8_mask:
+ case X86::BI__builtin_ia32_extractf32x4_256_mask:
+ case X86::BI__builtin_ia32_extracti32x4_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_256_mask:
+ case X86::BI__builtin_ia32_extracti64x2_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_512_mask:
+ case X86::BI__builtin_ia32_extracti64x2_512_mask: {
llvm::Type *DstTy = ConvertType(E->getType());
unsigned NumElts = DstTy->getVectorNumElements();
unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue() * NumElts;
@@ -9259,10 +9259,15 @@
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i + Index;
- return Builder.CreateShuffleVector(Ops[0],
- UndefValue::get(Ops[0]->getType()),
- makeArrayRef(Indices, NumElts),
- "extract");
+ Value *Res = Builder.CreateShuffleVector(Ops[0],
+ UndefValue::get(Ops[0]->getType()),
+ makeArrayRef(Indices, NumElts),
+ "extract");
+
+ if (Ops.size() == 4)
+ Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
+
+ return Res;
}
case X86::BI__builtin_ia32_vinsertf128_pd256:
case X86::BI__builtin_ia32_vinsertf128_ps256: