[X86] Use native shuffle vector for the perm2f128 intrinsics
This patch replaces the perm2f128 intrinsics with native shuffle vectors.
This uses a pretty simple approach to allocate source 0 to the lower half input and source 1 to the upper half input. Then its just a matter of filling in the indices to use either the lower or upper half of that specific source. This can result in the same source being used by both operands. InstCombine or SelectionDAGBuilder should be able to clean that up.
Differential Revision: https://reviews.llvm.org/D37892
llvm-svn: 313418
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 4240d2d..3e353ea 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7923,6 +7923,45 @@
return EmitX86Select(*this, Ops[4], Align, Ops[3]);
}
+ case X86::BI__builtin_ia32_vperm2f128_pd256:
+ case X86::BI__builtin_ia32_vperm2f128_ps256:
+ case X86::BI__builtin_ia32_vperm2f128_si256:
+ case X86::BI__builtin_ia32_permti256: {
+ unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+
+ // This takes a very simple approach since there are two lanes and a
+ // shuffle can have 2 inputs. So we reserve the first input for the first
+ // lane and the second input for the second lane. This may result in
+ // duplicate sources, but this can be dealt with in the backend.
+
+ Value *OutOps[2];
+ uint32_t Indices[8];
+ for (unsigned l = 0; l != 2; ++l) {
+ // Determine the source for this lane.
+ if (Imm & (1 << ((l * 4) + 3)))
+ OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
+ else if (Imm & (1 << ((l * 4) + 1)))
+ OutOps[l] = Ops[1];
+ else
+ OutOps[l] = Ops[0];
+
+ for (unsigned i = 0; i != NumElts/2; ++i) {
+ // Start with ith element of the source for this lane.
+ unsigned Idx = (l * NumElts) + i;
+ // If bit 0 of the immediate half is set, switch to the high half of
+ // the source.
+ if (Imm & (1 << (l * 4)))
+ Idx += NumElts/2;
+ Indices[(l * (NumElts/2)) + i] = Idx;
+ }
+ }
+
+ return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
+ makeArrayRef(Indices, NumElts),
+ "vperm");
+ }
+
case X86::BI__builtin_ia32_movnti:
case X86::BI__builtin_ia32_movnti64:
case X86::BI__builtin_ia32_movntsd: