In the common case where we are shuffling a vector, emit an
llvm vector shuffle instead of a bunch of insert/extract operations.
For: vec4 = vec4.yyyy; // splat
Emit:
%tmp1 = shufflevector <4 x float> %tmp, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
instead of:
%tmp1 = extractelement <4 x float> %tmp, i32 1
%tmp2 = insertelement <4 x float> undef, float %tmp1, i32 0
%tmp3 = extractelement <4 x float> %tmp, i32 1
%tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
%tmp5 = extractelement <4 x float> %tmp, i32 1
%tmp6 = insertelement <4 x float> %tmp4, float %tmp5, i32 2
%tmp7 = extractelement <4 x float> %tmp, i32 1
%tmp8 = insertelement <4 x float> %tmp6, float %tmp7, i32 3
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@40779 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/CodeGen/CGExpr.cpp b/CodeGen/CGExpr.cpp
index 178107e..dad4b86 100644
--- a/CodeGen/CGExpr.cpp
+++ b/CodeGen/CGExpr.cpp
@@ -298,14 +298,31 @@
return RValue::get(Builder.CreateExtractElement(Vec, Elt, "tmp"));
}
+ // If the source and destination have the same number of elements, use a
+ // vector shuffle instead of insert/extracts.
+ unsigned NumResultElts = cast<VectorType>(ExprType)->getNumElements();
+ unsigned NumSourceElts =
+ cast<llvm::VectorType>(Vec->getType())->getNumElements();
- unsigned NumElts = cast<VectorType>(ExprType)->getNumElements();
+ if (NumResultElts == NumSourceElts) {
+ llvm::SmallVector<llvm::Constant*, 4> Mask;
+ for (unsigned i = 0; i != NumResultElts; ++i) {
+ unsigned InIdx = OCUVectorComponent::getAccessedFieldNo(i, EncFields);
+ Mask.push_back(llvm::ConstantInt::get(llvm::Type::Int32Ty, InIdx));
+ }
+
+ llvm::Value *MaskV = llvm::ConstantVector::get(&Mask[0], Mask.size());
+ Vec = Builder.CreateShuffleVector(Vec,
+ llvm::UndefValue::get(Vec->getType()),
+ MaskV, "tmp");
+ return RValue::get(Vec);
+ }
// Start out with an undef of the result type.
llvm::Value *Result = llvm::UndefValue::get(ConvertType(ExprType));
// Extract/Insert each element of the result.
- for (unsigned i = 0; i != NumElts; ++i) {
+ for (unsigned i = 0; i != NumResultElts; ++i) {
unsigned InIdx = OCUVectorComponent::getAccessedFieldNo(i, EncFields);
llvm::Value *Elt = llvm::ConstantInt::get(llvm::Type::Int32Ty, InIdx);
Elt = Builder.CreateExtractElement(Vec, Elt, "tmp");