Make better use of instructions that clear high bits; fix various 2-wide shuffle bugs.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45058 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index c2f2736..ed1df4d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3138,8 +3138,6 @@
return V;
}
-/// is4WideVector - Returns true if the specific v8i16 or v16i8 vector is
-/// actually just a 4 wide vector. e.g. <a, a, y, y, d, d, x, x>
SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
// All zero's are handled with pxor, all one's are handled with pcmpeqd.
@@ -3562,17 +3560,35 @@
}
}
-/// RewriteAs4WideShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
-/// ones if possible. This can be done when every pair / quad of shuffle mask
-/// elements point to elements in the right sequence. e.g.
+/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
+/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
+/// done when every pair / quad of shuffle mask elements point to elements in
+/// the right sequence. e.g.
/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
static
-SDOperand RewriteAs4WideShuffle(SDOperand V1, SDOperand V2,
+SDOperand RewriteAsNarrowerShuffle(SDOperand V1, SDOperand V2,
+ MVT::ValueType VT,
SDOperand PermMask, SelectionDAG &DAG,
TargetLowering &TLI) {
unsigned NumElems = PermMask.getNumOperands();
- unsigned Scale = NumElems / 4;
- SmallVector<SDOperand, 4> MaskVec;
+ unsigned NewWidth = (NumElems == 4) ? 2 : 4;
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
+ MVT::ValueType NewVT = MaskVT;
+ switch (VT) {
+ case MVT::v4f32: NewVT = MVT::v2f64; break;
+ case MVT::v4i32: NewVT = MVT::v2i64; break;
+ case MVT::v8i16: NewVT = MVT::v4i32; break;
+ case MVT::v16i8: NewVT = MVT::v4i32; break;
+ default: assert(false && "Unexpected!");
+ }
+
+ if (NewWidth == 2)
+ if (MVT::isInteger(VT))
+ NewVT = MVT::v2i64;
+ else
+ NewVT = MVT::v2f64;
+ unsigned Scale = NumElems / NewWidth;
+ SmallVector<SDOperand, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i += Scale) {
unsigned StartIdx = ~0U;
for (unsigned j = 0; j < Scale; ++j) {
@@ -3591,10 +3607,11 @@
MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MVT::i32));
}
- V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
- V2 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V2);
- return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, V2,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, &MaskVec[0],4));
+ V1 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V1);
+ V2 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V2);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &MaskVec[0], MaskVec.size()));
}
SDOperand
@@ -3626,6 +3643,35 @@
return PromoteSplat(Op, DAG);
}
+ // If the shuffle can be profitably rewritten as a narrower shuffle, then
+ // do it!
+ if (VT == MVT::v8i16 || VT == MVT::v16i8) {
+ SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+ if (NewOp.Val)
+ return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+ } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
+ // FIXME: Figure out a cleaner way to do this.
+ // Try to make use of movq to zero out the top part.
+ if (ISD::isBuildVectorAllZeros(V2.Val)) {
+ SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+ if (NewOp.Val) {
+ SDOperand NewV1 = NewOp.getOperand(0);
+ SDOperand NewV2 = NewOp.getOperand(1);
+ SDOperand NewMask = NewOp.getOperand(2);
+ if (isCommutedMOVL(NewMask.Val, true, false)) {
+ NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
+ NewOp = DAG.getNode(ISD::VECTOR_SHUFFLE, NewOp.getValueType(),
+ NewV1, NewV2, getMOVLMask(2, DAG));
+ return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+ }
+ }
+ } else if (ISD::isBuildVectorAllZeros(V1.Val)) {
+ SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+ if (NewOp.Val && X86::isMOVLMask(NewOp.getOperand(2).Val))
+ return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+ }
+ }
+
if (X86::isMOVLMask(PermMask.Val))
return (V1IsUndef) ? V2 : Op;
@@ -3654,6 +3700,7 @@
Commuted = true;
}
+ // FIXME: Figure out a cleaner way to do this.
if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
if (V2IsUndef) return V1;
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
@@ -3735,13 +3782,6 @@
}
}
- // If the shuffle can be rewritten as a 4 wide shuffle, then do it!
- if (VT == MVT::v8i16 || VT == MVT::v16i8) {
- SDOperand NewOp = RewriteAs4WideShuffle(V1, V2, PermMask, DAG, *this);
- if (NewOp.Val)
- return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
- }
-
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
if (VT == MVT::v8i16) {
SDOperand NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);