AVX-512: Added shuffle instructions -
VPSHUFD, VPERMILPS, VMOVDDUP, VMOVLHPS, VMOVHLPS, VSHUFPS, and VALIGN,
in their single and double forms.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189215 91177308-0d34-0410-b5e6-96231b3b80d8
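For context, a minimal sketch of the "symmetric mask" rule this patch adds to
isSHUFPMask and isVPERMILPMask: with 32-bit elements in 256/512-bit vectors,
every 128-bit lane must repeat the same in-lane shuffle pattern, because one
8-bit immediate drives all lanes. The helper below is a hypothetical,
standalone illustration (names are not from the patch), mirroring the
MaskVal/ExpectedMaskVal logic introduced here:

// Hypothetical standalone check: the mask must use the same in-lane
// pattern in every 128-bit lane.
#include <vector>

static bool isLaneSymmetricMask(const std::vector<int> &Mask,
                                unsigned NumLaneElts) {
  std::vector<int> Expected(NumLaneElts, -1); // -1: not yet constrained
  for (unsigned l = 0; l < Mask.size(); l += NumLaneElts) {
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      int Idx = Mask[l + i];
      if (Idx < 0)
        continue;                  // undef element matches anything
      int InLane = Idx - (int)l;   // index relative to this lane
      if (Expected[i] < 0)
        Expected[i] = InLane;      // first defined lane fixes the pattern
      else if (Expected[i] != InLane)
        return false;              // a later lane disagrees
    }
  }
  return true;
}

Under this rule a v8f32 mask such as <1,0,3,2, 5,4,7,6> is accepted (pattern
1,0,3,2 in both lanes), while <1,0,3,2, 4,5,6,7> is rejected.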
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a00f848..6a7ca7d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3600,7 +3600,7 @@
return false;
unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLanes = VT.is512BitVector() ? 1 : VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
// Do not handle 64-bit element shuffles with palignr.
@@ -3683,10 +3683,7 @@
/// specifies a shuffle of elements that is suitable for input to 128/256/512-bit
/// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be
/// reverse of what x86 shuffles want.
-static bool isSHUFPMask(ArrayRef<int> Mask, MVT VT, bool HasFp256,
- bool Commuted = false) {
- if (!HasFp256 && VT.is256BitVector())
- return false;
+static bool isSHUFPMask(ArrayRef<int> Mask, MVT VT, bool Commuted = false) {
unsigned NumElems = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits()/128;
@@ -3695,6 +3692,10 @@
if (NumLaneElems != 2 && NumLaneElems != 4)
return false;
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ bool symmetricMaskRequired =
+ (VT.getSizeInBits() >= 256) && (EltSize == 32);
+
// VSHUFPSY divides the resulting vector into 4 chunks.
// The sources are also split into 4 chunks, and each destination
// chunk must come from a different source chunk.
@@ -3714,6 +3715,7 @@
//
// DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0
//
+ SmallVector<int, 4> MaskVal(NumLaneElems, -1);
unsigned HalfLaneElems = NumLaneElems/2;
for (unsigned l = 0; l != NumElems; l += NumLaneElems) {
for (unsigned i = 0; i != NumLaneElems; ++i) {
@@ -3724,9 +3726,13 @@
// For VSHUFPSY, the mask of the second half must be the same as the
// first but with the appropriate offsets. This works in the same way as
// VPERMILPS works with masks.
- if (NumElems != 8 || l == 0 || Mask[i] < 0)
+ if (!symmetricMaskRequired || Idx < 0)
continue;
- if (!isUndefOrEqual(Idx, Mask[i]+l))
+ if (MaskVal[i] < 0) {
+ MaskVal[i] = Idx - l;
+ continue;
+ }
+ if ((signed)(Idx - l) != MaskVal[i])
return false;
}
}
@@ -4158,31 +4164,32 @@
/// to the same elements of the low, but to the higher half of the source.
/// In VPERMILPD the two lanes could be shuffled independently of each other
/// with the same restriction that lanes can't be crossed. Also handles PSHUFDY.
-static bool isVPERMILPMask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
- if (!HasFp256)
+static bool isVPERMILPMask(ArrayRef<int> Mask, MVT VT) {
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (VT.getSizeInBits() < 256 || EltSize < 32)
return false;
-
+ bool symmetricMaskRequired = (EltSize == 32);
unsigned NumElts = VT.getVectorNumElements();
- // Only match 256-bit with 32/64-bit types
- if (!VT.is256BitVector() || (NumElts != 4 && NumElts != 8))
- return false;
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned LaneSize = NumElts/NumLanes;
+ // A 128-bit lane holds 2 or 4 elements.
+
+ SmallVector<int, 4> ExpectedMaskVal(LaneSize, -1);
for (unsigned l = 0; l != NumElts; l += LaneSize) {
for (unsigned i = 0; i != LaneSize; ++i) {
if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
return false;
- if (NumElts != 8 || l == 0)
- continue;
- // VPERMILPS handling
- if (Mask[i] < 0)
- continue;
- if (!isUndefOrEqual(Mask[i+l], Mask[i]+l))
- return false;
+ if (symmetricMaskRequired) {
+ if (ExpectedMaskVal[i] < 0 && Mask[i+l] >= 0) {
+ ExpectedMaskVal[i] = Mask[i+l] - l;
+ continue;
+ }
+ if (!isUndefOrEqual(Mask[i+l], ExpectedMaskVal[i]+l))
+ return false;
+ }
}
}
-
return true;
}
@@ -4431,10 +4438,11 @@
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
MVT VT = SVOp->getSimpleValueType(0);
- unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3;
+ unsigned EltSize = VT.is512BitVector() ? 1 :
+ VT.getVectorElementType().getSizeInBits() >> 3;
unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLanes = VT.is512BitVector() ? 1 : VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
int Val = 0;
@@ -7407,7 +7415,7 @@
}
// Normalize the node to match x86 shuffle ops if needed
- if (!V2IsUndef && (isSHUFPMask(M, VT, HasFp256, /* Commuted */ true)))
+ if (!V2IsUndef && (isSHUFPMask(M, VT, /* Commuted */ true)))
return CommuteVectorShuffle(SVOp, DAG);
// The checks below are all present in isShuffleMaskLegal, but they are
@@ -7430,7 +7438,7 @@
getShufflePSHUFLWImmediate(SVOp),
DAG);
- if (isSHUFPMask(M, VT, HasFp256))
+ if (isSHUFPMask(M, VT))
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2,
getShuffleSHUFImmediate(SVOp), DAG);
@@ -7449,8 +7457,8 @@
return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
// Handle VPERMILPS/D* permutations
- if (isVPERMILPMask(M, VT, HasFp256)) {
- if (HasInt256 && VT == MVT::v8i32)
+ if (isVPERMILPMask(M, VT)) {
+ if ((HasInt256 && VT == MVT::v8i32) || VT == MVT::v16i32)
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
getShuffleSHUFImmediate(SVOp), DAG);
return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
@@ -13621,7 +13629,7 @@
return (SVT.getVectorNumElements() == 2 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isMOVLMask(M, SVT) ||
- isSHUFPMask(M, SVT, Subtarget->hasFp256()) ||
+ isSHUFPMask(M, SVT) ||
isPSHUFDMask(M, SVT) ||
isPSHUFHWMask(M, SVT, Subtarget->hasInt256()) ||
isPSHUFLWMask(M, SVT, Subtarget->hasInt256()) ||
@@ -13646,8 +13654,8 @@
if (NumElts == 4 && SVT.is128BitVector()) {
return (isMOVLMask(Mask, SVT) ||
isCommutedMOVLMask(Mask, SVT, true) ||
- isSHUFPMask(Mask, SVT, Subtarget->hasFp256()) ||
- isSHUFPMask(Mask, SVT, Subtarget->hasFp256(), /* Commuted */ true));
+ isSHUFPMask(Mask, SVT) ||
+ isSHUFPMask(Mask, SVT, /* Commuted */ true));
}
return false;
}
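A worked note on the getShufflePALIGNRImmediate change above: for 512-bit
vectors the element-size multiplier is forced to 1, so the computed immediate
counts elements rather than bytes, matching VALIGND/VALIGNQ (which rotate by
whole elements), while 128/256-bit PALIGNR keeps byte granularity. A small
hypothetical sketch of the two granularities (helper name is illustrative
only, not part of the patch):

// Hypothetical sketch: PALIGNR immediates count bytes within a 128-bit
// lane, while 512-bit VALIGND/VALIGNQ immediates count elements.
#include <cassert>

static unsigned alignImmediate(unsigned Shift, unsigned EltBits,
                               bool Is512Bit) {
  // Mirrors the patched code: for 512-bit vectors the "element size"
  // multiplier is 1 (element granularity); otherwise it is the element
  // width in bytes (byte granularity for PALIGNR).
  unsigned EltSize = Is512Bit ? 1 : EltBits >> 3;
  return Shift * EltSize;
}

int main() {
  // Rotating a v4i32 lane by one element: PALIGNR needs imm = 4 (bytes).
  assert(alignImmediate(1, 32, false) == 4);
  // The same rotation on v16i32: VALIGND needs imm = 1 (elements).
  assert(alignImmediate(1, 32, true) == 1);
  return 0;
}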