Add support for selecting AVX2 vpshuflw and vpshufhw. Add decoding support for AsmPrinter.
llvm-svn: 155982
diff --git a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
index f532019..dbee886 100644
--- a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -96,7 +96,17 @@
case X86::PSHUFHWmi:
case X86::VPSHUFHWmi:
DestName = getRegName(MI->getOperand(0).getReg());
- DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ DecodePSHUFHWMask(MVT::v8i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::VPSHUFHWYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPSHUFHWYmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFHWMask(MVT::v16i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
break;
case X86::PSHUFLWri:
@@ -106,7 +116,17 @@
case X86::PSHUFLWmi:
case X86::VPSHUFLWmi:
DestName = getRegName(MI->getOperand(0).getReg());
- DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ DecodePSHUFLWMask(MVT::v8i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::VPSHUFLWYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPSHUFLWYmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFLWMask(MVT::v16i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
break;
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index a802333..a1f2424 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -70,7 +70,7 @@
unsigned NumLanes = VT.getSizeInBits() / 128;
unsigned NumLaneElts = NumElts / NumLanes;
- int NewImm = Imm;
+ unsigned NewImm = Imm;
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
for (unsigned i = 0; i != NumLaneElts; ++i) {
ShuffleMask.push_back(NewImm % NumLaneElts + l);
@@ -80,26 +80,38 @@
}
}
-void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
- ShuffleMask.push_back(0);
- ShuffleMask.push_back(1);
- ShuffleMask.push_back(2);
- ShuffleMask.push_back(3);
- for (unsigned i = 0; i != 4; ++i) {
- ShuffleMask.push_back(4+(Imm & 3));
- Imm >>= 2;
+void DecodePSHUFHWMask(EVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumElts = 8 * NumLanes;
+
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ unsigned NewImm = Imm;
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ ShuffleMask.push_back(l + i);
+ }
+ for (unsigned i = 4, e = 8; i != e; ++i) {
+ ShuffleMask.push_back(l + 4 + (NewImm & 3));
+ NewImm >>= 2;
+ }
}
}
-void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
- for (unsigned i = 0; i != 4; ++i) {
- ShuffleMask.push_back((Imm & 3));
- Imm >>= 2;
+void DecodePSHUFLWMask(EVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumElts = 8 * NumLanes;
+
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ unsigned NewImm = Imm;
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ ShuffleMask.push_back(l + (NewImm & 3));
+ NewImm >>= 2;
+ }
+ for (unsigned i = 4, e = 8; i != e; ++i) {
+ ShuffleMask.push_back(l + i);
+ }
}
- ShuffleMask.push_back(4);
- ShuffleMask.push_back(5);
- ShuffleMask.push_back(6);
- ShuffleMask.push_back(7);
}
/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
@@ -111,7 +123,7 @@
unsigned NumLanes = VT.getSizeInBits() / 128;
unsigned NumLaneElts = NumElts / NumLanes;
- int NewImm = Imm;
+ unsigned NewImm = Imm;
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
// Part that reads from dest.
for (unsigned i = 0; i != NumLaneElts/2; ++i) {
@@ -176,9 +188,9 @@
unsigned FstHalfBegin = (Imm & 0x3) * HalfSize;
unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
- for (int i = FstHalfBegin, e = FstHalfBegin+HalfSize; i != e; ++i)
+ for (unsigned i = FstHalfBegin, e = FstHalfBegin+HalfSize; i != e; ++i)
ShuffleMask.push_back(i);
- for (int i = SndHalfBegin, e = SndHalfBegin+HalfSize; i != e; ++i)
+ for (unsigned i = SndHalfBegin, e = SndHalfBegin+HalfSize; i != e; ++i)
ShuffleMask.push_back(i);
}
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index 5b8c6ef..14545e7 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -37,9 +37,9 @@
void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
-void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+void DecodePSHUFHWMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
-void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+void DecodePSHUFLWMask(EVT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
/// the type of the vector allowing it to handle different datatypes and vector
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8005e23..7ab4b26 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3196,8 +3196,8 @@
/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFHW.
-static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT) {
- if (VT != MVT::v8i16)
+static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
+ if (VT != MVT::v8i16 && (!HasAVX2 || VT != MVT::v16i16))
return false;
// Lower quadword copied in order or undef.
@@ -3206,16 +3206,27 @@
// Upper quadword shuffled.
for (unsigned i = 4; i != 8; ++i)
- if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7))
+ if (!isUndefOrInRange(Mask[i], 4, 8))
return false;
+ if (VT == MVT::v16i16) {
+ // Lower quadword copied in order or undef.
+ if (!isSequentialOrUndefInRange(Mask, 8, 4, 8))
+ return false;
+
+ // Upper quadword shuffled.
+ for (unsigned i = 12; i != 16; ++i)
+ if (!isUndefOrInRange(Mask[i], 12, 16))
+ return false;
+ }
+
return true;
}
/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFLW.
-static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT) {
- if (VT != MVT::v8i16)
+static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
+ if (VT != MVT::v8i16 && (!HasAVX2 || VT != MVT::v16i16))
return false;
// Upper quadword copied in order.
@@ -3224,9 +3235,20 @@
// Lower quadword shuffled.
for (unsigned i = 0; i != 4; ++i)
- if (Mask[i] >= 4)
+ if (!isUndefOrInRange(Mask[i], 0, 4))
return false;
+ if (VT == MVT::v16i16) {
+ // Upper quadword copied in order.
+ if (!isSequentialOrUndefInRange(Mask, 12, 4, 12))
+ return false;
+
+ // Lower quadword shuffled.
+ for (unsigned i = 8; i != 12; ++i)
+ if (!isUndefOrInRange(Mask[i], 8, 12))
+ return false;
+ }
+
return true;
}
@@ -4405,12 +4427,12 @@
break;
case X86ISD::PSHUFHW:
ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::PSHUFLW:
ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::MOVSS:
@@ -6581,12 +6603,12 @@
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
}
- if (isPSHUFHWMask(M, VT))
+ if (isPSHUFHWMask(M, VT, HasAVX2))
return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
getShufflePSHUFHWImmediate(SVOp),
DAG);
- if (isPSHUFLWMask(M, VT))
+ if (isPSHUFLWMask(M, VT, HasAVX2))
return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
getShufflePSHUFLWImmediate(SVOp),
DAG);
@@ -11376,8 +11398,8 @@
isMOVLMask(M, VT) ||
isSHUFPMask(M, VT, Subtarget->hasAVX()) ||
isPSHUFDMask(M, VT) ||
- isPSHUFHWMask(M, VT) ||
- isPSHUFLWMask(M, VT) ||
+ isPSHUFHWMask(M, VT, Subtarget->hasAVX2()) ||
+ isPSHUFLWMask(M, VT, Subtarget->hasAVX2()) ||
isPALIGNRMask(M, VT, Subtarget) ||
isUNPCKLMask(M, VT, Subtarget->hasAVX2()) ||
isUNPCKHMask(M, VT, Subtarget->hasAVX2()) ||