[AMDGPU] DAG combine to produce V_PERM_B32
Differential Revision: https://reviews.llvm.org/D48099
llvm-svn: 334559
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 81d1b4a..259fdaf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4119,6 +4119,7 @@
NODE_NAME_CASE(MAD_I24)
NODE_NAME_CASE(MAD_I64_I32)
NODE_NAME_CASE(MAD_U64_U32)
+ NODE_NAME_CASE(PERM)
NODE_NAME_CASE(TEXTURE_FETCH)
NODE_NAME_CASE(EXPORT)
NODE_NAME_CASE(EXPORT_DONE)
@@ -4374,6 +4375,34 @@
Known.Zero.setHighBits(32 - MaxValBits);
break;
}
+ case AMDGPUISD::PERM: {
+   // Known bits of a byte permute. Operand 2 is the selector: one selector
+   // byte per result byte, lowest selector byte controlling result byte 0.
+   // Nothing can be derived unless the selector is a compile-time constant.
+   ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+   if (!CMask)
+     return;
+
+   KnownBits LHSKnown, RHSKnown;
+   DAG.computeKnownBits(Op.getOperand(0), LHSKnown, Depth + 1);
+   DAG.computeKnownBits(Op.getOperand(1), RHSKnown, Depth + 1);
+   unsigned Sel = CMask->getZExtValue();
+
+   // Walk the four result bytes; I is the bit offset of the result byte and
+   // the low 8 bits of Sel are the selector for that byte.
+   for (unsigned I = 0; I < 32; I += 8) {
+     // Unsigned literal: 0xff << 24 would overflow a signed int.
+     const unsigned ByteMask = 0xffu << I;
+     unsigned SelBits = Sel & 0xff;
+     if (SelBits < 4) {
+       // Selectors 0..3 name a source byte of operand 1. The selected byte
+       // must be moved from its source position to result position I; the
+       // source and result positions are independent of each other.
+       const unsigned SrcShift = SelBits * 8;
+       Known.One |= ((RHSKnown.One.getZExtValue() >> SrcShift) & 0xff) << I;
+       Known.Zero |= ((RHSKnown.Zero.getZExtValue() >> SrcShift) & 0xff) << I;
+     } else if (SelBits < 7) {
+       // Selectors 4..6 name source byte (selector & 3) of operand 0.
+       // NOTE(review): selector 7 is left as "unknown" here, which is
+       // conservative; presumably it selects byte 3 of operand 0 - confirm
+       // against the ISA before widening this range.
+       const unsigned SrcShift = (SelBits & 3) * 8;
+       Known.One |= ((LHSKnown.One.getZExtValue() >> SrcShift) & 0xff) << I;
+       Known.Zero |= ((LHSKnown.Zero.getZExtValue() >> SrcShift) & 0xff) << I;
+     } else if (SelBits == 0x0c) {
+       // Selector 0x0c yields a constant 0x00 byte.
+       Known.Zero |= ByteMask;
+     } else if (SelBits > 0x0c) {
+       // Selectors above 0x0c yield a constant 0xff byte.
+       Known.One |= ByteMask;
+     }
+     // Remaining selector values contribute no known bits for this byte.
+     Sel >>= 8;
+   }
+   break;
+ }
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
switch (IID) {