[AMDGPU][MC][VI][GFX9] Added support of SDWA/DPP for v_cndmask_b32
See bug 36356: https://bugs.llvm.org/show_bug.cgi?id=36356
Differential Revision: https://reviews.llvm.org/D45446
Reviewers: artem.tamazov, arsenm, timcorringham
llvm-svn: 330123
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 07ab6b6..648f97a 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -890,6 +890,10 @@
Opc == AMDGPU::V_MAC_F32_e32))
return false;
+ // FIXME: has SDWA but require handling of implicit VCC use
+ if (Opc == AMDGPU::V_CNDMASK_B32_e32)
+ return false;
+
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index ec762db..b012dd3 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -168,6 +168,10 @@
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
def _e32 : VOP2_Pseudo <opName, P>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
+
+ def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
+ let AsmMatchConverter = "cvtSdwaVOP2b";
+ }
}
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
@@ -294,12 +298,30 @@
let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
let Asm32 = "$vdst, $src0, $src1, vcc";
let Asm64 = "$vdst, $src0, $src1, $src2";
+ let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
+ let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
+ let AsmDPP = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
+
let Outs32 = (outs DstRC:$vdst);
let Outs64 = (outs DstRC:$vdst);
// Suppress src2 implied by type since the 32-bit encoding uses an
// implicit VCC use.
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
+
+ let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
+ Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
+ clampmod:$clamp,
+ dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel, src1_sel:$src1_sel);
+
+ let InsDPP = (ins DstRCDPP:$old,
+ Src0DPP:$src0,
+ Src1DPP:$src1,
+ dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let HasExt = 1;
+ let HasSDWA9 = 1;
}
def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
@@ -820,7 +842,7 @@
def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
}
-defm V_CNDMASK_B32 : Base_VOP2_Real_e32e64_vi <0x0>;
+defm V_CNDMASK_B32 : VOP2_Real_e32e64_vi <0x0>;
defm V_ADD_F32 : VOP2_Real_e32e64_vi <0x1>;
defm V_SUB_F32 : VOP2_Real_e32e64_vi <0x2>;
defm V_SUBREV_F32 : VOP2_Real_e32e64_vi <0x3>;