[AMDGPU] Produce madak and madmk from the two-address pass
These two instructions are normally produced by instruction selection, but
when the two-address pass converts a mac into a mad we end up with a plain
mad where one of these immediate-operand forms could have been used instead.
Differential Revision: https://reviews.llvm.org/D37389
llvm-svn: 312928
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index f7f6d52..e936070 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2083,6 +2083,19 @@
return false;
}
+/// Return the immediate that the register operand \p MO is materialized from,
+/// or 0 when there is nothing to fold.
+///
+/// \p MO qualifies only when it is a virtual register whose unique definition
+/// is an S_MOV_B32 or V_MOV_B32_e32 with an immediate source operand.
+///
+/// Callers test the result for truthiness, so a move of the literal 0 is
+/// indistinguishable from "not foldable" and is never folded by this helper.
+static int64_t getFoldableImm(const MachineOperand* MO) {
+  // getUniqueVRegDef() is specified for virtual registers only; for a
+  // physical register a "unique" def found in the function need not be the
+  // definition that reaches this use, so refuse to look through it.
+  if (!MO->isReg() || !TargetRegisterInfo::isVirtualRegister(MO->getReg()))
+    return 0;
+  const MachineFunction *MF = MO->getParent()->getParent()->getParent();
+  const MachineRegisterInfo &MRI = MF->getRegInfo();
+  const MachineInstr *Def = MRI.getUniqueVRegDef(MO->getReg());
+  // Check the opcode before touching operand 1 so that we only index into
+  // instructions known to have a source operand there.
+  if (Def && (Def->getOpcode() == AMDGPU::S_MOV_B32 ||
+              Def->getOpcode() == AMDGPU::V_MOV_B32_e32) &&
+      Def->getOperand(1).isImm())
+    return Def->getOperand(1).getImm();
+  // Not a foldable immediate move. Return a plain integer zero rather than a
+  // bool or a register enum, since the result is an immediate value.
+  return 0;
+}
+
MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
MachineInstr &MI,
LiveVariables *LV) const {
@@ -2120,6 +2133,35 @@
const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
+ if (!Src0Mods && !Src1Mods && !Clamp && !Omod) {
+ if (auto Imm = getFoldableImm(Src2)) {
+ return BuildMI(*MBB, MI, MI.getDebugLoc(),
+ get(IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32))
+ .add(*Dst)
+ .add(*Src0)
+ .add(*Src1)
+ .addImm(Imm);
+ }
+ if (auto Imm = getFoldableImm(Src1)) {
+ return BuildMI(*MBB, MI, MI.getDebugLoc(),
+ get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
+ .add(*Dst)
+ .add(*Src0)
+ .addImm(Imm)
+ .add(*Src2);
+ }
+ if (auto Imm = getFoldableImm(Src0)) {
+ if (isOperandLegal(MI, AMDGPU::getNamedOperandIdx(AMDGPU::V_MADMK_F32,
+ AMDGPU::OpName::src0), Src1))
+ return BuildMI(*MBB, MI, MI.getDebugLoc(),
+ get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
+ .add(*Dst)
+ .add(*Src1)
+ .addImm(Imm)
+ .add(*Src2);
+ }
+ }
+
return BuildMI(*MBB, MI, MI.getDebugLoc(),
get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32))
.add(*Dst)
diff --git a/llvm/test/CodeGen/AMDGPU/madak.ll b/llvm/test/CodeGen/AMDGPU/madak.ll
index 77c35fa..cc08333 100644
--- a/llvm/test/CodeGen/AMDGPU/madak.ll
+++ b/llvm/test/CodeGen/AMDGPU/madak.ll
@@ -34,7 +34,7 @@
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
-; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], [[VK]]
+; GCN-DAG: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
; GCN-DAG: v_mac_f32_e32 [[VK]], [[VA]], [[VC]]
; GCN: s_endpgm
define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir
new file mode 100644
index 0000000..ebda1d2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir
@@ -0,0 +1,110 @@
+# RUN: llc -march=amdgcn %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: test_madmk_reg_imm_f32
+# GCN: V_MADMK_F32 killed %0.sub0, 1078523331, killed %1, implicit %exec
+---
+name: test_madmk_reg_imm_f32
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_32 }
+body: |
+ bb.0:
+
+ %0 = IMPLICIT_DEF
+ %1 = COPY %0.sub1
+ %2 = V_MOV_B32_e32 1078523331, implicit %exec
+ %3 = V_MAC_F32_e32 killed %0.sub0, %2, killed %1, implicit %exec
+
+...
+
+# GCN-LABEL: name: test_madmk_imm_reg_f32
+# GCN: V_MADMK_F32 killed %0.sub0, 1078523331, killed %1, implicit %exec
+---
+name: test_madmk_imm_reg_f32
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_32 }
+body: |
+ bb.0:
+
+ %0 = IMPLICIT_DEF
+ %1 = COPY %0.sub1
+ %2 = V_MOV_B32_e32 1078523331, implicit %exec
+ %3 = V_MAC_F32_e32 %2, killed %0.sub0, killed %1, implicit %exec
+
+...
+
+# GCN-LABEL: name: test_madak_f32
+# GCN: V_MADAK_F32 killed %0.sub0, %0.sub1, 1078523331, implicit %exec
+---
+name: test_madak_f32
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+body: |
+ bb.0:
+
+ %0 = IMPLICIT_DEF
+ %1 = V_MOV_B32_e32 1078523331, implicit %exec
+ %2 = V_MAC_F32_e32 killed %0.sub0, %0.sub1, %1, implicit %exec
+
+...
+
+# GCN-LABEL: name: test_madmk_reg_imm_f16
+# GCN: V_MADMK_F16 killed %0.sub0, 1078523331, killed %1, implicit %exec
+---
+name: test_madmk_reg_imm_f16
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_32 }
+body: |
+ bb.0:
+
+ %0 = IMPLICIT_DEF
+ %1 = COPY %0.sub1
+ %2 = V_MOV_B32_e32 1078523331, implicit %exec
+ %3 = V_MAC_F16_e32 killed %0.sub0, %2, killed %1, implicit %exec
+
+...
+
+# GCN-LABEL: name: test_madmk_imm_reg_f16
+# GCN: V_MADMK_F16 killed %0.sub0, 1078523331, killed %1, implicit %exec
+---
+name: test_madmk_imm_reg_f16
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_32 }
+body: |
+ bb.0:
+
+ %0 = IMPLICIT_DEF
+ %1 = COPY %0.sub1
+ %2 = V_MOV_B32_e32 1078523331, implicit %exec
+ %3 = V_MAC_F16_e32 %2, killed %0.sub0, killed %1, implicit %exec
+
+...
+
+# GCN-LABEL: name: test_madak_f16
+# GCN: V_MADAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit %exec
+---
+name: test_madak_f16
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+body: |
+ bb.0:
+
+ %0 = IMPLICIT_DEF
+ %1 = V_MOV_B32_e32 1078523331, implicit %exec
+ %2 = V_MAC_F16_e32 killed %0.sub0, %0.sub1, %1, implicit %exec
+...
diff --git a/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll b/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
index 0148ff4..ab47cc9 100644
--- a/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
@@ -23,9 +23,9 @@
}
; GCN-LABEL: {{^}}madak_f16_use_2
-; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_madak_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x41200000
; SI: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; VI: v_madak_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x4900
; VI: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @madak_f16_use_2(