[AMDGPU] SDWA: several fixes for V_CVT and VOPC instructions
Summary:
1. Instruction V_CVT_U32_F32 allow omod operand (see SIInstrInfo.td:1435). In fact this operand shouldn't be allowed here. This fix checks if SDWA pseudo instruction has OMod operand and then copy it.
2. There were several problems with support of VOPC instructions in SDWA peephole pass.
Reviewers: tstellar, arsenm, vpykhtin, airlied, kzhuravl
Subscribers: wdng, nhaehnle, yaxunl, dstuttard, tpr, sarnex, t-tye
Differential Revision: https://reviews.llvm.org/D34626
llvm-svn: 306413
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 4ac23ef..e2ac663 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -627,10 +627,13 @@
return false;
}
- if (!ST.hasSDWAClampVOPC() && TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
+ if (!ST.hasSDWAOutModsVOPC() &&
+ (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
+ TII->hasModifiersSet(MI, AMDGPU::OpName::omod)))
return false;
- } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) {
+ } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) ||
+ !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
return false;
}
@@ -649,25 +652,24 @@
SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode()));
assert(SDWAOpcode != -1);
- // Copy dst, if it is present in original then should also be present in SDWA
- MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
- if (!Dst && !TII->isVOPC(MI))
- return false;
-
const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
// Create SDWA version of instruction MI and initialize its operands
MachineInstrBuilder SDWAInst =
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);
+ // Copy dst, if it is present in original then should also be present in SDWA
+ MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
if (Dst) {
assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
SDWAInst.add(*Dst);
- } else {
- Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+ } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) {
assert(Dst &&
AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
SDWAInst.add(*Dst);
+ } else {
+ assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
+ SDWAInst.addReg(AMDGPU::VCC, RegState::Define);
}
// Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
@@ -714,20 +716,22 @@
}
// Copy omod if present, initialize otherwise if needed
- MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
- if (OMod) {
- assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1);
- SDWAInst.add(*OMod);
- } else if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
- SDWAInst.addImm(0);
+ if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
+ MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
+ if (OMod) {
+ SDWAInst.add(*OMod);
+ } else {
+ SDWAInst.addImm(0);
+ }
}
- // Initialize dst_sel and dst_unused if present
- if (Dst) {
- assert(
- AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1 &&
- AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1);
+ // Initialize dst_sel if present
+ if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1) {
SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
+ }
+
+ // Initialize dst_unused if present
+ if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) {
SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);
}