[AMDGPU] Untangle SDWA pass from SIShrinkInstructions Remove dependency of SDWA pass on SIShrinkInstructions. The goal is to move SDWA even higher in the stack to avoid second run of MachineLICM, MachineCSE and SIFoldOperands. Also added handling to preserve original src modifiers. Differential Revision: https://reviews.llvm.org/D33860 llvm-svn: 304665

commit: 03306604030551c4372e973b8038087001e580f3 [log] [tgz]
author: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> Sat Jun 03 17:39:47 2017 +0000
committer: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> Sat Jun 03 17:39:47 2017 +0000
tree: df2e2ff4653953246b82ce1a8007ae0c5f16a437
parent: 2fdf185beaab02a21374d4339bc9a7fd3ae5c49d [diff] [blame]
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index fae249b..522cc0a 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp

@@ -129,7 +129,8 @@
   bool getNeg() const { return Neg; }
   bool getSext() const { return Sext; }
 
-  uint64_t getSrcMods() const;
+  uint64_t getSrcMods(const SIInstrInfo *TII,
+                      const MachineOperand *SrcOp) const;
 };
 
 class SDWADstOperand : public SDWAOperand {
@@ -240,13 +241,24 @@
   return SuperMask.all();
 }
 
-uint64_t SDWASrcOperand::getSrcMods() const {
+uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
+                                    const MachineOperand *SrcOp) const {
   uint64_t Mods = 0;
+  const auto *MI = SrcOp->getParent();
+  if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) {
+    if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
+      Mods = Mod->getImm();
+    }
+  } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) {
+    if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) {
+      Mods = Mod->getImm();
+    }
+  }
   if (Abs || Neg) {
     assert(!Sext &&
            "Float and integer src modifiers can't be set simulteniously");
     Mods |= Abs ? SISrcMods::ABS : 0;
-    Mods |= Neg ? SISrcMods::NEG : 0;
+    Mods ^= Neg ? SISrcMods::NEG : 0;
   } else if (Sext) {
     Mods |= SISrcMods::SEXT;
   }
@@ -312,7 +324,7 @@
   }
   copyRegOperand(*Src, *getTargetOperand());
   SrcSel->setImm(getSrcSel());
-  SrcMods->setImm(getSrcMods());
+  SrcMods->setImm(getSrcMods(TII, Src));
   getTargetOperand()->setIsKill(false);
   return true;
 }
@@ -409,7 +421,10 @@
       switch (Opcode) {
       case AMDGPU::V_LSHRREV_B32_e32:
       case AMDGPU::V_ASHRREV_I32_e32:
-      case AMDGPU::V_LSHLREV_B32_e32: {
+      case AMDGPU::V_LSHLREV_B32_e32:
+      case AMDGPU::V_LSHRREV_B32_e64:
+      case AMDGPU::V_ASHRREV_I32_e64:
+      case AMDGPU::V_LSHLREV_B32_e64: {
         // from: v_lshrrev_b32_e32 v1, 16/24, v0
         // to SDWA src:v0 src_sel:WORD_1/BYTE_3
 
@@ -432,7 +447,8 @@
             TRI->isPhysicalRegister(Dst->getReg()))
           break;
 
-        if (Opcode == AMDGPU::V_LSHLREV_B32_e32) {
+        if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
+            Opcode == AMDGPU::V_LSHLREV_B32_e64) {
           auto SDWADst = make_unique<SDWADstOperand>(
               Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
           DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
@@ -441,7 +457,8 @@
         } else {
           auto SDWASrc = make_unique<SDWASrcOperand>(
               Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
-              Opcode == AMDGPU::V_LSHRREV_B32_e32 ? false : true);
+              Opcode != AMDGPU::V_LSHRREV_B32_e32 &&
+              Opcode != AMDGPU::V_LSHRREV_B32_e64);
           DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
           SDWAOperands[&MI] = std::move(SDWASrc);
           ++NumSDWAPatternsFound;
@@ -451,7 +468,10 @@
 
       case AMDGPU::V_LSHRREV_B16_e32:
       case AMDGPU::V_ASHRREV_I16_e32:
-      case AMDGPU::V_LSHLREV_B16_e32: {
+      case AMDGPU::V_LSHLREV_B16_e32:
+      case AMDGPU::V_LSHRREV_B16_e64:
+      case AMDGPU::V_ASHRREV_I16_e64:
+      case AMDGPU::V_LSHLREV_B16_e64: {
         // from: v_lshrrev_b16_e32 v1, 8, v0
         // to SDWA src:v0 src_sel:BYTE_1
 
@@ -472,7 +492,8 @@
             TRI->isPhysicalRegister(Dst->getReg()))
           break;
 
-        if (Opcode == AMDGPU::V_LSHLREV_B16_e32) {
+        if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
+            Opcode == AMDGPU::V_LSHLREV_B16_e64) {
           auto SDWADst =
             make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
           DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
@@ -481,7 +502,8 @@
         } else {
           auto SDWASrc = make_unique<SDWASrcOperand>(
               Src1, Dst, BYTE_1, false, false,
-              Opcode == AMDGPU::V_LSHRREV_B16_e32 ? false : true);
+              Opcode != AMDGPU::V_LSHRREV_B16_e32 &&
+              Opcode != AMDGPU::V_LSHRREV_B16_e64);
           DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
           SDWAOperands[&MI] = std::move(SDWASrc);
           ++NumSDWAPatternsFound;
@@ -549,20 +571,25 @@
         ++NumSDWAPatternsFound;
         break;
       }
-      case AMDGPU::V_AND_B32_e32: {
+      case AMDGPU::V_AND_B32_e32:
+      case AMDGPU::V_AND_B32_e64: {
         // e.g.:
         // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
         // to SDWA src:v0 src_sel:WORD_0/BYTE_0
 
         MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
-        auto Imm = foldToImm(*Src0);
-        if (!Imm)
-          break;
-
-        if (*Imm != 0x0000ffff && *Imm != 0x000000ff)
-          break;
-
         MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+        auto ValSrc = Src1;
+        auto Imm = foldToImm(*Src0);
+
+        if (!Imm) {
+          Imm = foldToImm(*Src1);
+          ValSrc = Src0;
+        }
+
+        if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff))
+          break;
+
         MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
       
         if (TRI->isPhysicalRegister(Src1->getReg()) ||
@@ -570,7 +597,7 @@
           break;
 
         auto SDWASrc = make_unique<SDWASrcOperand>(
-            Src1, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
+            ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
         DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
         SDWAOperands[&MI] = std::move(SDWASrc);
         ++NumSDWAPatternsFound;
@@ -583,28 +610,38 @@
 
 bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI) const {
   // Check if this instruction has opcode that supports SDWA
-  return AMDGPU::getSDWAOp(MI.getOpcode()) != -1;
+  unsigned Opc = MI.getOpcode();
+  if (AMDGPU::getSDWAOp(Opc) != -1)
+    return true;
+  int Opc32 = AMDGPU::getVOPe32(Opc);
+  if (Opc32 != -1 && AMDGPU::getSDWAOp(Opc32) != -1)
+    return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) &&
+           !TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+  return false;
 }
 
 bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
                                    const SDWAOperandsVector &SDWAOperands) {
   // Convert to sdwa
   int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode());
+  if (SDWAOpcode == -1)
+    SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode()));
   assert(SDWAOpcode != -1);
 
+  // Copy dst, if it is present in original then should also be present in SDWA
+  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+  if (!Dst && !TII->isVOPC(MI))
+    return false;
+
   const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
 
   // Create SDWA version of instruction MI and initialize its operands
   MachineInstrBuilder SDWAInst =
     BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);
 
-  // Copy dst, if it is present in original then should also be present in SDWA
-  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
   if (Dst) {
     assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
     SDWAInst.add(*Dst);
-  } else {
-    assert(TII->isVOPC(MI));
   }
 
   // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
@@ -614,7 +651,10 @@
     Src0 &&
     AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 &&
     AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1);
-  SDWAInst.addImm(0);
+  if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers))
+    SDWAInst.addImm(Mod->getImm());
+  else
+    SDWAInst.addImm(0);
   SDWAInst.add(*Src0);
 
   // Copy src1 if present, initialize src1_modifiers.
@@ -623,10 +663,11 @@
     assert(
       AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 &&
       AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1);
-    SDWAInst.addImm(0);
+    if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers))
+      SDWAInst.addImm(Mod->getImm());
+    else
+      SDWAInst.addImm(0);
     SDWAInst.add(*Src1);
-  } else {
-    assert(TII->isVOP1(MI));
   }
 
   if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
commit	03306604030551c4372e973b8038087001e580f3	[log] [tgz]
author	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>	Sat Jun 03 17:39:47 2017 +0000
committer	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>	Sat Jun 03 17:39:47 2017 +0000
tree	df2e2ff4653953246b82ce1a8007ae0c5f16a437
parent	2fdf185beaab02a21374d4339bc9a7fd3ae5c49d [diff] [blame]