[AMDGPU][MC] Added support of SCC, VCCZ and EXECZ operands

See bug 39292: https://bugs.llvm.org/show_bug.cgi?id=39292

Reviewers: rampitec, arsenm

Differential Revision: https://reviews.llvm.org/D62660

llvm-svn: 362400
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 0e8a517..9dcbb59 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -678,6 +678,15 @@
         case AMDGPU::TMA_HI:
           llvm_unreachable("trap handler registers should not be used");
 
+        case AMDGPU::SRC_VCCZ:
+          llvm_unreachable("src_vccz register should not be used");
+
+        case AMDGPU::SRC_EXECZ:
+          llvm_unreachable("src_execz register should not be used");
+
+        case AMDGPU::SRC_SCC:
+          llvm_unreachable("src_scc register should not be used");
+
         default:
           break;
         }
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index bc7068e..3787952 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1658,6 +1658,10 @@
   case AMDGPU::SRC_PRIVATE_LIMIT:
   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
     return true;
+  case AMDGPU::SRC_VCCZ:
+  case AMDGPU::SRC_EXECZ:
+  case AMDGPU::SRC_SCC:
+    return true;
   default:
     return false;
   }
@@ -1723,7 +1727,12 @@
     .Case("lds_direct", AMDGPU::LDS_DIRECT)
     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
     .Case("m0", AMDGPU::M0)
-    .Case("scc", AMDGPU::SCC)
+    .Case("vccz", AMDGPU::SRC_VCCZ)
+    .Case("src_vccz", AMDGPU::SRC_VCCZ)
+    .Case("execz", AMDGPU::SRC_EXECZ)
+    .Case("src_execz", AMDGPU::SRC_EXECZ)
+    .Case("scc", AMDGPU::SRC_SCC)
+    .Case("src_scc", AMDGPU::SRC_SCC)
     .Case("tba", AMDGPU::TBA)
     .Case("tma", AMDGPU::TMA)
     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
@@ -3878,6 +3887,12 @@
   }
 
   switch (RegNo) {
+  case AMDGPU::SRC_SHARED_BASE:
+  case AMDGPU::SRC_SHARED_LIMIT:
+  case AMDGPU::SRC_PRIVATE_BASE:
+  case AMDGPU::SRC_PRIVATE_LIMIT:
+  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
+    return !isCI() && !isSI() && !isVI();
   case AMDGPU::TBA:
   case AMDGPU::TBA_LO:
   case AMDGPU::TBA_HI:
@@ -3895,9 +3910,6 @@
     break;
   }
 
-  if (isInlineValue(RegNo))
-    return !isCI() && !isSI() && !isVI();
-
   if (isCI())
     return true;
 
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index b77deea..307f0cb 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -916,11 +916,9 @@
   case 237: return createRegOperand(SRC_PRIVATE_BASE);
   case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
   case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
-    // ToDo: no support for vccz register
-  case 251: break;
-    // ToDo: no support for execz register
-  case 252: break;
-  case 253: return createRegOperand(SCC);
+  case 251: return createRegOperand(SRC_VCCZ);
+  case 252: return createRegOperand(SRC_EXECZ);
+  case 253: return createRegOperand(SRC_SCC);
   case 254: return createRegOperand(LDS_DIRECT);
   default: break;
   }
@@ -942,6 +940,9 @@
   case 237: return createRegOperand(SRC_PRIVATE_BASE);
   case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
   case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
+  case 251: return createRegOperand(SRC_VCCZ);
+  case 252: return createRegOperand(SRC_EXECZ);
+  case 253: return createRegOperand(SRC_SCC);
   default: break;
   }
   return errOperand(Val, "unknown operand encoding " + Twine(Val));
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index f67904a..0e27063 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -281,8 +281,14 @@
   case AMDGPU::VCC:
     O << "vcc";
     return;
-  case AMDGPU::SCC:
-    O << "scc";
+  case AMDGPU::SRC_VCCZ:
+    O << "src_vccz";
+    return;
+  case AMDGPU::SRC_EXECZ:
+    O << "src_execz";
+    return;
+  case AMDGPU::SRC_SCC:
+    O << "src_scc";
     return;
   case AMDGPU::EXEC:
     O << "exec";
@@ -358,6 +364,8 @@
   case AMDGPU::SCRATCH_WAVE_OFFSET_REG:
   case AMDGPU::PRIVATE_RSRC_REG:
     llvm_unreachable("pseudo-register should not ever be emitted");
+  case AMDGPU::SCC:
+    llvm_unreachable("pseudo scc should not ever be emitted");
   default:
     break;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 14f5dbe..c25be61 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2823,19 +2823,19 @@
   if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
     return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
 
-  // FLAT_SCR is just an SGPR pair.
-  if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
-    return true;
-
-  // EXEC register uses the constant bus.
-  if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
-    return true;
+  // Null is free
+  if (MO.getReg() == AMDGPU::SGPR_NULL)
+    return false;
 
   // SGPRs use the constant bus
-  return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 ||
-          (!MO.isImplicit() &&
-           (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
-            AMDGPU::SGPR_64RegClass.contains(MO.getReg()))));
+  if (MO.isImplicit()) {
+    return MO.getReg() == AMDGPU::M0 ||
+           MO.getReg() == AMDGPU::VCC ||
+           MO.getReg() == AMDGPU::VCC_LO;
+  } else {
+    return AMDGPU::SReg_32RegClass.contains(MO.getReg()) ||
+           AMDGPU::SReg_64RegClass.contains(MO.getReg());
+  }
 }
 
 static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index b503af4..341a88f 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -154,6 +154,11 @@
   // M0 has to be reserved so that llvm accepts it as a live-in into a block.
   reserveRegisterTuples(Reserved, AMDGPU::M0);
 
+  // Reserve src_vccz, src_execz, src_scc.
+  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
+  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
+  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
+
   // Reserve the memory aperture registers.
   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 0a7962e..2605487 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -69,7 +69,16 @@
   let HWEncoding = 126;
 }
 
-def SCC : SIReg<"scc", 253>;
+// 32-bit real registers, for MC only.
+// May be used with both 32-bit and 64-bit operands.
+def SRC_VCCZ : SIReg<"src_vccz", 251>;
+def SRC_EXECZ : SIReg<"src_execz", 252>;
+def SRC_SCC : SIReg<"src_scc", 253>;
+
+// 1-bit pseudo register, for codegen only.
+// Should never be emitted.
+def SCC : SIReg<"">;
+
 def M0 : SIReg <"m0", 124>;
 def SGPR_NULL : SIReg<"null", 125>;
 
@@ -448,7 +457,8 @@
 def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
   (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
    SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
-   SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID)> {
+   SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID,
+   SRC_VCCZ, SRC_EXECZ, SRC_SCC)> {
   let AllocationPriority = 8;
 }