[AMDGPU][MC][GFX8][GFX9] Added XNACK_MASK support

See bug 35764: https://bugs.llvm.org/show_bug.cgi?id=35764

Differential Revision: https://reviews.llvm.org/D41614

Reviewers: vpykhtin, artem.tamazov, arsenm
llvm-svn: 322189
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index fda6252..50d1d43 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -663,6 +663,11 @@
         case AMDGPU::FLAT_SCR_HI:
           continue;
 
+        case AMDGPU::XNACK_MASK:
+        case AMDGPU::XNACK_MASK_LO:
+        case AMDGPU::XNACK_MASK_HI:
+          llvm_unreachable("xnack_mask registers should not be used");
+
         case AMDGPU::TBA:
         case AMDGPU::TBA_LO:
         case AMDGPU::TBA_HI:
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 2e3a453..4a25711 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -896,6 +896,10 @@
     KernelScope.initialize(getContext());
   }
 
+  bool hasXNACK() const {
+    return AMDGPU::hasXNACK(getSTI());
+  }
+
   bool isSI() const {
     return AMDGPU::isSI(getSTI());
   }
@@ -1521,12 +1525,15 @@
     .Case("exec", AMDGPU::EXEC)
     .Case("vcc", AMDGPU::VCC)
     .Case("flat_scratch", AMDGPU::FLAT_SCR)
+    .Case("xnack_mask", AMDGPU::XNACK_MASK)
     .Case("m0", AMDGPU::M0)
     .Case("scc", AMDGPU::SCC)
     .Case("tba", AMDGPU::TBA)
     .Case("tma", AMDGPU::TMA)
     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
+    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
+    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
     .Case("vcc_lo", AMDGPU::VCC_LO)
     .Case("vcc_hi", AMDGPU::VCC_HI)
     .Case("exec_lo", AMDGPU::EXEC_LO)
@@ -1564,6 +1571,11 @@
       RegWidth = 2;
       return true;
     }
+    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
+      Reg = AMDGPU::XNACK_MASK;
+      RegWidth = 2;
+      return true;
+    }
     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
       Reg = AMDGPU::VCC;
       RegWidth = 2;
@@ -2617,6 +2629,10 @@
   case AMDGPU::TMA_LO:
   case AMDGPU::TMA_HI:
     return !isGFX9();
+  case AMDGPU::XNACK_MASK:
+  case AMDGPU::XNACK_MASK_LO:
+  case AMDGPU::XNACK_MASK_HI:
+    return !isCI() && !isSI() && hasXNACK();
   default:
     break;
   }
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 47a2d3f..ca7f2d1 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -690,9 +690,8 @@
   switch (Val) {
   case 102: return createRegOperand(FLAT_SCR_LO);
   case 103: return createRegOperand(FLAT_SCR_HI);
-    // ToDo: no support for xnack_mask_lo/_hi register
-  case 104:
-  case 105: break;
+  case 104: return createRegOperand(XNACK_MASK_LO);
+  case 105: return createRegOperand(XNACK_MASK_HI);
   case 106: return createRegOperand(VCC_LO);
   case 107: return createRegOperand(VCC_HI);
   case 108: assert(!isGFX9()); return createRegOperand(TBA_LO);
@@ -722,6 +721,7 @@
 
   switch (Val) {
   case 102: return createRegOperand(FLAT_SCR);
+  case 104: return createRegOperand(XNACK_MASK);
   case 106: return createRegOperand(VCC);
   case 108: assert(!isGFX9()); return createRegOperand(TBA);
   case 110: assert(!isGFX9()); return createRegOperand(TMA);
diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index bf57f88..eaa24f2 100644
--- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -267,6 +267,9 @@
   case AMDGPU::FLAT_SCR:
     O << "flat_scratch";
     return;
+  case AMDGPU::XNACK_MASK:
+    O << "xnack_mask";
+    return;
   case AMDGPU::VCC_LO:
     O << "vcc_lo";
     return;
@@ -297,6 +300,12 @@
   case AMDGPU::FLAT_SCR_HI:
     O << "flat_scratch_hi";
     return;
+  case AMDGPU::XNACK_MASK_LO:
+    O << "xnack_mask_lo";
+    return;
+  case AMDGPU::XNACK_MASK_HI:
+    O << "xnack_mask_hi";
+    return;
   case AMDGPU::FP_REG:
   case AMDGPU::SP_REG:
   case AMDGPU::SCRATCH_WAVE_OFFSET_REG:
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 65cdc13..21035a4 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -163,6 +163,9 @@
   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
 
+  // Reserve xnack_mask registers - support is not implemented in Codegen.
+  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
+
   // Reserve Trap Handler registers - support is not implemented in Codegen.
   reserveRegisterTuples(Reserved, AMDGPU::TBA);
   reserveRegisterTuples(Reserved, AMDGPU::TMA);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index dd0efef..0c93125 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -76,6 +76,16 @@
 def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>;
 def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>;
 
+def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>;
+def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>;
+
+def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>,
+                 DwarfRegAlias<XNACK_MASK_LO> {
+  let Namespace = "AMDGPU";
+  let SubRegIndices = [sub0, sub1];
+  let HWEncoding = 104;
+}
+
 // Trap handler registers
 def TBA_LO : SIReg<"tba_lo", 108>;
 def TBA_HI : SIReg<"tba_hi", 109>;
@@ -403,7 +413,7 @@
 // Subset of SReg_32 without M0 for SMRD instructions and alike.
 // See comments in SIInstructions.td for more info.
 def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
-  (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
+  (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
    TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
    SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
   let AllocationPriority = 7;
@@ -435,7 +445,7 @@
 }
 
 def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
-  (add SGPR_64, VCC, FLAT_SCR, TTMP_64, TBA, TMA)> {
+  (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> {
   let CopyCost = 1;
   let AllocationPriority = 8;
 }
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 125a3b2..0deb66b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -598,6 +598,10 @@
   }
 }
 
+bool hasXNACK(const MCSubtargetInfo &STI) {
+  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
+}
+
 bool isSI(const MCSubtargetInfo &STI) {
   return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
 }
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index a215b44..0c1d697 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -278,6 +278,8 @@
   }
 }
 
+bool hasXNACK(const MCSubtargetInfo &STI);
+
 bool isSI(const MCSubtargetInfo &STI);
 bool isCI(const MCSubtargetInfo &STI);
 bool isVI(const MCSubtargetInfo &STI);
diff --git a/llvm/test/MC/AMDGPU/xnack-mask.s b/llvm/test/MC/AMDGPU/xnack-mask.s
new file mode 100644
index 0000000..08ded06
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/xnack-mask.s
@@ -0,0 +1,30 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICIVI %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICIVI %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICIVI %s
+
+// RUN: not llvm-mc -arch=amdgcn -mcpu=stoney -show-encoding %s 2>&1 | FileCheck -check-prefix=XNACKERR %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=stoney -show-encoding %s | FileCheck -check-prefix=XNACK %s
+
+s_mov_b64 xnack_mask, -1
+// NOSICIVI: error: not a valid operand.
+// XNACK:    s_mov_b64 xnack_mask, -1 ; encoding: [0xc1,0x01,0xe8,0xbe]
+
+s_mov_b32 xnack_mask_lo, -1
+// NOSICIVI: error: not a valid operand.
+// XNACK:    s_mov_b32 xnack_mask_lo, -1 ; encoding: [0xc1,0x00,0xe8,0xbe]
+
+s_mov_b32 xnack_mask_hi, -1
+// NOSICIVI: error: not a valid operand.
+// XNACK:    s_mov_b32 xnack_mask_hi, -1 ; encoding: [0xc1,0x00,0xe9,0xbe]
+
+s_mov_b32 xnack_mask, -1
+// NOSICIVI: error: not a valid operand.
+// XNACKERR: error: invalid operand for instruction
+
+s_mov_b64 xnack_mask_lo, -1
+// NOSICIVI: error: not a valid operand.
+// XNACKERR: error: invalid operand for instruction
+
+s_mov_b64 xnack_mask_hi, -1
+// NOSICIVI: error: not a valid operand.
+// XNACKERR: error: invalid operand for instruction
diff --git a/llvm/test/MC/Disassembler/AMDGPU/sop1_vi.txt b/llvm/test/MC/Disassembler/AMDGPU/sop1_vi.txt
index 749783d..308a416 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/sop1_vi.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/sop1_vi.txt
@@ -15,12 +15,21 @@
 # VI:   s_mov_b32 s0, 0xfe5163ab ; encoding: [0xff,0x00,0x80,0xbe,0xab,0x63,0x51,0xfe]
 0xff 0x00 0x80 0xbe 0xab 0x63 0x51 0xfe
 
+# VI:   s_mov_b32 xnack_mask_lo, -1 ; encoding: [0xc1,0x00,0xe8,0xbe]
+0xc1,0x00,0xe8,0xbe
+
+# VI:   s_mov_b32 xnack_mask_hi, -1 ; encoding: [0xc1,0x00,0xe9,0xbe]
+0xc1,0x00,0xe9,0xbe
+
 # VI:   s_mov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x01,0x82,0xbe]
 0x04 0x01 0x82 0xbe
 
-# FIXME:   s_mov_b64 s[2:3], -1 ; encoding: [0xc1,0x01,0x82,0xbe]
+# VI:   s_mov_b64 s[2:3], -1 ; encoding: [0xc1,0x01,0x82,0xbe]
 0xc1 0x01 0x82 0xbe
 
+# VI: s_mov_b64 xnack_mask, -1 ; encoding: [0xc1,0x01,0xe8,0xbe]
+0xc1,0x01,0xe8,0xbe
+
 # VI:   s_mov_b64 s[2:3], 0xffffffff ; encoding: [0xff,0x01,0x82,0xbe,0xff,0xff,0xff,0xff]
 0xff 0x01 0x82 0xbe 0xff 0xff 0xff 0xff
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt b/llvm/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt
index 35e363f..5c62162 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt
@@ -210,6 +210,9 @@
 # GFX9: v_mad_mix_f32 v5, flat_scratch_hi, v2, v3    ; encoding: [0x05,0x00,0xa0,0xd3,0x67,0x04,0x0e,0x04]
 0x05,0x00,0xa0,0xd3,0x67,0x04,0x0e,0x04
 
+# GFX9: v_mad_mix_f32 v5, xnack_mask_hi, v2, v3 ; encoding: [0x05,0x00,0xa0,0xd3,0x69,0x04,0x0e,0x04]
+0x05,0x00,0xa0,0xd3,0x69,0x04,0x0e,0x04
+
 # GFX9: v_mad_mix_f32 v5, vcc_lo, v2, v3    ; encoding: [0x05,0x00,0xa0,0xd3,0x6a,0x04,0x0e,0x04]
 0x05,0x00,0xa0,0xd3,0x6a,0x04,0x0e,0x04
 
@@ -665,3 +668,6 @@
 
 # GFX9: v_interp_p2_legacy_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x76,0xd2,0x00,0x04,0x0e,0x04]
 0x05,0x80,0x76,0xd2,0x00,0x04,0x0e,0x04
+
+# GFX9: v_add_f64 v[5:6], xnack_mask, v[2:3] ; encoding: [0x05,0x00,0x80,0xd2,0x68,0x04,0x02,0x00]
+0x05,0x00,0x80,0xd2,0x68,0x04,0x02,0x00