[AMDGPU][MC][GFX9] Added support for operands shared_base, shared_limit, private_base, private_limit, pops_exiting_wave_id
See bug 39297: https://bugs.llvm.org/show_bug.cgi?id=39297
Reviewers: artem.tamazov, arsenm, rampitec
Differential Revision: https://reviews.llvm.org/D59290
llvm-svn: 356561
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index e8f0a2b..4f9e643 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -659,6 +659,9 @@
case AMDGPU::SRC_PRIVATE_LIMIT:
continue;
+ case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
+ llvm_unreachable("src_pops_exiting_wave_id should not be used");
+
case AMDGPU::NoRegister:
assert(MI.isDebugInstr());
continue;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index c5b3f34..6db22f1 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -344,6 +344,8 @@
bool isRegClass(unsigned RCID) const;
+ bool isInlineValue() const;
+
bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
}
@@ -1271,6 +1273,15 @@
}
bool AMDGPUOperand::isInlinableImm(MVT type) const {
+
+ // This is a hack to enable named inline values like
+ // shared_base with both 32-bit and 64-bit operands.
+ // Note that these values are defined as
+ // 32-bit operands only.
+ if (isInlineValue()) {
+ return true;
+ }
+
if (!isImmTy(ImmTyNone)) {
// Only plain immediates are inlinable (e.g. "clamp" attribute is not)
return false;
@@ -1579,6 +1590,23 @@
Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}
+/// Returns true if \p Reg is one of the named source registers this parser
+/// treats as an inline value: SRC_SHARED_BASE, SRC_SHARED_LIMIT,
+/// SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT or SRC_POPS_EXITING_WAVE_ID.
+/// Any other register number yields false.
+static bool isInlineValue(unsigned Reg) {
+  const bool IsBaseOrLimit = Reg == AMDGPU::SRC_SHARED_BASE ||
+                             Reg == AMDGPU::SRC_SHARED_LIMIT ||
+                             Reg == AMDGPU::SRC_PRIVATE_BASE ||
+                             Reg == AMDGPU::SRC_PRIVATE_LIMIT;
+  return IsBaseOrLimit || Reg == AMDGPU::SRC_POPS_EXITING_WAVE_ID;
+}
+
+/// Tells whether this operand is a register operand whose register is one of
+/// the named inline values accepted by the file-local ::isInlineValue().
+bool AMDGPUOperand::isInlineValue() const {
+  // Non-register operands never qualify; getReg() is only queried once the
+  // operand is known to be a register.
+  if (!isRegKind())
+    return false;
+  return ::isInlineValue(getReg());
+}
+
//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//
@@ -1622,6 +1650,16 @@
.Case("vcc", AMDGPU::VCC)
.Case("flat_scratch", AMDGPU::FLAT_SCR)
.Case("xnack_mask", AMDGPU::XNACK_MASK)
+ .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
+ .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
+ .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
+ .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
+ .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
+ .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
+ .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
+ .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
+ .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
+ .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
.Case("lds_direct", AMDGPU::LDS_DIRECT)
.Case("src_lds_direct", AMDGPU::LDS_DIRECT)
.Case("m0", AMDGPU::M0)
@@ -3391,6 +3429,9 @@
break;
}
+ if (isInlineValue(RegNo))
+ return !isCI() && !isSI() && !isVI();
+
if (isCI())
return true;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 7db266f..a75ce72 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -779,10 +779,10 @@
case 105: return createRegOperand(XNACK_MASK_HI);
case 106: return createRegOperand(VCC_LO);
case 107: return createRegOperand(VCC_HI);
- case 108: assert(!isGFX9()); return createRegOperand(TBA_LO);
- case 109: assert(!isGFX9()); return createRegOperand(TBA_HI);
- case 110: assert(!isGFX9()); return createRegOperand(TMA_LO);
- case 111: assert(!isGFX9()); return createRegOperand(TMA_HI);
+ case 108: return createRegOperand(TBA_LO);
+ case 109: return createRegOperand(TBA_HI);
+ case 110: return createRegOperand(TMA_LO);
+ case 111: return createRegOperand(TMA_HI);
case 124: return createRegOperand(M0);
case 126: return createRegOperand(EXEC_LO);
case 127: return createRegOperand(EXEC_HI);
@@ -790,7 +790,7 @@
case 236: return createRegOperand(SRC_SHARED_LIMIT);
case 237: return createRegOperand(SRC_PRIVATE_BASE);
case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
- // TODO: SRC_POPS_EXITING_WAVE_ID
+ case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
// ToDo: no support for vccz register
case 251: break;
// ToDo: no support for execz register
@@ -809,9 +809,14 @@
case 102: return createRegOperand(FLAT_SCR);
case 104: return createRegOperand(XNACK_MASK);
case 106: return createRegOperand(VCC);
- case 108: assert(!isGFX9()); return createRegOperand(TBA);
- case 110: assert(!isGFX9()); return createRegOperand(TMA);
+ case 108: return createRegOperand(TBA);
+ case 110: return createRegOperand(TMA);
case 126: return createRegOperand(EXEC);
+ case 235: return createRegOperand(SRC_SHARED_BASE);
+ case 236: return createRegOperand(SRC_SHARED_LIMIT);
+ case 237: return createRegOperand(SRC_PRIVATE_BASE);
+ case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
+ case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
default: break;
}
return errOperand(Val, "unknown operand encoding " + Twine(Val));
diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 28a1cf7..be1ab48 100644
--- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -268,6 +268,21 @@
case AMDGPU::XNACK_MASK:
O << "xnack_mask";
return;
+ case AMDGPU::SRC_SHARED_BASE:
+ O << "src_shared_base";
+ return;
+ case AMDGPU::SRC_SHARED_LIMIT:
+ O << "src_shared_limit";
+ return;
+ case AMDGPU::SRC_PRIVATE_BASE:
+ O << "src_private_base";
+ return;
+ case AMDGPU::SRC_PRIVATE_LIMIT:
+ O << "src_private_limit";
+ return;
+ case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
+ O << "src_pops_exiting_wave_id";
+ return;
case AMDGPU::LDS_DIRECT:
O << "src_lds_direct";
return;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index c3e13a6..c6af452 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -160,6 +160,9 @@
reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
+ // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
+ reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
+
// Reserve xnack_mask registers - support is not implemented in Codegen.
reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 5f04a8c..551ffcb 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -74,6 +74,7 @@
def SRC_SHARED_LIMIT : SIReg<"src_shared_limit", 236>;
def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>;
def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>;
+def SRC_POPS_EXITING_WAVE_ID : SIReg<"src_pops_exiting_wave_id", 239>;
def LDS_DIRECT : SIReg <"lds_direct", 254>;
@@ -422,7 +423,7 @@
def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
- SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
+ SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID)> {
let AllocationPriority = 7;
}