[AMDGPU][MC][GFX9] Corrected encoding of ttmp registers, disabled tba/tma
See bugs 35494 and 35559:
https://bugs.llvm.org/show_bug.cgi?id=35494
https://bugs.llvm.org/show_bug.cgi?id=35559
Reviewers: vpykhtin, artem.tamazov, arsenm
Differential Revision: https://reviews.llvm.org/D41007
llvm-svn: 320375
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 7223e88..2acd7f7 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -2578,6 +2578,25 @@
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
unsigned RegNo) const {
+
+ for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
+ R.isValid(); ++R) {
+ if (*R == RegNo)
+ return isGFX9();
+ }
+
+ switch (RegNo) {
+ case AMDGPU::TBA:
+ case AMDGPU::TBA_LO:
+ case AMDGPU::TBA_HI:
+ case AMDGPU::TMA:
+ case AMDGPU::TMA_LO:
+ case AMDGPU::TMA_HI:
+ return !isGFX9();
+ default:
+ break;
+ }
+
if (isCI())
return true;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 9f399c3..a33670c 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -250,7 +250,7 @@
int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
if (SDst != -1) {
// VOPC - insert VCC register as sdst
- insertNamedMCOperand(MI, MCOperand::createReg(AMDGPU::VCC),
+ insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
AMDGPU::OpName::sdst);
} else {
// VOP1/2 - insert omod if present in instruction
@@ -277,7 +277,7 @@
inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
- return MCOperand::createReg(RegId);
+ return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI)); // pseudo -> MC reg
}
inline
@@ -571,6 +571,15 @@
}
}
+int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
+ using namespace AMDGPU::EncValues;
+ // Returns Val's zero-based ttmp index, or -1 if Val is not a ttmp encoding.
+ unsigned TTmpMin = isGFX9() ? TTMP_GFX9_MIN : TTMP_VI_MIN;
+ unsigned TTmpMax = isGFX9() ? TTMP_GFX9_MAX : TTMP_VI_MAX;
+ bool IsTTmp = TTmpMin <= Val && Val <= TTmpMax; // inclusive range check
+ return IsTTmp ? static_cast<int>(Val - TTmpMin) : -1; // both arms are int
+}
+
MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) const {
using namespace AMDGPU::EncValues;
@@ -583,8 +592,10 @@
assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
}
- if (TTMP_MIN <= Val && Val <= TTMP_MAX) {
- return createSRegOperand(getTtmpClassId(Width), Val - TTMP_MIN);
+
+ int TTmpIdx = getTTmpIdx(Val);
+ if (TTmpIdx >= 0) {
+ return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
}
if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
@@ -612,17 +623,17 @@
using namespace AMDGPU;
switch (Val) {
- case 102: return createRegOperand(getMCReg(FLAT_SCR_LO, STI));
- case 103: return createRegOperand(getMCReg(FLAT_SCR_HI, STI));
+ case 102: return createRegOperand(FLAT_SCR_LO);
+ case 103: return createRegOperand(FLAT_SCR_HI);
// ToDo: no support for xnack_mask_lo/_hi register
case 104:
case 105: break;
case 106: return createRegOperand(VCC_LO);
case 107: return createRegOperand(VCC_HI);
- case 108: return createRegOperand(TBA_LO);
- case 109: return createRegOperand(TBA_HI);
- case 110: return createRegOperand(TMA_LO);
- case 111: return createRegOperand(TMA_HI);
+ case 108: assert(!isGFX9()); return createRegOperand(TBA_LO);
+ case 109: assert(!isGFX9()); return createRegOperand(TBA_HI);
+ case 110: assert(!isGFX9()); return createRegOperand(TMA_LO);
+ case 111: assert(!isGFX9()); return createRegOperand(TMA_HI);
case 124: return createRegOperand(M0);
case 126: return createRegOperand(EXEC_LO);
case 127: return createRegOperand(EXEC_HI);
@@ -645,10 +656,10 @@
using namespace AMDGPU;
switch (Val) {
- case 102: return createRegOperand(getMCReg(FLAT_SCR, STI));
+ case 102: return createRegOperand(FLAT_SCR);
case 106: return createRegOperand(VCC);
- case 108: return createRegOperand(TBA);
- case 110: return createRegOperand(TMA);
+ case 108: assert(!isGFX9()); return createRegOperand(TBA);
+ case 110: assert(!isGFX9()); return createRegOperand(TMA);
case 126: return createRegOperand(EXEC);
default: break;
}
@@ -672,6 +683,11 @@
return createSRegOperand(getSgprClassId(Width),
Val - SDWA9EncValues::SRC_SGPR_MIN);
}
+ if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
+ Val <= SDWA9EncValues::SRC_TTMP_MAX) {
+ return createSRegOperand(getTtmpClassId(Width),
+ Val - SDWA9EncValues::SRC_TTMP_MIN);
+ }
return decodeSpecialReg32(Val - SDWA9EncValues::SRC_SGPR_MIN);
} else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
@@ -695,7 +711,11 @@
"SDWAVopcDst should be present only on GFX9");
if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
- if (Val > AMDGPU::EncValues::SGPR_MAX) {
+
+ int TTmpIdx = getTTmpIdx(Val);
+ if (TTmpIdx >= 0) {
+ return createSRegOperand(getTtmpClassId(OPW64), TTmpIdx);
+ } else if (Val > AMDGPU::EncValues::SGPR_MAX) {
return decodeSpecialReg64(Val);
} else {
return createSRegOperand(getSgprClassId(OPW64), Val);
@@ -705,6 +725,14 @@
}
}
+bool AMDGPUDisassembler::isVI() const { // tests STI's FeatureVolcanicIslands bit
+ return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
+}
+
+bool AMDGPUDisassembler::isGFX9() const { // tests STI's FeatureGFX9 bit
+ return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
+}
+
//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index c487fe9..18a91356 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -111,7 +111,12 @@
MCOperand decodeSDWASrc16(unsigned Val) const;
MCOperand decodeSDWASrc32(unsigned Val) const;
MCOperand decodeSDWAVopcDst(unsigned Val) const;
-};
+
+ int getTTmpIdx(unsigned Val) const;
+
+ bool isVI() const;
+ bool isGFX9() const;
+ };
//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 2768e5c..67663d3 100644
--- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -344,16 +344,6 @@
} else if (MRI.getRegClass(AMDGPU::SReg_512RegClassID).contains(RegNo)) {
O << 's';
NumRegs = 16;
- } else if (MRI.getRegClass(AMDGPU::TTMP_64RegClassID).contains(RegNo)) {
- O << "ttmp";
- NumRegs = 2;
- // Trap temps start at offset 112. TODO: Get this from tablegen.
- RegIdx -= 112;
- } else if (MRI.getRegClass(AMDGPU::TTMP_128RegClassID).contains(RegNo)) {
- O << "ttmp";
- NumRegs = 4;
- // Trap temps start at offset 112. TODO: Get this from tablegen.
- RegIdx -= 112;
} else {
O << getRegisterName(RegNo);
return;
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 23bdd69..a9f6069 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -194,8 +194,10 @@
enum {
SGPR_MIN = 0,
SGPR_MAX = 101,
- TTMP_MIN = 112,
- TTMP_MAX = 123,
+ TTMP_VI_MIN = 112,
+ TTMP_VI_MAX = 123,
+ TTMP_GFX9_MIN = 108,
+ TTMP_GFX9_MAX = 123,
INLINE_INTEGER_C_MIN = 128,
INLINE_INTEGER_C_POSITIVE_MAX = 192, // 64
INLINE_INTEGER_C_MAX = 208,
@@ -368,6 +370,8 @@
SRC_VGPR_MAX = 255,
SRC_SGPR_MIN = 256,
SRC_SGPR_MAX = 357,
+ SRC_TTMP_MIN = 364,
+ SRC_TTMP_MAX = 379,
};
} // namespace SDWA
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 6dc67d2..1b813a3 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -172,6 +172,8 @@
reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
+ reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
+ reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 5062a62..6b7c3ff 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -77,18 +77,11 @@
let HWEncoding = 110;
}
-def TTMP0 : SIReg <"ttmp0", 112>;
-def TTMP1 : SIReg <"ttmp1", 113>;
-def TTMP2 : SIReg <"ttmp2", 114>;
-def TTMP3 : SIReg <"ttmp3", 115>;
-def TTMP4 : SIReg <"ttmp4", 116>;
-def TTMP5 : SIReg <"ttmp5", 117>;
-def TTMP6 : SIReg <"ttmp6", 118>;
-def TTMP7 : SIReg <"ttmp7", 119>;
-def TTMP8 : SIReg <"ttmp8", 120>;
-def TTMP9 : SIReg <"ttmp9", 121>;
-def TTMP10 : SIReg <"ttmp10", 122>;
-def TTMP11 : SIReg <"ttmp11", 123>;
+foreach Index = 0-15 in { // three records per trap temporary ttmp0..ttmp15
+ def TTMP#Index#_vi : SIReg<"ttmp"#Index, !add(112, Index)>; // VI variant: 112 + Index (presumably the HW encoding)
+ def TTMP#Index#_gfx9 : SIReg<"ttmp"#Index, !add(108, Index)>; // GFX9 variant: 108 + Index
+ def TTMP#Index : SIReg<"", 0>; // unnamed pseudo register; getMCReg selects the _vi/_gfx9 variant
+}
multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
def _ci : SIReg<n, ci_e>;
@@ -192,7 +185,7 @@
// Trap handler TMP 32-bit registers
def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
- (add (sequence "TTMP%u", 0, 11))> {
+ (add (sequence "TTMP%u", 0, 15))> { // members: pseudo records TTMP0..TTMP15
let isAllocatable = 0;
}
@@ -208,6 +201,36 @@
(add (decimate (shl TTMP_32, 2), 4)),
(add (decimate (shl TTMP_32, 3), 4))]>;
+class TmpRegTuples <string tgt, // suffix of the component register records, e.g. "_vi" or "_gfx9"
+ bit Is64Bit, // 1: two-register tuple; 0: four-register tuple
+ int Index0, // index of the first ttmp register in the tuple
+ int Index1 = !add(Index0, 1),
+ int Index2 = !add(Index0, !if(Is64Bit, 1, 2)), // equals Index1 when Is64Bit; r2 is not used then
+ int Index3 = !add(Index0, !if(Is64Bit, 1, 3)), // index of the last register; also used in the name
+ string name = "ttmp["#Index0#":"#Index3#"]",
+ Register r0 = !cast<Register>("TTMP"#Index0#tgt),
+ Register r1 = !cast<Register>("TTMP"#Index1#tgt),
+ Register r2 = !cast<Register>("TTMP"#Index2#tgt),
+ Register r3 = !cast<Register>("TTMP"#Index3#tgt)> :
+ RegisterWithSubRegs<name, !if(Is64Bit, [r0, r1], [r0, r1, r2, r3])> {
+ let SubRegIndices = !if(Is64Bit, [sub0, sub1], [sub0, sub1, sub2, sub3]);
+ let HWEncoding = r0.HWEncoding; // the tuple takes the encoding of its first register
+}
+
+foreach Index = {0, 2, 4, 6, 8, 10, 12, 14} in { // one two-register tuple per even index, per encoding
+ def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 1, Index>;
+ def TTMP#Index#_TTMP#!add(Index,1)#_gfx9 : TmpRegTuples<"_gfx9", 1, Index>;
+}
+
+foreach Index = {0, 4, 8, 12} in { // one four-register tuple per multiple-of-4 index, per encoding
+ def TTMP#Index#_TTMP#!add(Index,1)#
+ _TTMP#!add(Index,2)#
+ _TTMP#!add(Index,3)#_vi : TmpRegTuples<"_vi", 0, Index>;
+ def TTMP#Index#_TTMP#!add(Index,1)#
+ _TTMP#!add(Index,2)#
+ _TTMP#!add(Index,3)#_gfx9 : TmpRegTuples<"_gfx9", 0, Index>;
+}
+
// VGPR 32-bit registers
// i16/f16 only on VI+
def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 177b030..5a59e04 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -569,44 +569,68 @@
return false;
}
-unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
-
- switch(Reg) {
- default: break;
- case AMDGPU::FLAT_SCR:
- assert(!isSI(STI));
- return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;
-
- case AMDGPU::FLAT_SCR_LO:
- assert(!isSI(STI));
- return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;
-
- case AMDGPU::FLAT_SCR_HI:
- assert(!isSI(STI));
- return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
+#define MAP_REG2REG /* switch over Reg; entries use the CASE_CI_VI / CASE_VI_GFX9 definitions active where this is expanded */ \
+ using namespace AMDGPU; \
+ switch(Reg) { \
+ default: return Reg; \
+ CASE_CI_VI(FLAT_SCR) \
+ CASE_CI_VI(FLAT_SCR_LO) \
+ CASE_CI_VI(FLAT_SCR_HI) \
+ CASE_VI_GFX9(TTMP0) \
+ CASE_VI_GFX9(TTMP1) \
+ CASE_VI_GFX9(TTMP2) \
+ CASE_VI_GFX9(TTMP3) \
+ CASE_VI_GFX9(TTMP4) \
+ CASE_VI_GFX9(TTMP5) \
+ CASE_VI_GFX9(TTMP6) \
+ CASE_VI_GFX9(TTMP7) \
+ CASE_VI_GFX9(TTMP8) \
+ CASE_VI_GFX9(TTMP9) \
+ CASE_VI_GFX9(TTMP10) \
+ CASE_VI_GFX9(TTMP11) \
+ CASE_VI_GFX9(TTMP12) \
+ CASE_VI_GFX9(TTMP13) \
+ CASE_VI_GFX9(TTMP14) \
+ CASE_VI_GFX9(TTMP15) \
+ CASE_VI_GFX9(TTMP0_TTMP1) \
+ CASE_VI_GFX9(TTMP2_TTMP3) \
+ CASE_VI_GFX9(TTMP4_TTMP5) \
+ CASE_VI_GFX9(TTMP6_TTMP7) \
+ CASE_VI_GFX9(TTMP8_TTMP9) \
+ CASE_VI_GFX9(TTMP10_TTMP11) \
+ CASE_VI_GFX9(TTMP12_TTMP13) \
+ CASE_VI_GFX9(TTMP14_TTMP15) \
+ CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
+ CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
+ CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
+ CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
}
- return Reg;
+
+#define CASE_CI_VI(node) /* pseudo -> _ci/_vi; assert follows the label so it is reachable */ \
+ case node: assert(!isSI(STI)); \
+ return isCI(STI) ? node##_ci : node##_vi;
+
+#define CASE_VI_GFX9(node) /* pseudo -> _gfx9 when isGFX9(STI), otherwise _vi */ \
+ case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;
+
+unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
+ MAP_REG2REG // pseudo reg -> subtarget variant; any other Reg is returned unchanged
}
+#undef CASE_CI_VI
+#undef CASE_VI_GFX9
+
+#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node; // _ci/_vi variant -> pseudo
+#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node; // _vi/_gfx9 variant -> pseudo
+
unsigned mc2PseudoReg(unsigned Reg) {
- switch (Reg) {
- case AMDGPU::FLAT_SCR_ci:
- case AMDGPU::FLAT_SCR_vi:
- return FLAT_SCR;
-
- case AMDGPU::FLAT_SCR_LO_ci:
- case AMDGPU::FLAT_SCR_LO_vi:
- return AMDGPU::FLAT_SCR_LO;
-
- case AMDGPU::FLAT_SCR_HI_ci:
- case AMDGPU::FLAT_SCR_HI_vi:
- return AMDGPU::FLAT_SCR_HI;
-
- default:
- return Reg;
- }
+ MAP_REG2REG // subtarget variant -> pseudo reg; any other Reg is returned unchanged
}
+#undef CASE_CI_VI
+#undef CASE_VI_GFX9
+#undef MAP_REG2REG
+
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
assert(OpNo < Desc.NumOperands);
unsigned OpType = Desc.OpInfo[OpNo].OperandType;