Fixed/Recommitted r267733 "[AMDGPU][llvm-mc] Add support of TTMP quads. Rework M0 exclusion for SMRD."
Previously reverted by r267752.
r267733 review:
Differential Revision: http://reviews.llvm.org/D19342
llvm-svn: 268066
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 6ffb062..f67a093 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -644,13 +644,14 @@
default: return -1;
case 1: return AMDGPU::TTMP_32RegClassID;
case 2: return AMDGPU::TTMP_64RegClassID;
+ case 4: return AMDGPU::TTMP_128RegClassID;
}
} else if (Is == IS_SGPR) {
switch (RegWidth) {
default: return -1;
case 1: return AMDGPU::SGPR_32RegClassID;
case 2: return AMDGPU::SGPR_64RegClassID;
- case 4: return AMDGPU::SReg_128RegClassID;
+ case 4: return AMDGPU::SGPR_128RegClassID;
case 8: return AMDGPU::SReg_256RegClassID;
case 16: return AMDGPU::SReg_512RegClassID;
}
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 2990b57..2e3a818 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -66,8 +66,8 @@
DECODE_OPERAND(VReg_96)
DECODE_OPERAND(VReg_128)
-DECODE_OPERAND(SGPR_32)
DECODE_OPERAND(SReg_32)
+DECODE_OPERAND(SReg_32_XM0)
DECODE_OPERAND(SReg_64)
DECODE_OPERAND(SReg_128)
DECODE_OPERAND(SReg_256)
@@ -237,10 +237,6 @@
return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
}
-MCOperand AMDGPUDisassembler::decodeOperand_SGPR_32(unsigned Val) const {
- return createSRegOperand(AMDGPU::SGPR_32RegClassID, Val);
-}
-
MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
// table-gen generated disassembler doesn't care about operand types
// leaving only registry class so SSrc_32 operand turns into SReg_32
@@ -248,6 +244,11 @@
return decodeSrcOp(OP32, Val);
}
+MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0(unsigned Val) const {
+ // SReg_32_XM0 is SReg_32 without M0
+ return decodeOperand_SReg_32(Val);
+}
+
MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
// see decodeOperand_SReg_32 comment
return decodeSrcOp(OP64, Val);
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index f1ba30e..1856a4e 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -62,8 +62,8 @@
MCOperand decodeOperand_VReg_96(unsigned Val) const;
MCOperand decodeOperand_VReg_128(unsigned Val) const;
- MCOperand decodeOperand_SGPR_32(unsigned Val) const;
MCOperand decodeOperand_SReg_32(unsigned Val) const;
+ MCOperand decodeOperand_SReg_32_XM0(unsigned Val) const;
MCOperand decodeOperand_SReg_64(unsigned Val) const;
MCOperand decodeOperand_SReg_128(unsigned Val) const;
MCOperand decodeOperand_SReg_256(unsigned Val) const;
diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 37acd2e..652b172 100644
--- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -250,9 +250,12 @@
} else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(reg)) {
Type = "v";
NumRegs = 4;
- } else if (MRI.getRegClass(AMDGPU::SReg_128RegClassID).contains(reg)) {
+ } else if (MRI.getRegClass(AMDGPU::SGPR_128RegClassID).contains(reg)) {
Type = "s";
NumRegs = 4;
+ } else if (MRI.getRegClass(AMDGPU::TTMP_128RegClassID).contains(reg)) {
+ Type = "ttmp";
+ NumRegs = 4;
} else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(reg)) {
Type = "v";
NumRegs = 3;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 8b73010..dfd6eb6 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -60,17 +60,17 @@
// SMRD Instructions
//===----------------------------------------------------------------------===//
-// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
-// SMRD instructions, because the SGPR_32 register class does not include M0
+// We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit
+// SMRD instructions, because the SReg_32_XM0 register class does not include M0
// and writing to M0 from an SMRD instruction will hang the GPU.
-defm S_LOAD_DWORD : SMRD_Helper <smrd<0x00>, "s_load_dword", SReg_64, SGPR_32>;
+defm S_LOAD_DWORD : SMRD_Helper <smrd<0x00>, "s_load_dword", SReg_64, SReg_32_XM0>;
defm S_LOAD_DWORDX2 : SMRD_Helper <smrd<0x01>, "s_load_dwordx2", SReg_64, SReg_64>;
defm S_LOAD_DWORDX4 : SMRD_Helper <smrd<0x02>, "s_load_dwordx4", SReg_64, SReg_128>;
defm S_LOAD_DWORDX8 : SMRD_Helper <smrd<0x03>, "s_load_dwordx8", SReg_64, SReg_256>;
defm S_LOAD_DWORDX16 : SMRD_Helper <smrd<0x04>, "s_load_dwordx16", SReg_64, SReg_512>;
defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
- smrd<0x08>, "s_buffer_load_dword", SReg_128, SGPR_32
+ smrd<0x08>, "s_buffer_load_dword", SReg_128, SReg_32_XM0
>;
defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
@@ -2087,9 +2087,9 @@
}
// It's unclear whether you can use M0 as the output of v_readlane_b32
-// instructions, so use SGPR_32 register class for spills to prevent
+// instructions, so use SReg_32_XM0 register class for spills to prevent
// this from happening.
-defm SI_SPILL_S32 : SI_SPILL_SGPR <SGPR_32>;
+defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32_XM0>;
defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
@@ -3431,7 +3431,7 @@
def : Pat <
(i64 (sext i32:$src)),
(REG_SEQUENCE SReg_64, $src, sub0,
- (i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, 31), SGPR_32)), sub1)
+ (i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, 31), SReg_32_XM0)), sub1)
>;
def : Pat <
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 384b761..6c6fa3c 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -132,7 +132,7 @@
(add (decimate (shl SGPR_32, 1), 2))]>;
// SGPR 128-bit registers
-def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
+def SGPR_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
[(add (decimate SGPR_32, 4)),
(add (decimate (shl SGPR_32, 1), 4)),
(add (decimate (shl SGPR_32, 2), 4)),
@@ -255,6 +255,13 @@
TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)
>;
+// Subset of SReg_32 without M0 for SMRD instructions and alike.
+// See comments in SIInstructions.td for more info.
+def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32], 32,
+ (add SGPR_32, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
+ TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)
+>;
+
def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)>;
def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add TTMP_64Regs)> {
@@ -265,11 +272,19 @@
(add SGPR_64, VCC, EXEC, FLAT_SCR, TTMP_64, TBA, TMA)
>;
-def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128)> {
- // Requires 2 s_mov_b64 to copy
- let CopyCost = 2;
+// Requires 2 s_mov_b64 to copy
+let CopyCost = 2 in {
+
+def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128Regs)>;
+
+def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add TTMP_128Regs)> {
+ let isAllocatable = 0;
}
+def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128, TTMP_128)>;
+
+} // End CopyCost = 2
+
def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> {
// Requires 4 s_mov_b64 to copy
let CopyCost = 4;