AMDGPU: Add instruction definitions for some scratch_* instructions
Omit atomics for now since they probably aren't useful.
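
For reference, a rough sketch of the syntax these pseudos accept, derived
from the asm strings defined below rather than from assembler tests: the
address is either a VGPR offset or an SGPR base, and the unused address
operand prints as "off".

  scratch_load_dword v0, v1, off
  scratch_load_dword v0, off, s1
  scratch_store_dword v1, v2, off
  scratch_store_dword off, v2, s1

The saddr operand uses the new SReg_32_XEXEC_HI register class; EXEC_HI is
excluded because its encoding (0x7f) is what the saddr field carries in the
"off" forms.
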
llvm-svn: 308747
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 79d47f5..e70d056 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -687,6 +687,8 @@
def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">,
AssemblerPredicate<"FeatureFlatGlobalInsts">;
+def HasFlatScratchInsts : Predicate<"Subtarget->hasFlatScratchInsts()">,
+ AssemblerPredicate<"FeatureFlatScratchInsts">;
def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarryInsts()">,
AssemblerPredicate<"FeatureAddNoCarryInsts">;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 966c6fe..ca4e501 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -95,6 +95,7 @@
DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
+DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
DECODE_OPERAND_REG(SReg_64)
DECODE_OPERAND_REG(SReg_64_XEXEC)
DECODE_OPERAND_REG(SReg_128)
@@ -365,6 +366,12 @@
return decodeOperand_SReg_32(Val);
}
+MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
+ unsigned Val) const {
+ // SReg_32_XEXEC_HI is SReg_32 without EXEC_HI.
+ return decodeOperand_SReg_32(Val);
+}
+
MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
return decodeSrcOp(OPW64, Val);
}
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 4c755be..b39d145 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -80,6 +80,7 @@
MCOperand decodeOperand_SReg_32(unsigned Val) const;
MCOperand decodeOperand_SReg_32_XM0_XEXEC(unsigned Val) const;
+ MCOperand decodeOperand_SReg_32_XEXEC_HI(unsigned Val) const;
MCOperand decodeOperand_SReg_64(unsigned Val) const;
MCOperand decodeOperand_SReg_64_XEXEC(unsigned Val) const;
MCOperand decodeOperand_SReg_128(unsigned Val) const;
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 1f59f48..c886e49 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -51,6 +51,8 @@
bits<1> has_saddr = 0;
bits<1> enabled_saddr = 0;
bits<7> saddr_value = 0;
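+ // Whether the instruction has a VGPR address operand; cleared for the scratch saddr forms.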
+ bits<1> has_vaddr = 1;
bits<1> has_data = 1;
bits<1> has_glc = 1;
@@ -106,7 +107,7 @@
let Inst{17} = slc;
let Inst{24-18} = op;
let Inst{31-26} = 0x37; // Encoding.
- let Inst{39-32} = vaddr;
+ let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_data, vdata, ?);
let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);
@@ -137,18 +138,6 @@
let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
}
-multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass> {
- let is_flat_global = 1 in {
- def "" : FLAT_Load_Pseudo<opName, regClass, 1, 1>;
- def _SADDR : FLAT_Load_Pseudo<opName, regClass, 1, 1, 1>;
- }
-}
-
-class FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass> :
- FLAT_Load_Pseudo<opName, regClass, 1> {
- let is_flat_scratch = 1;
-}
-
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
bit HasSignedOffset = 0, bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
opName,
@@ -169,6 +158,13 @@
let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
}
+multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass> {
+ let is_flat_global = 1 in {
+ def "" : FLAT_Load_Pseudo<opName, regClass, 1, 1>;
+ def _SADDR : FLAT_Load_Pseudo<opName, regClass, 1, 1, 1>;
+ }
+}
+
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
let is_flat_global = 1 in {
def "" : FLAT_Store_Pseudo<opName, regClass, 1, 1>;
@@ -176,9 +172,54 @@
}
}
-class FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> :
- FLAT_Store_Pseudo<opName, regClass, 1> {
- let is_flat_scratch = 1;
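+// Scratch instructions address the scratch aperture either through a VGPR
+// offset (vaddr) or through an SGPR base (saddr); the unused address
+// operand is printed as "off".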
+class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
+ bit EnableSaddr = 0>: FLAT_Pseudo<
+ opName,
+ (outs regClass:$vdst),
+ !if(EnableSaddr,
+ (ins SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, slc:$slc),
+ (ins VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, slc:$slc)),
+ " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc"> {
+ let has_data = 0;
+ let mayLoad = 1;
+ let has_saddr = 1;
+ let enabled_saddr = EnableSaddr;
+ let has_vaddr = !if(EnableSaddr, 0, 1);
+ let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
+}
+
+class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0> : FLAT_Pseudo<
+ opName,
+ (outs),
+ !if(EnableSaddr,
+ (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, slc:$slc),
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, slc:$slc)),
+ " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc"> {
+ let mayLoad = 0;
+ let mayStore = 1;
+ let has_vdst = 0;
+ let has_saddr = 1;
+ let enabled_saddr = EnableSaddr;
+ let has_vaddr = !if(EnableSaddr, 0, 1);
+
+ let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
+}
+
+multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass> {
+ let is_flat_scratch = 1 in {
+ def "" : FLAT_Scratch_Load_Pseudo<opName, regClass>;
+ def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, 1>;
+ }
+}
+
+multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
+ let is_flat_scratch = 1 in {
+ def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>;
+ def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>;
+ }
}
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
@@ -519,6 +557,25 @@
} // End SubtargetPredicate = HasFlatGlobalInsts
+let SubtargetPredicate = HasFlatScratchInsts in {
+defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
+defm SCRATCH_LOAD_SBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
+defm SCRATCH_LOAD_USHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
+defm SCRATCH_LOAD_SSHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>;
+defm SCRATCH_LOAD_DWORD : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>;
+defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>;
+defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>;
+defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;
+
+defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
+defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
+defm SCRATCH_STORE_DWORD : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>;
+defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>;
+defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>;
+defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>;
+
+} // End SubtargetPredicate = HasFlatScratchInsts
+
//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//
@@ -788,15 +845,16 @@
defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>;
defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
-defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>;
+defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;
defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>;
defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>;
defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>;
defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
-defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
+defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
+
defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>;
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Real_Atomics_vi <0x41>;
@@ -824,3 +882,19 @@
defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Real_Atomics_vi <0x6a>;
defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Real_Atomics_vi <0x6b>;
defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Real_Atomics_vi <0x6c>;
+
+defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>;
+defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>;
+defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>;
+defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>;
+defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>;
+defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
+defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>;
+defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;
+
+defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>;
+defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>;
+defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>;
+defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
+defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
+defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 54ea780..efbb58d 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -279,6 +279,13 @@
let AllocationPriority = 7;
}
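+// Register class for the scratch saddr operand. EXEC_HI is excluded because
+// its encoding (0x7f) is used by the saddr field to mean "off".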
+def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+ (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> {
+ let AllocationPriority = 7;
+}
+
def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
let AllocationPriority = 7;
@@ -286,7 +291,7 @@
// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
- (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI)> {
+ (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> {
let AllocationPriority = 7;
}