AMDGPU: Add instruction definitions for some scratch_* instructions

Omit atomics for now; scratch memory is private to each thread, so they
probably aren't useful.
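
With these definitions the assembler should accept forms like the following
(operand order taken from the pseudo asm strings below; registers and offsets
are illustrative):

  scratch_load_dword v0, v1, off offset:8     // VGPR address
  scratch_load_dword v0, off, s2 offset:8     // SGPR saddr base
  scratch_store_dword v1, v2, off
  scratch_store_dword off, v2, s4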

llvm-svn: 308747
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 79d47f5..e70d056 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -687,6 +687,8 @@
 
 def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">,
   AssemblerPredicate<"FeatureFlatGlobalInsts">;
+def HasFlatScratchInsts : Predicate<"Subtarget->hasFlatScratchInsts()">,
+  AssemblerPredicate<"FeatureFlatScratchInsts">;
 
 def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarryInsts()">,
   AssemblerPredicate<"FeatureAddNoCarryInsts">;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 966c6fe..ca4e501 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -95,6 +95,7 @@
 
 DECODE_OPERAND_REG(SReg_32)
 DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
+DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
 DECODE_OPERAND_REG(SReg_64)
 DECODE_OPERAND_REG(SReg_64_XEXEC)
 DECODE_OPERAND_REG(SReg_128)
@@ -365,6 +366,12 @@
   return decodeOperand_SReg_32(Val);
 }
 
+MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
+  unsigned Val) const {
+  // SReg_32_XEXEC_HI is SReg_32 without EXEC_HI, so reuse the SReg_32 decoder.
+  return decodeOperand_SReg_32(Val);
+}
+
 MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
   return decodeSrcOp(OPW64, Val);
 }
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 4c755be..b39d145 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -80,6 +80,7 @@
 
   MCOperand decodeOperand_SReg_32(unsigned Val) const;
   MCOperand decodeOperand_SReg_32_XM0_XEXEC(unsigned Val) const;
+  MCOperand decodeOperand_SReg_32_XEXEC_HI(unsigned Val) const;
   MCOperand decodeOperand_SReg_64(unsigned Val) const;
   MCOperand decodeOperand_SReg_64_XEXEC(unsigned Val) const;
   MCOperand decodeOperand_SReg_128(unsigned Val) const;
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 1f59f48..c886e49 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -51,6 +51,7 @@
   bits<1> has_saddr = 0;
   bits<1> enabled_saddr = 0;
   bits<7> saddr_value = 0;
+  bits<1> has_vaddr = 1; // 0 for scratch SADDR forms, which take no VGPR address.
 
   bits<1> has_data = 1;
   bits<1> has_glc  = 1;
@@ -106,7 +107,7 @@
   let Inst{17}    = slc;
   let Inst{24-18} = op;
   let Inst{31-26} = 0x37; // Encoding.
-  let Inst{39-32} = vaddr;
+  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
   let Inst{47-40} = !if(ps.has_data, vdata, ?);
   let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);
 
@@ -137,18 +138,6 @@
   let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
 }
 
-multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass> {
-  let is_flat_global = 1 in {
-    def "" : FLAT_Load_Pseudo<opName, regClass, 1, 1>;
-    def _SADDR : FLAT_Load_Pseudo<opName, regClass, 1, 1, 1>;
-  }
-}
-
-class FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass> :
-  FLAT_Load_Pseudo<opName, regClass, 1> {
-  let is_flat_scratch = 1;
-}
-
 class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
   bit HasSignedOffset = 0, bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
   opName,
@@ -169,6 +158,13 @@
   let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
 }
 
+multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass> {
+  let is_flat_global = 1 in {
+    def "" : FLAT_Load_Pseudo<opName, regClass, 1, 1>;
+    def _SADDR : FLAT_Load_Pseudo<opName, regClass, 1, 1, 1>;
+  }
+}
+
 multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
   let is_flat_global = 1 in {
     def "" : FLAT_Store_Pseudo<opName, regClass, 1, 1>;
@@ -176,9 +172,53 @@
   }
 }
 
-class FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> :
-  FLAT_Store_Pseudo<opName, regClass, 1> {
-  let is_flat_scratch = 1;
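+// Scratch instructions take either a 32-bit VGPR address (vaddr) or a 32-bit
+// SGPR base (saddr); the unused operand is printed as "off".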
+class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
+  bit EnableSaddr = 0>: FLAT_Pseudo<
+  opName,
+  (outs regClass:$vdst),
+  !if(EnableSaddr,
+      (ins SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, slc:$slc),
+      (ins VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, slc:$slc)),
+  " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc"> {
+  let has_data = 0;
+  let mayLoad = 1;
+  let has_saddr = 1;
+  let enabled_saddr = EnableSaddr;
+  let has_vaddr = !if(EnableSaddr, 0, 1);
+  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
+}
+
+class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0> : FLAT_Pseudo<
+  opName,
+  (outs),
+  !if(EnableSaddr,
+    (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, slc:$slc),
+    (ins vdataClass:$vdata, VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, slc:$slc)),
+  " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc"> {
+  let mayLoad  = 0;
+  let mayStore = 1;
+  let has_vdst = 0;
+  let has_saddr = 1;
+  let enabled_saddr = EnableSaddr;
+  let has_vaddr = !if(EnableSaddr, 0, 1);
+
+  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
+}
+
+multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass> {
+  let is_flat_scratch = 1 in {
+    def "" : FLAT_Scratch_Load_Pseudo<opName, regClass>;
+    def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, 1>;
+  }
+}
+
+multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
+  let is_flat_scratch = 1 in {
+    def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>;
+    def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>;
+  }
 }
 
 class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
@@ -519,6 +557,25 @@
 } // End SubtargetPredicate = HasFlatGlobalInsts
 
 
+let SubtargetPredicate = HasFlatScratchInsts in {
+defm SCRATCH_LOAD_UBYTE    : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
+defm SCRATCH_LOAD_SBYTE    : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
+defm SCRATCH_LOAD_USHORT   : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
+defm SCRATCH_LOAD_SSHORT   : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>;
+defm SCRATCH_LOAD_DWORD    : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>;
+defm SCRATCH_LOAD_DWORDX2  : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>;
+defm SCRATCH_LOAD_DWORDX3  : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>;
+defm SCRATCH_LOAD_DWORDX4  : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;
+
+defm SCRATCH_STORE_BYTE    : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
+defm SCRATCH_STORE_SHORT   : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
+defm SCRATCH_STORE_DWORD   : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>;
+defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>;
+defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>;
+defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>;
+
+} // End SubtargetPredicate = HasFlatScratchInsts
+
 //===----------------------------------------------------------------------===//
 // Flat Patterns
 //===----------------------------------------------------------------------===//
@@ -788,15 +845,16 @@
 defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>;
 defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>;
 defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
-defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;
 defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>;
+defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;
 
 defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>;
 defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>;
 defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>;
 defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
-defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
 defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
+defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
+
 
 defm GLOBAL_ATOMIC_SWAP       : FLAT_Global_Real_Atomics_vi <0x40>;
 defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Real_Atomics_vi <0x41>;
@@ -824,3 +882,19 @@
 defm GLOBAL_ATOMIC_XOR_X2     : FLAT_Global_Real_Atomics_vi <0x6a>;
 defm GLOBAL_ATOMIC_INC_X2     : FLAT_Global_Real_Atomics_vi <0x6b>;
 defm GLOBAL_ATOMIC_DEC_X2     : FLAT_Global_Real_Atomics_vi <0x6c>;
+
+defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>;
+defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>;
+defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>;
+defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>;
+defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>;
+defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
+defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>;
+defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;
+
+defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>;
+defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>;
+defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>;
+defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
+defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
+defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 54ea780..efbb58d 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -279,6 +279,11 @@
   let AllocationPriority = 7;
 }
 
+def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+  (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> { // SReg_32 without EXEC_HI.
+  let AllocationPriority = 7;
+}
+
 def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
   (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
   let AllocationPriority = 7;
@@ -286,7 +291,7 @@
 
 // Register class for all scalar registers (SGPRs + Special Registers)
 def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
-  (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI)> {
+  (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> {
   let AllocationPriority = 7;
 }