AMDGPU: Add patterns for i32/i64 local atomic load/store
Not sure why the 32/64 split is needed in the atomic_load
store hierarchies. The regular PatFrags do this, but we don't
do it for the existing handling for global.
llvm-svn: 335325
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 28887ea..cdc6ab9 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -647,6 +647,8 @@
defm : DSReadPat_mc <DS_READ_U16, i32, "az_extloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i16, "load_local">;
defm : DSReadPat_mc <DS_READ_B32, i32, "load_local">;
+defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
+defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;
let AddedComplexity = 100 in {
@@ -683,11 +685,30 @@
}
}
+// Irritatingly, atomic_store reverses the order of operands from a
+// normal store.
+class DSAtomicWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
+ (inst $ptr, $value, (as_i16imm $offset), (i1 0))
+>;
+
+multiclass DSAtomicWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
+ let OtherPredicates = [LDSRequiresM0Init] in {
+ def : DSAtomicWritePat<inst, vt, !cast<PatFrag>(frag#"_m0")>;
+ }
+
+ let OtherPredicates = [NotLDSRequiresM0Init] in {
+ def : DSAtomicWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
+ }
+}
+
defm : DSWritePat_mc <DS_WRITE_B8, i32, "truncstorei8_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i32, "truncstorei16_local">;
defm : DSWritePat_mc <DS_WRITE_B8, i16, "truncstorei8_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i16, "store_local">;
defm : DSWritePat_mc <DS_WRITE_B32, i32, "store_local">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B32, i32, "atomic_store_local">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B64, i64, "atomic_store_local">;
let OtherPredicates = [D16PreservesUnusedBits] in {
def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_local_hi16>;