AMDGPU: Select DS insts without m0 initialization

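GFX9 stopped using m0 for most DS instructions. For subtargets that
do not require m0 initialization, select a separate variant of each
DS instruction (the "_gfx9" pseudos) that does not have the m0 use.
I think this will be less error prone than trying to manually
maintain the m0 uses as needed.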

llvm-svn: 319270
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 9fcfb108..f898fd7 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -600,6 +600,20 @@
   (inst $ptr, (as_i16imm $offset), (i1 0))
 >;
 
+// FIXME: The PatFrag is passed by name (as a string) as a workaround.
+// Why doesn't !cast<PatFrag>(frag.NAME#"_m0") work?
+multiclass DSReadPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
+
+  let OtherPredicates = [LDSRequiresM0Init] in {
+    def : DSReadPat<inst, vt, !cast<PatFrag>(frag#"_m0")>;
+  }
+
+  let OtherPredicates = [NotLDSRequiresM0Init] in {
+    def : DSReadPat<!cast<DS_Pseudo>(inst.NAME#"_gfx9"), vt, !cast<PatFrag>(frag)>;
+  }
+}
+
+
 multiclass DSReadPat_Hi16 <DS_Pseudo inst, PatFrag frag, ValueType vt = i16> {
   def : GCNPat <
     (build_vector vt:$lo, (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset)))),
@@ -624,30 +638,22 @@
   >;
 }
 
-
-def : DSReadPat <DS_READ_I8,  i32, sextloadi8_local_m0>;
-def : DSReadPat <DS_READ_U8,  i32, az_extloadi8_local_m0>;
-def : DSReadPat <DS_READ_I8,  i16, sextloadi8_local_m0>;
-def : DSReadPat <DS_READ_U8,  i16, az_extloadi8_local_m0>;
-def : DSReadPat <DS_READ_I16, i32, sextloadi16_local_m0>;
-def : DSReadPat <DS_READ_I16, i32, sextloadi16_local_m0>;
-def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local_m0>;
-def : DSReadPat <DS_READ_U16, i16, load_local_m0>;
-def : DSReadPat <DS_READ_B32, i32, load_local_m0>;
+defm : DSReadPat_mc <DS_READ_I8, i32, "sextloadi8_local">;
+defm : DSReadPat_mc <DS_READ_U8,  i32, "az_extloadi8_local">;
+defm : DSReadPat_mc <DS_READ_I8,  i16, "sextloadi8_local">;
+defm : DSReadPat_mc <DS_READ_U8,  i16, "az_extloadi8_local">;
+defm : DSReadPat_mc <DS_READ_I16, i32, "sextloadi16_local">;
+defm : DSReadPat_mc <DS_READ_I16, i32, "sextloadi16_local">;
+defm : DSReadPat_mc <DS_READ_U16, i32, "az_extloadi16_local">;
+defm : DSReadPat_mc <DS_READ_U16, i16, "load_local">;
+defm : DSReadPat_mc <DS_READ_B32, i32, "load_local">;
 
 let AddedComplexity = 100 in {
 
-def : DSReadPat <DS_READ_B64, v2i32, load_align8_local_m0>;
+defm : DSReadPat_mc <DS_READ_B64, v2i32, "load_align8_local">;
 
 } // End AddedComplexity = 100
 
-def : GCNPat <
-  (v2i32 (load_local_m0 (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
-                                                       i8:$offset1))),
-  (DS_READ2_B32 $ptr, $offset0, $offset1, (i1 0))
->;
-
-
 let OtherPredicates = [HasD16LoadStore] in {
 let AddedComplexity = 100 in {
 defm : DSReadPat_Hi16<DS_READ_U16_D16_HI, load_local>;
@@ -666,71 +672,119 @@
   (inst $ptr, $value, (as_i16imm $offset), (i1 0))
 >;
 
-def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local_m0>;
-def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local_m0>;
-def : DSWritePat <DS_WRITE_B8, i16, truncstorei8_local_m0>;
-def : DSWritePat <DS_WRITE_B16, i16, store_local_m0>;
-def : DSWritePat <DS_WRITE_B32, i32, store_local_m0>;
+multiclass DSWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
+  let OtherPredicates = [LDSRequiresM0Init] in {
+    def : DSWritePat<inst, vt, !cast<PatFrag>(frag#"_m0")>;
+  }
+
+  let OtherPredicates = [NotLDSRequiresM0Init] in {
+    def : DSWritePat<!cast<DS_Pseudo>(inst.NAME#"_gfx9"), vt, !cast<PatFrag>(frag)>;
+  }
+}
+
+defm : DSWritePat_mc <DS_WRITE_B8, i32, "truncstorei8_local">;
+defm : DSWritePat_mc <DS_WRITE_B16, i32, "truncstorei16_local">;
+defm : DSWritePat_mc <DS_WRITE_B8, i16, "truncstorei8_local">;
+defm : DSWritePat_mc <DS_WRITE_B16, i16, "store_local">;
+defm : DSWritePat_mc <DS_WRITE_B32, i32, "store_local">;
 
 let OtherPredicates = [HasD16LoadStore] in {
 def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_local_hi16>;
 def : DSWritePat <DS_WRITE_B8_D16_HI, i32, truncstorei8_local_hi16>;
 }
 
-let AddedComplexity = 100 in {
 
-def : DSWritePat <DS_WRITE_B64, v2i32, store_align8_local_m0>;
-} // End AddedComplexity = 100
-
-def : GCNPat <
-  (store_local_m0 v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
-                                                               i8:$offset1)),
-  (DS_WRITE2_B32 $ptr, (i32 (EXTRACT_SUBREG $value, sub0)),
-                       (i32 (EXTRACT_SUBREG $value, sub1)), $offset0, $offset1,
-                       (i1 0))
+class DS64Bit4ByteAlignedReadPat<DS_Pseudo inst, PatFrag frag> : GCNPat <
+  (v2i32 (frag (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))),
+  (inst $ptr, $offset0, $offset1, (i1 0))
 >;
 
+class DS64Bit4ByteAlignedWritePat<DS_Pseudo inst, PatFrag frag> : GCNPat<
+  (frag v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)),
+  (inst $ptr, (i32 (EXTRACT_SUBREG $value, sub0)),
+              (i32 (EXTRACT_SUBREG $value, sub1)), $offset0, $offset1,
+              (i1 0))
+>;
+
+let OtherPredicates = [LDSRequiresM0Init] in {
+def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32, load_local_m0>;
+def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32, store_local_m0>;
+}
+
+let OtherPredicates = [NotLDSRequiresM0Init] in {
+def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32_gfx9, load_local>;
+def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32_gfx9, store_local>;
+}
+
+
+let AddedComplexity = 100 in {
+
+defm : DSWritePat_mc <DS_WRITE_B64, v2i32, "store_align8_local">;
+} // End AddedComplexity = 100
 class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
   (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
   (inst $ptr, $value, (as_i16imm $offset), (i1 0))
 >;
 
+multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
+  let OtherPredicates = [LDSRequiresM0Init] in {
+    def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_m0")>;
+  }
+
+  let OtherPredicates = [NotLDSRequiresM0Init] in {
+    def : DSAtomicRetPat<!cast<DS_Pseudo>(inst.NAME#"_gfx9"), vt, !cast<PatFrag>(frag)>;
+  }
+}
+
+
+
 class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
   (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
   (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0))
 >;
 
+multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, ValueType vt, string frag> {
+  let OtherPredicates = [LDSRequiresM0Init] in {
+    def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_m0")>;
+  }
+
+  let OtherPredicates = [NotLDSRequiresM0Init] in {
+    def : DSAtomicCmpXChg<!cast<DS_Pseudo>(inst.NAME#"_gfx9"), vt, !cast<PatFrag>(frag)>;
+  }
+}
+
+
 
 // 32-bit atomics.
-def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local_m0>;
-def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local_m0>;
-def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local_m0>;
-def : DSAtomicRetPat<DS_INC_RTN_U32, i32, atomic_inc_local_m0>;
-def : DSAtomicRetPat<DS_DEC_RTN_U32, i32, atomic_dec_local_m0>;
-def : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local_m0>;
-def : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local_m0>;
-def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local_m0>;
-def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local_m0>;
-def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local_m0>;
-def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local_m0>;
-def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local_m0>;
-def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_local_m0>;
+defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B32, i32, "atomic_swap_local">;
+defm : DSAtomicRetPat_mc<DS_ADD_RTN_U32, i32, "atomic_load_add_local">;
+defm : DSAtomicRetPat_mc<DS_SUB_RTN_U32, i32, "atomic_load_sub_local">;
+defm : DSAtomicRetPat_mc<DS_INC_RTN_U32, i32, "atomic_inc_local">;
+defm : DSAtomicRetPat_mc<DS_DEC_RTN_U32, i32, "atomic_dec_local">;
+defm : DSAtomicRetPat_mc<DS_AND_RTN_B32, i32, "atomic_load_and_local">;
+defm : DSAtomicRetPat_mc<DS_OR_RTN_B32, i32, "atomic_load_or_local">;
+defm : DSAtomicRetPat_mc<DS_XOR_RTN_B32, i32, "atomic_load_xor_local">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_I32, i32, "atomic_load_min_local">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_I32, i32, "atomic_load_max_local">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_U32, i32, "atomic_load_umin_local">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_U32, i32, "atomic_load_umax_local">;
+defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B32, i32, "atomic_cmp_swap_local">;
 
 // 64-bit atomics.
-def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local_m0>;
-def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local_m0>;
-def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local_m0>;
-def : DSAtomicRetPat<DS_INC_RTN_U64, i64, atomic_inc_local_m0>;
-def : DSAtomicRetPat<DS_DEC_RTN_U64, i64, atomic_dec_local_m0>;
-def : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local_m0>;
-def : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local_m0>;
-def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local_m0>;
-def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local_m0>;
-def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local_m0>;
-def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local_m0>;
-def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local_m0>;
+defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap_local">;
+defm : DSAtomicRetPat_mc<DS_ADD_RTN_U64, i64, "atomic_load_add_local">;
+defm : DSAtomicRetPat_mc<DS_SUB_RTN_U64, i64, "atomic_load_sub_local">;
+defm : DSAtomicRetPat_mc<DS_INC_RTN_U64, i64, "atomic_inc_local">;
+defm : DSAtomicRetPat_mc<DS_DEC_RTN_U64, i64, "atomic_dec_local">;
+defm : DSAtomicRetPat_mc<DS_AND_RTN_B64, i64, "atomic_load_and_local">;
+defm : DSAtomicRetPat_mc<DS_OR_RTN_B64, i64, "atomic_load_or_local">;
+defm : DSAtomicRetPat_mc<DS_XOR_RTN_B64, i64, "atomic_load_xor_local">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_I64, i64, "atomic_load_min_local">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_I64, i64, "atomic_load_max_local">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_U64, i64, "atomic_load_umin_local">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_U64, i64, "atomic_load_umax_local">;
 
-def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_local_m0>;
+defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B64, i64, "atomic_cmp_swap_local">;
 
 //===----------------------------------------------------------------------===//
 // Real instructions