Add IntrWrite[Arg]Mem intrinsic property

Summary:
This property is used to mark an intrinsic that only writes to memory: it
neither reads from memory nor has other side effects.
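
For illustration only (this sketch is not part of the patch; the intrinsic
name and operand types are hypothetical), an intrinsic that writes only
through one of its pointer arguments would be declared along these lines and
tagged with the new IntrWriteArgMem property; an intrinsic whose writes do not
go through a plain pointer argument (like the buffer case below) would use
IntrWriteMem instead:

  // Hypothetical write-only intrinsic: stores a value through its pointer
  // argument, never reads memory, and has no other side effects.
  def int_example_store : Intrinsic<[],                 // no result
                                    [llvm_anyptr_ty,    // destination pointer
                                     llvm_i32_ty],      // value to store
                                    [IntrWriteArgMem]>;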

An example where this is useful is the llvm.amdgcn.buffer.store.format.*
intrinsic, which corresponds to a store instruction that goes through a special
buffer descriptor rather than through a plain pointer.

With this property, the intrinsic should still be handled as having side
effects at the LLVM IR level, but machine scheduling can make smarter
decisions.
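
Concretely (a sketch based on the CodeGenDAGPatterns.cpp and SIInstructions.td
changes below), TableGen now derives mayStore = 1, but neither mayLoad nor
hasSideEffects, for instructions whose patterns match such an intrinsic, so
workarounds of the form

  // no longer needed once the intrinsic carries IntrWriteMem
  let mayLoad = 1, hasSideEffects = 1 in {
    ...
  }

can be dropped from the instruction definitions.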

Reviewers: tstellarAMD, arsenm, joker.eph, reames

Subscribers: arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D18291

llvm-svn: 266826
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index b67dfc4..aee029c 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -38,6 +38,17 @@
 // deleted if dead.
 def IntrReadMem : IntrinsicProperty;
 
+// IntrWriteMem - This intrinsic writes to unspecified memory, but does not
+// read from memory, and has no other side effects. This means dead stores
+// before calls to this intrinsic may be removed.
+def IntrWriteMem : IntrinsicProperty;
+
+// IntrWriteArgMem - This intrinsic writes only to memory that one of its
+// arguments points to, but may access an unspecified amount. The intrinsic
+// does not read from memory and has no other side effects. This means that
+// dead stores before calls to this intrinsic may be removed.
+def IntrWriteArgMem : IntrinsicProperty;
+
 // IntrReadWriteArgMem - This intrinsic reads and writes only from memory that
 // one of its arguments points to, but may access an unspecified amount.  The
 // reads and writes may be volatile, but except for this it has no other side
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 24e8c04..5c5128c 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -243,7 +243,7 @@
    llvm_i32_ty,       // offset(SGPR/VGPR/imm)
    llvm_i1_ty,        // glc(imm)
    llvm_i1_ty],       // slc(imm)
-  []>;
+  [IntrWriteMem]>;
 def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
 def int_amdgcn_buffer_store : AMDGPUBufferStore;
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index f33d412..6fc8aeb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -949,23 +949,18 @@
 defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <
   mubuf<0x03>, "buffer_load_format_xyzw", VReg_128
 >;
-// Without mayLoad and hasSideEffects, TableGen complains about the pattern
-// matching llvm.amdgcn.buffer.store.format. Eventually, we'll need a way
-// to express the effects of the intrinsic more precisely.
-let mayLoad = 1, hasSideEffects = 1 in {
-  defm BUFFER_STORE_FORMAT_X : MUBUF_Store_Helper <
-    mubuf<0x04>, "buffer_store_format_x", VGPR_32
-  >;
-  defm BUFFER_STORE_FORMAT_XY : MUBUF_Store_Helper <
-    mubuf<0x05>, "buffer_store_format_xy", VReg_64
-  >;
-  defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Store_Helper <
-    mubuf<0x06>, "buffer_store_format_xyz", VReg_96
-  >;
-  defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Store_Helper <
-    mubuf<0x07>, "buffer_store_format_xyzw", VReg_128
-  >;
-}
+defm BUFFER_STORE_FORMAT_X : MUBUF_Store_Helper <
+  mubuf<0x04>, "buffer_store_format_x", VGPR_32
+>;
+defm BUFFER_STORE_FORMAT_XY : MUBUF_Store_Helper <
+  mubuf<0x05>, "buffer_store_format_xy", VReg_64
+>;
+defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Store_Helper <
+  mubuf<0x06>, "buffer_store_format_xyz", VReg_96
+>;
+defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Store_Helper <
+  mubuf<0x07>, "buffer_store_format_xyzw", VReg_128
+>;
 defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <
   mubuf<0x08, 0x10>, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global
 >;
@@ -996,11 +991,6 @@
   mubuf<0x1a>, "buffer_store_short", VGPR_32, i32, truncstorei16_global
 >;
 
-// Without mayLoad and hasSideEffects, TableGen complains about the pattern
-// matching llvm.amdgcn.buffer.store. Eventually, we'll want a WriteOnly
-// property to express the effects of this intrinsic more precisely, see
-// http://reviews.llvm.org/D18291
-let mayLoad = 1, hasSideEffects = 1 in {
 defm BUFFER_STORE_DWORD : MUBUF_Store_Helper <
   mubuf<0x1c>, "buffer_store_dword", VGPR_32, i32, global_store
 >;
@@ -1012,7 +1002,6 @@
 defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
   mubuf<0x1e, 0x1f>, "buffer_store_dwordx4", VReg_128, v4i32, global_store
 >;
-}
 
 defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic <
   mubuf<0x30, 0x40>, "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global
diff --git a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
index 92dd7ba..f1ab48b 100644
--- a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
+++ b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
@@ -16,11 +16,11 @@
 
 ; Offset is applied
 ; GCN-LABEL: {{^}}stored_fi_to_lds_2_small_objects:
-; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
-; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:4{{$}}
+; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
+; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:4{{$}}
 
-; GCN: s_load_dword [[LDSPTR:s[0-9]+]]
+; GCN-DAG: s_load_dword [[LDSPTR:s[0-9]+]]
 
 ; GCN-DAG: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
 ; GCN: ds_write_b32  [[VLDSPTR]], [[ZERO]]
diff --git a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
index 6e2bd96..34b85cd 100644
--- a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
@@ -11,8 +11,8 @@
 
 ; FUNC-LABEL: @reorder_local_load_global_store_local_load
 ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
-; CI-NEXT: buffer_store_dword
 ; CI-NEXT: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
+; CI: buffer_store_dword
 define void @reorder_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
   %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
 
@@ -71,9 +71,9 @@
 }
 
 ; FUNC-LABEL: @reorder_constant_load_global_store_constant_load
+; CI: buffer_store_dword
 ; CI: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}}
 ; CI: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}}
-; CI-DAG: buffer_store_dword
 ; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x1
 ; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x2
 ; CI: buffer_store_dword
@@ -184,11 +184,11 @@
 }
 
 ; FUNC-LABEL: @reorder_global_offsets
-; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
 ; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
 ; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
-; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
+; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
 ; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
+; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
 ; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
 ; CI: buffer_store_dword
 ; CI: s_endpgm
diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index 2c85dfc..c2bb55f 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -2816,14 +2816,14 @@
 
     if (const CodeGenIntrinsic *IntInfo = N->getIntrinsicInfo(CDP)) {
       // If this is an intrinsic, analyze it.
-      if (IntInfo->ModRef >= CodeGenIntrinsic::ReadArgMem)
+      if (IntInfo->ModRef & CodeGenIntrinsic::MR_Ref)
         mayLoad = true;// These may load memory.
 
-      if (IntInfo->ModRef >= CodeGenIntrinsic::ReadWriteArgMem)
+      if (IntInfo->ModRef & CodeGenIntrinsic::MR_Mod)
         mayStore = true;// Intrinsics that can write to memory are 'mayStore'.
 
       if (IntInfo->ModRef >= CodeGenIntrinsic::ReadWriteMem)
-        // WriteMem intrinsics can have other strange effects.
+        // ReadWriteMem intrinsics can have other strange effects.
         hasSideEffects = true;
     }
   }
diff --git a/llvm/utils/TableGen/CodeGenIntrinsics.h b/llvm/utils/TableGen/CodeGenIntrinsics.h
index 7bdb7e1..fe1fd7f 100644
--- a/llvm/utils/TableGen/CodeGenIntrinsics.h
+++ b/llvm/utils/TableGen/CodeGenIntrinsics.h
@@ -59,11 +59,36 @@
 
     IntrinsicSignature IS;
 
-    // Memory mod/ref behavior of this intrinsic.
-    enum ModRefKind {
-      NoMem, ReadArgMem, ReadMem, ReadWriteArgMem, ReadWriteMem
+    /// Bit flags describing the type (ref/mod) and location of memory
+    /// accesses that may be performed by the intrinsic. Analogous to
+    /// \c FunctionModRefBehavior.
+    enum ModRefBits {
+      /// The intrinsic may access memory anywhere, i.e. it is not restricted
+      /// to access through pointer arguments.
+      MR_Anywhere = 1,
+
+      /// The intrinsic may read memory.
+      MR_Ref = 2,
+
+      /// The intrinsic may write memory.
+      MR_Mod = 4,
+
+      /// The intrinsic may both read and write memory.
+      MR_ModRef = MR_Ref | MR_Mod,
     };
-    ModRefKind ModRef;
+
+    /// Memory mod/ref behavior of this intrinsic, corresponding to
+    /// intrinsic properties (IntrReadMem, IntrReadArgMem, etc.).
+    enum ModRefBehavior {
+      NoMem = 0,
+      ReadArgMem = MR_Ref,
+      ReadMem = MR_Ref | MR_Anywhere,
+      WriteArgMem = MR_Mod,
+      WriteMem = MR_Mod | MR_Anywhere,
+      ReadWriteArgMem = MR_ModRef,
+      ReadWriteMem = MR_ModRef | MR_Anywhere,
+    };
+    ModRefBehavior ModRef;
 
     /// This is set to true if the intrinsic is overloaded by its argument
     /// types.
diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
index b952e90..d89793f 100644
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -577,6 +577,10 @@
       ModRef = ReadArgMem;
     else if (Property->getName() == "IntrReadMem")
       ModRef = ReadMem;
+    else if (Property->getName() == "IntrWriteMem")
+      ModRef = WriteMem;
+    else if (Property->getName() == "IntrWriteArgMem")
+      ModRef = WriteArgMem;
     else if (Property->getName() == "IntrReadWriteArgMem")
       ModRef = ReadWriteArgMem;
     else if (Property->getName() == "Commutative")
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index dcb70e1..2004eae 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -462,8 +462,8 @@
       return R->isConvergent;
 
     // Try to order by readonly/readnone attribute.
-    CodeGenIntrinsic::ModRefKind LK = L->ModRef;
-    CodeGenIntrinsic::ModRefKind RK = R->ModRef;
+    CodeGenIntrinsic::ModRefBehavior LK = L->ModRef;
+    CodeGenIntrinsic::ModRefBehavior RK = R->ModRef;
     if (LK != RK) return (LK > RK);
 
     // Order by argument attributes.
@@ -616,11 +616,13 @@
           OS << ",";
         OS << "Attribute::ReadOnly";
         break;
+      case CodeGenIntrinsic::WriteArgMem:
       case CodeGenIntrinsic::ReadWriteArgMem:
         if (addComma)
           OS << ",";
         OS << "Attribute::ArgMemOnly";
         break;
+      case CodeGenIntrinsic::WriteMem:
       case CodeGenIntrinsic::ReadWriteMem:
         break;
       }