AMDGPU: Remove the s_buffer workaround for GFX9 chips

Summary:
I checked the AMD closed source compiler and the workaround is only
needed when x3 is emulated as x4, which we don't do in LLVM.

SMEM x3 opcodes don't exist, and instead there is a possibility to use x4
with the last component being unused. If the last component is out of
buffer bounds and falls on the next 4K page, the hw hangs.

Reviewers: arsenm, nhaehnle

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye

Differential Revision: https://reviews.llvm.org/D42756

llvm-svn: 324486

commit: b2cc77985b1c02c08651e2547c094781e75343eb [log] [tgz]
author: Marek Olsak <marek.olsak@amd.com> Wed Feb 07 16:00:40 2018 +0000
committer: Marek Olsak <marek.olsak@amd.com> Wed Feb 07 16:00:40 2018 +0000
tree: bb2779121825d6882b00dbfaaaeb02acfbbc6a55
parent: b4e789e8f6aa62b628a41a73275aff95c9dd4389 [diff]
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 962c985..700be80 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

@@ -333,14 +333,6 @@
     return HasMadMixInsts;
   }
 
-  bool hasSBufferLoadStoreAtomicDwordxN() const {
-    // Only use the "x1" variants on GFX9 or don't use the buffer variants.
-    // For x2 and higher variants, if the accessed region spans 2 VM pages and
-    // the second page is unmapped, the hw hangs.
-    // TODO: There is one future GFX9 chip that doesn't have this bug.
-    return getGeneration() != GFX9;
-  }
-
   bool hasCARRY() const {
     return (getGeneration() >= EVERGREEN);
   }

diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 65bb5f3..26ba06a 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp

@@ -853,9 +853,8 @@
 
       continue;
     }
-    if (STM->hasSBufferLoadStoreAtomicDwordxN() &&
-        (Opc == AMDGPU::S_BUFFER_LOAD_DWORD_IMM ||
-         Opc == AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM)) {
+    if (Opc == AMDGPU::S_BUFFER_LOAD_DWORD_IMM ||
+        Opc == AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM) {
       // EltSize is in units of the offset encoding.
       CI.InstClass = S_BUFFER_LOAD_IMM;
       CI.EltSize = AMDGPU::getSMRDEncodedOffset(*STM, 4);
commit	b2cc77985b1c02c08651e2547c094781e75343eb	[log] [tgz]
author	Marek Olsak <marek.olsak@amd.com>	Wed Feb 07 16:00:40 2018 +0000
committer	Marek Olsak <marek.olsak@amd.com>	Wed Feb 07 16:00:40 2018 +0000
tree	bb2779121825d6882b00dbfaaaeb02acfbbc6a55
parent	b4e789e8f6aa62b628a41a73275aff95c9dd4389 [diff]