AMDGPU: Split MUBUF offset into aligned components Summary: Atomic buffer operations do not work (and trap on gfx9) when the components are unaligned, even if their sum is aligned. Previously, we generated an offset of 4156 without an SGPR by splitting it as 4095 + 61 (immediate + inline constant). The highest offset for which we can do this correctly is 4156 = 4092 + 64. Fixes dEQP-GLES31.functional.ssbo.atomic.* Reviewers: arsenm Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye Differential Revision: https://reviews.llvm.org/D37850 llvm-svn: 315302

commit: 312b64f4d703947779ce56abdcc0d59741ff99df [log] [tgz]
author: Nicolai Haehnle <nhaehnle@gmail.com> Tue Oct 10 12:22:23 2017 +0000
committer: Nicolai Haehnle <nhaehnle@gmail.com> Tue Oct 10 12:22:23 2017 +0000
tree: 9eb7bccd2f72bdf3326848767801ab20070931af
parent: 0f22a06b4de75d4d02741b9f39d39df1cd4e8066 [diff] [blame]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll
index 5c93ae0..03caca8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll

@@ -18,18 +18,18 @@
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_immoffs:
-;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:42
+;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:40
 ;CHECK: s_waitcnt
 define amdgpu_ps <4 x float> @buffer_load_immoffs(<4 x i32> inreg) {
 main_body:
-  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
+  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %0, i32 0, i32 40, i1 0, i1 0)
   ret <4 x float> %data
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_immoffs_large:
 ;SICI: buffer_load_dwordx4 v[0:3], {{v[0-9]+}}, s[0:3], 0 offen
-;VI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x1fff
-;VI: buffer_load_dwordx4 v[0:3], off, s[0:3], [[OFFSET]] offset:1
+;VI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x1ffc
+;VI: buffer_load_dwordx4 v[0:3], off, s[0:3], [[OFFSET]] offset:4
 ;CHECK: s_waitcnt
 define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) {
 main_body:
@@ -56,11 +56,11 @@
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_ofs_imm:
-;CHECK: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:58
+;CHECK: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:60
 ;CHECK: s_waitcnt
 define amdgpu_ps <4 x float> @buffer_load_ofs_imm(<4 x i32> inreg, i32) {
 main_body:
-  %ofs = add i32 %1, 58
+  %ofs = add i32 %1, 60
   %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %0, i32 0, i32 %ofs, i1 0, i1 0)
   ret <4 x float> %data
 }
commit	312b64f4d703947779ce56abdcc0d59741ff99df	[log] [tgz]
author	Nicolai Haehnle <nhaehnle@gmail.com>	Tue Oct 10 12:22:23 2017 +0000
committer	Nicolai Haehnle <nhaehnle@gmail.com>	Tue Oct 10 12:22:23 2017 +0000
tree	9eb7bccd2f72bdf3326848767801ab20070931af
parent	0f22a06b4de75d4d02741b9f39d39df1cd4e8066 [diff] [blame]