AMDGPU: Fix broken FrameIndex handling

We were trying to avoid using a FrameIndex operand in non-pointer operands in a convoluted way, and would break because of using TargetFrameIndex. The TargetFrameIndex should only be used in the case where it makes sense to fold it as part of the addressing mode, otherwise it requires materialization like a normal constant.

This wasn't working reliably and failed in the added testcase, hitting the assert when processing the frame index.

The TargetFrameIndex was coming from trying to produce an AssertZext limiting the maximum stack size. I'm not sure this was correct to begin with, because it is apparently possible to have a single workitem dispatch that requires all 4G of private memory.

llvm-svn: 281824

commit: ac0fc849cf53b41735526eba7931ee4f3508fb8f [log] [tgz]
author: Matt Arsenault <Matthew.Arsenault@amd.com> Sat Sep 17 16:09:55 2016 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> Sat Sep 17 16:09:55 2016 +0000
tree: f4cf0befd7d3ba8e6255338d85188df82606185d
parent: bcfd94c2982e6b8468596390234832653a56fb54 [diff]
diff --git a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
index 161c46b..55b3230 100644
--- a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
+++ b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll

@@ -1,5 +1,17 @@
 ; RUN: llc -march=amdgcn -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
+; GCN-LABEL: {{^}}store_fi_lifetime:
+; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_dword [[FI]]
+define void @store_fi_lifetime(i32 addrspace(1)* %out, i32 %in) #0 {
+entry:
+  %b = alloca i8
+  call void @llvm.lifetime.start(i64 1, i8* %b)
+  store volatile i8* %b, i8* addrspace(1)* undef
+  call void @llvm.lifetime.end(i64 1, i8* %b)
+  ret void
+}
+
 ; GCN-LABEL: {{^}}stored_fi_to_lds:
 ; GCN: s_load_dword [[LDSPTR:s[0-9]+]]
 ; GCN: v_mov_b32_e32 [[ZERO1:v[0-9]+]], 0{{$}}
@@ -140,17 +152,18 @@
 }
 
 ; GCN-LABEL: {{^}}stored_fi_to_global_huge_frame_offset:
-; GCN: s_add_i32 [[BASE_1_OFF_0:s[0-9]+]], 0, 0x3ffc
+; GCN: v_mov_b32_e32 [[VAL_0:v[0-9]+]], 0{{$}}
 ; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}}
-; GCN: buffer_store_dword [[BASE_0]], v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
+; GCN: buffer_store_dword [[VAL_0]], [[BASE_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
 
-; GCN: v_mov_b32_e32 [[V_BASE_1_OFF_0:v[0-9]+]], [[BASE_1_OFF_0]]
+; GCN: v_mov_b32_e32 [[BASE_0_1:v[0-9]+]], 0{{$}}
+; GCN: v_add_i32_e32 [[BASE_1_OFF_0:v[0-9]+]], vcc, 0x3ffc, [[BASE_0_1]]
+
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
-; GCN: s_add_i32 [[BASE_1_OFF_1:s[0-9]+]], 0, 56
-; GCN: buffer_store_dword [[K]], [[V_BASE_1_OFF_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
+; GCN: v_add_i32_e32 [[BASE_1_OFF_1:v[0-9]+]], vcc, 56, [[BASE_0_1]]
+; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
 
-; GCN: v_mov_b32_e32 [[V_BASE_1_OFF_1:v[0-9]+]], [[BASE_1_OFF_1]]
-; GCN: buffer_store_dword [[V_BASE_1_OFF_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN: buffer_store_dword [[BASE_1_OFF_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 define void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 {
   %tmp0 = alloca [4096 x i32]
   %tmp1 = alloca [4096 x i32]
@@ -163,4 +176,27 @@
   ret void
 }
 
+@g1 = external addrspace(1) global i32*
+
+; This was leaving a dead node around resulting in failing to select
+; on the leftover AssertZext's ValueType operand.
+
+; GCN-LABEL: {{^}}cannot_select_assertzext_valuetype:
+; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, g1@GOTPCREL+4
+; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_dword [[FI]]
+define void @cannot_select_assertzext_valuetype(i32 addrspace(1)* %out, i32 %idx) #0 {
+entry:
+  %b = alloca i32, align 4
+  %tmp1 = load volatile i32*, i32* addrspace(1)* @g1, align 4
+  %arrayidx = getelementptr inbounds i32, i32* %tmp1, i32 %idx
+  %tmp2 = load i32, i32* %arrayidx, align 4
+  store volatile i32* %b, i32* addrspace(1)* undef
+  ret void
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
 attributes #0 = { nounwind }
+attributes #1 = { argmemonly nounwind }

diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll b/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
index eb554e2..105d205 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll

@@ -7,8 +7,10 @@
 ;
 ; CHECK-LABEL: {{^}}main:
 ; CHECK: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
-; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]
-; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]
+; CHECK-DAG: v_mov_b32_e32 [[ZERO_BASE_FI:v[0-9]+]], 0{{$}}
+; CHECK-DAG: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]
+; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]
+
 ; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
 ; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
 define amdgpu_ps float @main(i32 %idx) {
commit	ac0fc849cf53b41735526eba7931ee4f3508fb8f	[log] [tgz]
author	Matt Arsenault <Matthew.Arsenault@amd.com>	Sat Sep 17 16:09:55 2016 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	Sat Sep 17 16:09:55 2016 +0000
tree	f4cf0befd7d3ba8e6255338d85188df82606185d
parent	bcfd94c2982e6b8468596390234832653a56fb54 [diff]