AMDGPU: Fix broken FrameIndex handling

We were trying, in a convoluted way, to avoid using a FrameIndex as a
non-pointer operand, and this would break because a TargetFrameIndex
was being used. A TargetFrameIndex should only be used in the case
where it makes sense to fold it as part of the addressing mode;
otherwise the frame index requires materialization like a normal constant.
This wasn't working reliably and failed in the added testcase, hitting
the assert when processing the frame index.

The TargetFrameIndex was coming from trying to produce an AssertZext
limiting the maximum stack size. I'm not sure this was correct to begin
with, because it is apparently possible to have a single workitem
dispatch that requires all 4G of private memory.

llvm-svn: 281824
diff --git a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
index 161c46b..55b3230 100644
--- a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
+++ b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
@@ -1,5 +1,17 @@
 ; RUN: llc -march=amdgcn -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
+; GCN-LABEL: {{^}}store_fi_lifetime:
+; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_dword [[FI]]
+define void @store_fi_lifetime(i32 addrspace(1)* %out, i32 %in) #0 {
+entry:
+  %b = alloca i8
+  call void @llvm.lifetime.start(i64 1, i8* %b)
+  store volatile i8* %b, i8* addrspace(1)* undef
+  call void @llvm.lifetime.end(i64 1, i8* %b)
+  ret void
+}
+
 ; GCN-LABEL: {{^}}stored_fi_to_lds:
 ; GCN: s_load_dword [[LDSPTR:s[0-9]+]]
 ; GCN: v_mov_b32_e32 [[ZERO1:v[0-9]+]], 0{{$}}
@@ -140,17 +152,18 @@
 }
 
 ; GCN-LABEL: {{^}}stored_fi_to_global_huge_frame_offset:
-; GCN: s_add_i32 [[BASE_1_OFF_0:s[0-9]+]], 0, 0x3ffc
+; GCN: v_mov_b32_e32 [[VAL_0:v[0-9]+]], 0{{$}}
 ; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}}
-; GCN: buffer_store_dword [[BASE_0]], v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
+; GCN: buffer_store_dword [[VAL_0]], [[BASE_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
 
-; GCN: v_mov_b32_e32 [[V_BASE_1_OFF_0:v[0-9]+]], [[BASE_1_OFF_0]]
+; GCN: v_mov_b32_e32 [[BASE_0_1:v[0-9]+]], 0{{$}}
+; GCN: v_add_i32_e32 [[BASE_1_OFF_0:v[0-9]+]], vcc, 0x3ffc, [[BASE_0_1]]
+
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
-; GCN: s_add_i32 [[BASE_1_OFF_1:s[0-9]+]], 0, 56
-; GCN: buffer_store_dword [[K]], [[V_BASE_1_OFF_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
+; GCN: v_add_i32_e32 [[BASE_1_OFF_1:v[0-9]+]], vcc, 56, [[BASE_0_1]]
+; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
 
-; GCN: v_mov_b32_e32 [[V_BASE_1_OFF_1:v[0-9]+]], [[BASE_1_OFF_1]]
-; GCN: buffer_store_dword [[V_BASE_1_OFF_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN: buffer_store_dword [[BASE_1_OFF_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 define void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 {
   %tmp0 = alloca [4096 x i32]
   %tmp1 = alloca [4096 x i32]
@@ -163,4 +176,27 @@
   ret void
 }
 
+@g1 = external addrspace(1) global i32*
+
+; This used to leave a dead node around, which made instruction selection
+; fail on the leftover AssertZext's ValueType operand.
+
+; GCN-LABEL: {{^}}cannot_select_assertzext_valuetype:
+; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, g1@GOTPCREL+4
+; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_dword [[FI]]
+define void @cannot_select_assertzext_valuetype(i32 addrspace(1)* %out, i32 %idx) #0 {
+entry:
+  %b = alloca i32, align 4
+  %tmp1 = load volatile i32*, i32* addrspace(1)* @g1, align 4
+  %arrayidx = getelementptr inbounds i32, i32* %tmp1, i32 %idx
+  %tmp2 = load i32, i32* %arrayidx, align 4
+  store volatile i32* %b, i32* addrspace(1)* undef
+  ret void
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
 attributes #0 = { nounwind }
+attributes #1 = { argmemonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll b/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
index eb554e2..105d205 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
@@ -7,8 +7,10 @@
 ;
 ; CHECK-LABEL: {{^}}main:
 ; CHECK: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
-; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]
-; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]
+; CHECK-DAG: v_mov_b32_e32 [[ZERO_BASE_FI:v[0-9]+]], 0{{$}}
+; CHECK-DAG: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]
+; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]
+
 ; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
 ; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
 define amdgpu_ps float @main(i32 %idx) {