Matt Arsenault | a202538 | 2017-08-03 23:24:05 +0000 | [diff] [blame] | 1 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s |
| 2 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 3 | |
; Aggregate passed byval by every function in this test. It wraps a single
; array of four i32 values, i.e. 16 bytes of payload.
| 4 | %struct.ByValStruct = type { [4 x i32] } |
| 5 | |
| 6 | ; GCN-LABEL: {{^}}void_func_byval_struct: |
| 7 | ; GCN: s_mov_b32 s5, s32 |
Matt Arsenault | d1867c0 | 2017-08-02 00:59:51 +0000 | [diff] [blame] | 8 | ; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}} |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 9 | ; GCN-NOT: s32 |
Matt Arsenault | d1867c0 | 2017-08-02 00:59:51 +0000 | [diff] [blame] | 10 | ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s5 offset:4{{$}} |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 11 | ; GCN-NOT: s32 |
| 12 | |
Matt Arsenault | d1867c0 | 2017-08-02 00:59:51 +0000 | [diff] [blame] | 13 | ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}} |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 14 | ; GCN-NOT: s32 |
Matt Arsenault | d1867c0 | 2017-08-02 00:59:51 +0000 | [diff] [blame] | 15 | ; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s5 offset:20{{$}} |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 16 | ; GCN-NOT: s32 |
; Callee that receives two %struct.ByValStruct arguments byval and makes no
; calls of its own. It bumps element 0 of %arg0 by 1 and element 0 of %arg1
; by 2 through volatile accesses, then performs a volatile store of 9 through
; a null addrspace(1) pointer. All memory operations are volatile, so none of
; them can be folded away or merged by the optimizer.
| 17 | define void @void_func_byval_struct(%struct.ByValStruct* byval noalias nocapture align 4 %arg0, %struct.ByValStruct* byval noalias nocapture align 4 %arg1) #1 { |
| 18 | entry: |
; Element 0 of the first byval argument, volatile load, add 1, store back.
| 19 | %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0 |
| 20 | %tmp = load volatile i32, i32* %arrayidx, align 4 |
| 21 | %add = add nsw i32 %tmp, 1 |
| 22 | store volatile i32 %add, i32* %arrayidx, align 4 |
; Element 0 of the second byval argument, volatile load, add 2, store back.
| 23 | %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0 |
| 24 | %tmp1 = load volatile i32, i32* %arrayidx2, align 4 |
| 25 | %add3 = add nsw i32 %tmp1, 2 |
| 26 | store volatile i32 %add3, i32* %arrayidx2, align 4 |
; Extra volatile store of the constant 9 to the null addrspace(1) pointer.
| 27 | store volatile i32 9, i32 addrspace(1)* null, align 4 |
| 28 | ret void |
| 29 | } |
| 30 | |
| 31 | ; GCN-LABEL: {{^}}void_func_byval_struct_non_leaf: |
| 32 | ; GCN: s_mov_b32 s5, s32 |
Matt Arsenault | 8e8f8f4 | 2017-08-02 01:52:45 +0000 | [diff] [blame] | 33 | ; GCN-DAG: buffer_store_dword v32 |
| 34 | ; GCN-DAG: buffer_store_dword v33 |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 35 | ; GCN: v_writelane_b32 |
| 36 | |
Matt Arsenault | 8e8f8f4 | 2017-08-02 01:52:45 +0000 | [diff] [blame] | 37 | ; GCN-DAG: s_add_u32 s32, s32, 0xb00{{$}} |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 38 | |
Matt Arsenault | d1867c0 | 2017-08-02 00:59:51 +0000 | [diff] [blame] | 39 | ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}} |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 40 | ; GCN: v_add_i32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]] |
Matt Arsenault | d1867c0 | 2017-08-02 00:59:51 +0000 | [diff] [blame] | 41 | ; GCN: buffer_store_dword [[ADD0]], off, s[0:3], s5 offset:4{{$}} |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 42 | |
Matt Arsenault | d1867c0 | 2017-08-02 00:59:51 +0000 | [diff] [blame] | 43 | ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}} |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 44 | ; GCN: v_add_i32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]] |
| 45 | |
| 46 | ; GCN: s_swappc_b64 |
| 47 | |
Matt Arsenault | d1867c0 | 2017-08-02 00:59:51 +0000 | [diff] [blame] | 48 | ; GCN: buffer_store_dword [[ADD1]], off, s[0:3], s5 offset:20{{$}} |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 49 | |
| 50 | ; GCN: v_readlane_b32 |
| 51 | ; GCN: buffer_load_dword v32, |
Matt Arsenault | 8e8f8f4 | 2017-08-02 01:52:45 +0000 | [diff] [blame] | 52 | ; GCN: buffer_load_dword v33, |
| 53 | ; GCN: s_sub_u32 s32, s32, 0xb00{{$}} |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 54 | ; GCN: s_setpc_b64 |
; Same shape as @void_func_byval_struct, but with a call to
; @external_void_func_void placed between computing %add3 and storing it.
; The value %add3 therefore has to stay live across the call, and the store
; to %arg1 happens only after the call returns.
| 55 | define void @void_func_byval_struct_non_leaf(%struct.ByValStruct* byval noalias nocapture align 4 %arg0, %struct.ByValStruct* byval noalias nocapture align 4 %arg1) #1 { |
| 56 | entry: |
; Element 0 of the first byval argument, volatile load, add 1, store back.
| 57 | %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0 |
| 58 | %tmp = load volatile i32, i32* %arrayidx, align 4 |
| 59 | %add = add nsw i32 %tmp, 1 |
| 60 | store volatile i32 %add, i32* %arrayidx, align 4 |
; Element 0 of the second byval argument is loaded and incremented by 2
; before the call; the result is stored only afterwards.
| 61 | %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0 |
| 62 | %tmp1 = load volatile i32, i32* %arrayidx2, align 4 |
| 63 | %add3 = add nsw i32 %tmp1, 2 |
; The call that makes this function a non-leaf.
| 64 | call void @external_void_func_void() |
| 65 | store volatile i32 %add3, i32* %arrayidx2, align 4 |
; Extra volatile store of the constant 9 to the null addrspace(1) pointer.
| 66 | store volatile i32 9, i32 addrspace(1)* null, align 4 |
| 67 | ret void |
| 68 | } |
| 69 | |
| 70 | ; GCN-LABEL: {{^}}call_void_func_byval_struct_func: |
| 71 | ; GCN: s_mov_b32 s5, s32 |
Matt Arsenault | 8e8f8f4 | 2017-08-02 01:52:45 +0000 | [diff] [blame] | 72 | ; GCN: s_add_u32 s32, s32, 0xc00{{$}} |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 73 | ; GCN: v_writelane_b32 |
| 74 | |
| 75 | ; GCN-DAG: s_add_u32 s32, s32, 0x800{{$}} |
| 76 | ; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 |
| 77 | ; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13 |
| 78 | |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 79 | ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8 |
| 80 | ; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24 |
| 81 | |
Matt Arsenault | acc5e82 | 2017-08-02 00:43:42 +0000 | [diff] [blame] | 82 | ; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8 |
| 83 | ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12 |
| 84 | ; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16 |
| 85 | ; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20 |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 86 | |
| 87 | |
Matt Arsenault | d1867c0 | 2017-08-02 00:59:51 +0000 | [diff] [blame] | 88 | ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}} |
| 89 | ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8 |
| 90 | ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12 |
| 91 | ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16 |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 92 | |
Matt Arsenault | acc5e82 | 2017-08-02 00:43:42 +0000 | [diff] [blame] | 93 | ; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24 |
| 94 | ; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28 |
| 95 | ; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32 |
| 96 | ; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36 |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 97 | |
Matt Arsenault | d1867c0 | 2017-08-02 00:59:51 +0000 | [diff] [blame] | 98 | ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20 |
| 99 | ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24 |
| 100 | ; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28 |
| 101 | ; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32 |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 102 | |
| 103 | ; GCN: s_swappc_b64 |
| 104 | ; GCN-NEXT: s_sub_u32 s32, s32, 0x800{{$}} |
| 105 | |
| 106 | ; GCN: v_readlane_b32 |
| 107 | |
Matt Arsenault | 8e8f8f4 | 2017-08-02 01:52:45 +0000 | [diff] [blame] | 108 | ; GCN: s_sub_u32 s32, s32, 0xc00{{$}} |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 109 | ; GCN-NEXT: s_waitcnt |
| 110 | ; GCN-NEXT: s_setpc_b64 |
; Non-kernel caller. It creates two %struct.ByValStruct stack objects, writes
; 9 into element 0 of the first and 13 into element 0 of the second with
; volatile stores, and passes both byval to @void_func_byval_struct.
; NOTE(review) the lifetime markers below pass a size of 32 while
; %struct.ByValStruct is { [4 x i32] } (16 bytes) -- confirm this is intended.
| 111 | define void @call_void_func_byval_struct_func() #0 { |
| 112 | entry: |
; Two stack objects that become the byval arguments of the call.
| 113 | %arg0 = alloca %struct.ByValStruct, align 4 |
| 114 | %arg1 = alloca %struct.ByValStruct, align 4 |
; Open the lifetime of both objects (i8* views are needed by the intrinsic).
| 115 | %tmp = bitcast %struct.ByValStruct* %arg0 to i8* |
| 116 | call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp) |
| 117 | %tmp1 = bitcast %struct.ByValStruct* %arg1 to i8* |
| 118 | call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp1) |
; Initialise element 0 of each object with a recognisable constant.
| 119 | %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0 |
| 120 | store volatile i32 9, i32* %arrayidx, align 4 |
| 121 | %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0 |
| 122 | store volatile i32 13, i32* %arrayidx2, align 4 |
; Pass both objects byval to the callee.
| 123 | call void @void_func_byval_struct(%struct.ByValStruct* byval nonnull align 4 %arg0, %struct.ByValStruct* byval nonnull align 4 %arg1) |
; Close the lifetimes in the reverse order of opening.
| 124 | call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp1) |
| 125 | call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp) |
| 126 | ret void |
| 127 | } |
| 128 | |
| 129 | ; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel: |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame^] | 130 | ; GCN: s_mov_b32 s33, s7 |
| 131 | ; GCN: s_add_u32 s32, s33, 0xa00{{$}} |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 132 | |
| 133 | ; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 |
| 134 | ; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13 |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame^] | 135 | ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8 |
| 136 | ; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24 |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 137 | |
| 138 | ; GCN-DAG: s_add_u32 s32, s32, 0x800{{$}} |
| 139 | |
Geoff Berry | 4e38e02 | 2017-08-17 04:04:11 +0000 | [diff] [blame^] | 140 | ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8 |
| 141 | ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12 |
| 142 | ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16 |
| 143 | ; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20 |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 144 | |
Matt Arsenault | d1867c0 | 2017-08-02 00:59:51 +0000 | [diff] [blame] | 145 | ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}} |
| 146 | ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8 |
| 147 | ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12 |
| 148 | ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16 |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 149 | |
Matt Arsenault | acc5e82 | 2017-08-02 00:43:42 +0000 | [diff] [blame] | 150 | ; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24 |
| 151 | ; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28 |
| 152 | ; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32 |
| 153 | ; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36 |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 154 | |
Matt Arsenault | d1867c0 | 2017-08-02 00:59:51 +0000 | [diff] [blame] | 155 | ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20 |
| 156 | ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24 |
| 157 | ; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28 |
| 158 | ; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32 |
Matt Arsenault | b62a4eb | 2017-08-01 19:54:18 +0000 | [diff] [blame] | 159 | |
| 160 | |
| 161 | ; GCN: s_swappc_b64 |
| 162 | ; FIXME: Dead SP modification |
| 163 | ; GCN-NEXT: s_sub_u32 s32, s32, 0x800{{$}} |
| 164 | ; GCN-NEXT: s_endpgm |
; Kernel entry point (amdgpu_kernel calling convention) with the same body as
; @call_void_func_byval_struct_func. It creates two %struct.ByValStruct stack
; objects, stores 9 and 13 into their first elements with volatile stores,
; and passes both byval to @void_func_byval_struct.
; NOTE(review) the lifetime markers below pass a size of 32 while
; %struct.ByValStruct is { [4 x i32] } (16 bytes) -- confirm this is intended.
| 165 | define amdgpu_kernel void @call_void_func_byval_struct_kernel() #0 { |
| 166 | entry: |
; Two stack objects that become the byval arguments of the call.
| 167 | %arg0 = alloca %struct.ByValStruct, align 4 |
| 168 | %arg1 = alloca %struct.ByValStruct, align 4 |
; Open the lifetime of both objects (i8* views are needed by the intrinsic).
| 169 | %tmp = bitcast %struct.ByValStruct* %arg0 to i8* |
| 170 | call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp) |
| 171 | %tmp1 = bitcast %struct.ByValStruct* %arg1 to i8* |
| 172 | call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp1) |
; Initialise element 0 of each object with a recognisable constant.
| 173 | %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0 |
| 174 | store volatile i32 9, i32* %arrayidx, align 4 |
| 175 | %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0 |
| 176 | store volatile i32 13, i32* %arrayidx2, align 4 |
; Pass both objects byval to the callee.
| 177 | call void @void_func_byval_struct(%struct.ByValStruct* byval nonnull align 4 %arg0, %struct.ByValStruct* byval nonnull align 4 %arg1) |
; Close the lifetimes in the reverse order of opening.
| 178 | call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp1) |
| 179 | call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp) |
| 180 | ret void |
| 181 | } |
| 182 | |
| 183 | ; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel_no_frame_pointer_elim: |
; Kernel entry point with the same body as @call_void_func_byval_struct_kernel
; but using attribute group #2, which sets "no-frame-pointer-elim"="true".
; Only a label check precedes this function; no instruction-level
; expectations for it are visible here.
; NOTE(review) the lifetime markers below pass a size of 32 while
; %struct.ByValStruct is { [4 x i32] } (16 bytes) -- confirm this is intended.
| 184 | define amdgpu_kernel void @call_void_func_byval_struct_kernel_no_frame_pointer_elim() #2 { |
| 185 | entry: |
; Two stack objects that become the byval arguments of the call.
| 186 | %arg0 = alloca %struct.ByValStruct, align 4 |
| 187 | %arg1 = alloca %struct.ByValStruct, align 4 |
; Open the lifetime of both objects (i8* views are needed by the intrinsic).
| 188 | %tmp = bitcast %struct.ByValStruct* %arg0 to i8* |
| 189 | call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp) |
| 190 | %tmp1 = bitcast %struct.ByValStruct* %arg1 to i8* |
| 191 | call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp1) |
; Initialise element 0 of each object with a recognisable constant.
| 192 | %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0 |
| 193 | store volatile i32 9, i32* %arrayidx, align 4 |
| 194 | %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0 |
| 195 | store volatile i32 13, i32* %arrayidx2, align 4 |
; Pass both objects byval to the callee.
| 196 | call void @void_func_byval_struct(%struct.ByValStruct* byval nonnull align 4 %arg0, %struct.ByValStruct* byval nonnull align 4 %arg1) |
; Close the lifetimes in the reverse order of opening.
| 197 | call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp1) |
| 198 | call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp) |
| 199 | ret void |
| 200 | } |
| 201 | |
; External function with no body in this file; it is the callee that makes
; @void_func_byval_struct_non_leaf a non-leaf function.
| 202 | declare void @external_void_func_void() #0 |
| 203 | |
; Lifetime intrinsics used around the stack objects in the caller functions.
; NOTE(review) both declarations reference attribute group #3, but no
; definition of #3 is visible in this portion of the file -- confirm.
| 204 | declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #3 |
| 205 | declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #3 |
| 206 | |
; Attribute groups. #0 is used by the callers and the external declaration,
; #1 by the two byval callees (noinline keeps them as real calls), and #2 by
; the kernel variant that disables frame pointer elimination.
| 207 | attributes #0 = { nounwind } |
| 208 | attributes #1 = { noinline norecurse nounwind } |
| 209 | attributes #2 = { nounwind norecurse "no-frame-pointer-elim"="true" } |