; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s

; Aggregate passed by value throughout this test. It wraps four i32 elements,
; so one object occupies 16 bytes.
%struct.ByValStruct = type { [4 x i32] }
| 5 | |
; Leaf callee that receives two byval structs. It performs a volatile
; read-modify-write on the first i32 of each argument (+1 and +2), followed
; by a volatile store of 9 to a null global pointer. The checks below expect
; s32 to be copied into s5 once, and every later stack access to go through
; s5 without mentioning s32 again.
; GCN-LABEL: {{^}}void_func_byval_struct:
; GCN: s_mov_b32 s5, s32
; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5{{$}}
; GCN-NOT: s32
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s5{{$}}
; GCN-NOT: s32

; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:16{{$}}
; GCN-NOT: s32
; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s5 offset:16{{$}}
; GCN-NOT: s32
define void @void_func_byval_struct(%struct.ByValStruct* byval noalias nocapture align 4 %arg0, %struct.ByValStruct* byval noalias nocapture align 4 %arg1) #1 {
entry:
  ; First element of %arg0, incremented by 1 in place.
  %arg0.elt0 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0
  %arg0.val = load volatile i32, i32* %arg0.elt0, align 4
  %arg0.plus1 = add nsw i32 %arg0.val, 1
  store volatile i32 %arg0.plus1, i32* %arg0.elt0, align 4

  ; First element of %arg1, incremented by 2 in place.
  %arg1.elt0 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0
  %arg1.val = load volatile i32, i32* %arg1.elt0, align 4
  %arg1.plus2 = add nsw i32 %arg1.val, 2
  store volatile i32 %arg1.plus2, i32* %arg1.elt0, align 4

  ; Volatile global store issued after both argument updates.
  store volatile i32 9, i32 addrspace(1)* null, align 4
  ret void
}
| 30 | |
; Same argument updates as the leaf variant, but with a call to an external
; function placed between computing the second sum and storing it. The checks
; below expect a v32 store/reload and lane write/read around the body, an
; s32 adjustment of 0x900 that is undone before returning, and the second
; store to be emitted after the call.
; GCN-LABEL: {{^}}void_func_byval_struct_non_leaf:
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v32
; GCN: v_writelane_b32

; GCN-DAG: s_add_u32 s32, s32, 0x900{{$}}

; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5{{$}}
; GCN: v_add_i32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
; GCN: buffer_store_dword [[ADD0]], off, s[0:3], s5{{$}}

; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:16{{$}}
; GCN: v_add_i32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]]

; GCN: s_swappc_b64

; GCN: buffer_store_dword [[ADD1]], off, s[0:3], s5 offset:16{{$}}

; GCN: v_readlane_b32
; GCN: buffer_load_dword v32,
; GCN: s_sub_u32 s32, s32, 0x900{{$}}
; GCN: s_setpc_b64
define void @void_func_byval_struct_non_leaf(%struct.ByValStruct* byval noalias nocapture align 4 %arg0, %struct.ByValStruct* byval noalias nocapture align 4 %arg1) #1 {
entry:
  ; First element of %arg0, incremented by 1 in place.
  %arg0.elt0 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0
  %arg0.val = load volatile i32, i32* %arg0.elt0, align 4
  %arg0.plus1 = add nsw i32 %arg0.val, 1
  store volatile i32 %arg0.plus1, i32* %arg0.elt0, align 4

  ; First element of %arg1 is loaded and summed before the call ...
  %arg1.elt0 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0
  %arg1.val = load volatile i32, i32* %arg1.elt0, align 4
  %arg1.plus2 = add nsw i32 %arg1.val, 2
  call void @external_void_func_void()

  ; ... and written back only afterwards, so the sum is live across the call.
  store volatile i32 %arg1.plus2, i32* %arg1.elt0, align 4
  store volatile i32 9, i32 addrspace(1)* null, align 4
  ret void
}
| 67 | |
; Non-kernel caller: allocates two local structs, writes 9 and 13 to their
; first elements, and passes both byval to @void_func_byval_struct. The checks
; below expect the locals at offsets 8 and 24 from s5, and each struct to be
; copied dword by dword into the outgoing area addressed by s32 (offsets
; 0-12 and 16-28) before the call.
; GCN-LABEL: {{^}}call_void_func_byval_struct_func:
; GCN: s_mov_b32 s5, s32
; GCN: s_add_u32 s32, s32, 0xa00{{$}}
; GCN: v_writelane_b32

; GCN-DAG: s_add_u32 s32, s32, 0x800{{$}}
; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13

; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8
; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24

; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8
; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12
; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16
; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20


; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12

; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24
; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28
; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32
; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36

; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28

; GCN: s_swappc_b64
; GCN-NEXT: s_sub_u32 s32, s32, 0x800{{$}}

; GCN: v_readlane_b32

; GCN: s_sub_u32 s32, s32, 0xa00{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @call_void_func_byval_struct_func() #0 {
entry:
  %arg0 = alloca %struct.ByValStruct, align 4
  %arg1 = alloca %struct.ByValStruct, align 4

  ; The lifetime size matches the object: %struct.ByValStruct is
  ; { [4 x i32] }, i.e. 16 bytes (the markers previously claimed 32).
  %tmp = bitcast %struct.ByValStruct* %arg0 to i8*
  call void @llvm.lifetime.start.p0i8(i64 16, i8* %tmp)
  %tmp1 = bitcast %struct.ByValStruct* %arg1 to i8*
  call void @llvm.lifetime.start.p0i8(i64 16, i8* %tmp1)

  ; Initialise the first element of each local with a recognisable constant.
  %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0
  store volatile i32 9, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0
  store volatile i32 13, i32* %arrayidx2, align 4

  call void @void_func_byval_struct(%struct.ByValStruct* byval nonnull align 4 %arg0, %struct.ByValStruct* byval nonnull align 4 %arg1)

  ; Lifetimes are ended in reverse order of their start.
  call void @llvm.lifetime.end.p0i8(i64 16, i8* %tmp1)
  call void @llvm.lifetime.end.p0i8(i64 16, i8* %tmp)
  ret void
}
| 126 | |
; Kernel caller: same body as the non-kernel caller, but declared
; amdgpu_kernel. The checks below expect s33 to be set from s7, s32 to be
; derived from s33, the locals at offsets 8 and 24 from s33, and both structs
; copied dword by dword into the outgoing area addressed by s32 before the
; call. The program is expected to end with s_endpgm.
; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel:
; GCN: s_mov_b32 s33, s7
; GCN: s_add_u32 s32, s33, 0xa00{{$}}

; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8
; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24

; GCN-DAG: s_add_u32 s32, s32, 0x800{{$}}

; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20

; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12

; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24
; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28
; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32
; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36

; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28


; GCN: s_swappc_b64
; FIXME: Dead SP modification
; GCN-NEXT: s_sub_u32 s32, s32, 0x800{{$}}
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @call_void_func_byval_struct_kernel() #0 {
entry:
  %arg0 = alloca %struct.ByValStruct, align 4
  %arg1 = alloca %struct.ByValStruct, align 4

  ; The lifetime size matches the object: %struct.ByValStruct is
  ; { [4 x i32] }, i.e. 16 bytes (the markers previously claimed 32).
  %tmp = bitcast %struct.ByValStruct* %arg0 to i8*
  call void @llvm.lifetime.start.p0i8(i64 16, i8* %tmp)
  %tmp1 = bitcast %struct.ByValStruct* %arg1 to i8*
  call void @llvm.lifetime.start.p0i8(i64 16, i8* %tmp1)

  ; Initialise the first element of each local with a recognisable constant.
  %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0
  store volatile i32 9, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0
  store volatile i32 13, i32* %arrayidx2, align 4

  call void @void_func_byval_struct(%struct.ByValStruct* byval nonnull align 4 %arg0, %struct.ByValStruct* byval nonnull align 4 %arg1)

  ; Lifetimes are ended in reverse order of their start.
  call void @llvm.lifetime.end.p0i8(i64 16, i8* %tmp1)
  call void @llvm.lifetime.end.p0i8(i64 16, i8* %tmp)
  ret void
}
| 180 | |
; Kernel caller compiled with attribute group #2, which sets
; "no-frame-pointer-elim"="true". Only the function label is checked here;
; with -verify-machineinstrs on the command line the test mainly confirms that
; this configuration compiles.
; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel_no_frame_pointer_elim:
define amdgpu_kernel void @call_void_func_byval_struct_kernel_no_frame_pointer_elim() #2 {
entry:
  %arg0 = alloca %struct.ByValStruct, align 4
  %arg1 = alloca %struct.ByValStruct, align 4

  ; The lifetime size matches the object: %struct.ByValStruct is
  ; { [4 x i32] }, i.e. 16 bytes (the markers previously claimed 32).
  %tmp = bitcast %struct.ByValStruct* %arg0 to i8*
  call void @llvm.lifetime.start.p0i8(i64 16, i8* %tmp)
  %tmp1 = bitcast %struct.ByValStruct* %arg1 to i8*
  call void @llvm.lifetime.start.p0i8(i64 16, i8* %tmp1)

  ; Initialise the first element of each local with a recognisable constant.
  %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0
  store volatile i32 9, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0
  store volatile i32 13, i32* %arrayidx2, align 4

  call void @void_func_byval_struct(%struct.ByValStruct* byval nonnull align 4 %arg0, %struct.ByValStruct* byval nonnull align 4 %arg1)

  ; Lifetimes are ended in reverse order of their start.
  call void @llvm.lifetime.end.p0i8(i64 16, i8* %tmp1)
  call void @llvm.lifetime.end.p0i8(i64 16, i8* %tmp)
  ret void
}
| 199 | |
; External function called by the non-leaf callee test.
declare void @external_void_func_void() #0

; Lifetime markers used by the caller tests.
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #3
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #3

attributes #0 = { nounwind }
attributes #1 = { noinline norecurse nounwind }
attributes #2 = { nounwind norecurse "no-frame-pointer-elim"="true" }
; Group #3 is referenced by the lifetime intrinsic declarations above, so it
; is defined here to keep that reference from dangling.
; NOTE(review): added because no definition of #3 was visible in the reviewed
; portion of the file; drop this line if the group is already defined further
; down.
attributes #3 = { argmemonly nounwind }