; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MESA %s
; The two invocations above differ only in the target triple: the amdhsa one
; pairs the shared check prefix with the amdhsa-specific prefix, the mesa3d one
; pairs it with the mesa3d-specific prefix.

; Kernel with no explicit arguments (attribute group #0) that performs a
; volatile i32 load through the pointer returned by
; llvm.amdgcn.implicitarg.ptr.
; Expected: the kernarg segment pointer is enabled; the kernarg segment size is
; 0 for the amdhsa invocation and 16 for the mesa3d invocation; on amdhsa the
; load reads from s[4:5] at offset 0x0.
; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty:
; GCN: enable_sgpr_kernarg_segment_ptr = 1

; HSA: kernarg_segment_byte_size = 0
; MESA: kernarg_segment_byte_size = 16

; HSA: s_load_dword s0, s[4:5], 0x0
define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Same body as the no-argument kernel case, but using attribute group #1, which
; sets "amdgpu-implicitarg-num-bytes"="32".
; Expected: the kernarg segment pointer is enabled; the kernarg segment size is
; 32 for the amdhsa invocation and 16 for the mesa3d invocation; on amdhsa the
; load reads from s[4:5] at offset 0x0.
; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty:
; GCN: enable_sgpr_kernarg_segment_ptr = 1

; HSA: kernarg_segment_byte_size = 32
; MESA: kernarg_segment_byte_size = 16

; HSA: s_load_dword s0, s[4:5], 0x0
define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Kernel with a single unnamed [112 x i8] argument (attribute group #0) that
; performs a volatile i32 load through the pointer returned by
; llvm.amdgcn.implicitarg.ptr.
; Expected: the kernarg segment pointer is enabled; the kernarg segment size is
; 112 for the amdhsa invocation and 464 for the mesa3d invocation; on amdhsa
; the load reads from s[4:5] at offset 0x1c.
; NOTE(review): 0x1c is presumably the 112-byte argument size expressed in
; dwords -- confirm against the default subtarget's offset encoding.
; GCN-LABEL: {{^}}kernel_implicitarg_ptr:
; GCN: enable_sgpr_kernarg_segment_ptr = 1

; HSA: kernarg_segment_byte_size = 112
; MESA: kernarg_segment_byte_size = 464

; HSA: s_load_dword s0, s[4:5], 0x1c
define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Same as the [112 x i8] kernel case, but using attribute group #1, which sets
; "amdgpu-implicitarg-num-bytes"="32".
; Expected: the kernarg segment pointer is enabled; the kernarg segment size is
; 144 (112 + 32) for the amdhsa invocation and 464 for the mesa3d invocation;
; on amdhsa the load reads from s[4:5] at offset 0x1c.
; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr:
; GCN: enable_sgpr_kernarg_segment_ptr = 1

; HSA: kernarg_segment_byte_size = 144
; MESA: kernarg_segment_byte_size = 464

; HSA: s_load_dword s0, s[4:5], 0x1c
define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Non-kernel function (attribute group #0) that performs a volatile i32 load
; through the pointer returned by llvm.amdgcn.implicitarg.ptr.
; Expected on mesa3d: s[6:7] is copied into s[8:9], s11 and s10 are set to
; 0xf000 and -1, and the load is a buffer_load_dword using s[8:11].
; Expected on amdhsa: s6 and s7 are moved into v0 and v1 and the load is a
; flat_load_dword from v[0:1].
; In both cases the load is immediately followed by s_waitcnt and s_setpc_b64.
; GCN-LABEL: {{^}}func_implicitarg_ptr:
; GCN: s_waitcnt
; MESA: s_mov_b64 s[8:9], s[6:7]
; MESA: s_mov_b32 s11, 0xf000
; MESA: s_mov_b32 s10, -1
; MESA: buffer_load_dword v0, off, s[8:11], 0
; HSA: v_mov_b32_e32 v0, s6
; HSA: v_mov_b32_e32 v1, s7
; HSA: flat_load_dword v0, v[0:1]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @func_implicitarg_ptr() #0 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Non-kernel function that performs a volatile i32 load through the pointer
; returned by llvm.amdgcn.implicitarg.ptr; the expected instruction sequence is
; the same as for the non-opencl function variant.
; NOTE(review): despite the opencl_ name this definition uses attribute group
; #0, not #1 -- confirm that this is intentional.
; GCN-LABEL: {{^}}opencl_func_implicitarg_ptr:
; GCN: s_waitcnt
; MESA: s_mov_b64 s[8:9], s[6:7]
; MESA: s_mov_b32 s11, 0xf000
; MESA: s_mov_b32 s10, -1
; MESA: buffer_load_dword v0, off, s[8:11], 0
; HSA: v_mov_b32_e32 v0, s6
; HSA: v_mov_b32_e32 v1, s7
; HSA: flat_load_dword v0, v[0:1]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @opencl_func_implicitarg_ptr() #0 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Kernel with no explicit arguments (attribute group #0) whose body only calls
; @func_implicitarg_ptr.
; Expected: the kernarg segment pointer is enabled; the kernarg segment size is
; 0 for the amdhsa invocation and 16 for the mesa3d invocation; s[4:5] is
; copied into s[6:7] before the s_swappc_b64 call.
; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 0
; MESA: kernarg_segment_byte_size = 16
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
  call void @func_implicitarg_ptr()
  ret void
}

; Kernel with no explicit arguments using attribute group #1
; ("amdgpu-implicitarg-num-bytes"="32") whose body only calls
; @func_implicitarg_ptr.
; Expected: the kernarg segment pointer is enabled; the kernarg segment size is
; 32 for the amdhsa invocation and 16 for the mesa3d invocation; s[4:5] is
; copied into s[6:7] before the s_swappc_b64 call.
; NOTE(review): the callee is @func_implicitarg_ptr rather than
; @opencl_func_implicitarg_ptr -- confirm that this is intentional.
; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 32
; MESA: kernarg_segment_byte_size = 16
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_swappc_b64
define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() #1 {
  call void @func_implicitarg_ptr()
  ret void
}

; Kernel with a single unnamed [112 x i8] argument (attribute group #0) whose
; body only calls @func_implicitarg_ptr.
; Expected: the kernarg segment pointer is enabled; the kernarg segment size is
; 112 for the amdhsa invocation and 464 for the mesa3d invocation; s[6:7] is
; formed with s_add_u32/s_addc_u32 as s[4:5] plus 0x70 (112) on amdhsa or plus
; 0x1c0 (448) on mesa3d, ahead of the s_swappc_b64 call.
; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 112
; MESA: kernarg_segment_byte_size = 464

; HSA: s_add_u32 s6, s4, 0x70
; MESA: s_add_u32 s6, s4, 0x1c0

; GCN: s_addc_u32 s7, s5, 0{{$}}
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
  call void @func_implicitarg_ptr()
  ret void
}

; Kernel with a single unnamed [112 x i8] argument using attribute group #1
; ("amdgpu-implicitarg-num-bytes"="32") whose body only calls
; @func_implicitarg_ptr.
; Expected: the kernarg segment pointer is enabled; the kernarg segment size is
; 144 (112 + 32) for the amdhsa invocation and 464 for the mesa3d invocation;
; s[6:7] is formed with s_add_u32/s_addc_u32 as s[4:5] plus 0x70 (112) on
; amdhsa or plus 0x1c0 (448) on mesa3d, ahead of the s_swappc_b64 call.
; NOTE(review): the callee is @func_implicitarg_ptr rather than
; @opencl_func_implicitarg_ptr -- confirm that this is intentional.
; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 144
; MESA: kernarg_segment_byte_size = 464

; HSA: s_add_u32 s6, s4, 0x70
; MESA: s_add_u32 s6, s4, 0x1c0

; GCN: s_addc_u32 s7, s5, 0{{$}}
; GCN: s_swappc_b64
define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func([112 x i8]) #1 {
  call void @func_implicitarg_ptr()
  ret void
}

; Non-kernel function (attribute group #0) whose body only calls
; @func_implicitarg_ptr.
; Expected: after this function's label the output contains no occurrence of
; s6, s7 or s[6:7].
; GCN-LABEL: {{^}}func_call_implicitarg_ptr_func:
; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s[6:7]
define void @func_call_implicitarg_ptr_func() #0 {
  call void @func_implicitarg_ptr()
  ret void
}

; Non-kernel function whose body only calls @func_implicitarg_ptr.
; Expected: after this function's label the output contains no occurrence of
; s6, s7 or s[6:7].
; NOTE(review): despite the opencl_ name this definition uses attribute group
; #0 and calls the non-opencl callee -- confirm that this is intentional.
; GCN-LABEL: {{^}}opencl_func_call_implicitarg_ptr_func:
; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s[6:7]
define void @opencl_func_call_implicitarg_ptr_func() #0 {
  call void @func_implicitarg_ptr()
  ret void
}

; Non-kernel function (attribute group #0) that performs two volatile i32
; loads: first through llvm.amdgcn.kernarg.segment.ptr, then through
; llvm.amdgcn.implicitarg.ptr.
; Expected on mesa3d: a buffer_load_dword using s[12:15], where s[12:13] is a
; copy of s[6:7] and s15/s14 are 0xf000/-1; then a second buffer_load_dword
; using s[8:11], where s10/s11 are copied from s14/s15.
; Expected on amdhsa: a flat_load_dword from v[0:1] filled from s6/s7, then a
; second flat_load_dword from v[0:1] filled from s8/s9.
; A final s_waitcnt vmcnt(0) is expected for both invocations.
; GCN-LABEL: {{^}}func_kernarg_implicitarg_ptr:
; GCN: s_waitcnt
; MESA: s_mov_b64 s[12:13], s[6:7]
; MESA: s_mov_b32 s15, 0xf000
; MESA: s_mov_b32 s14, -1
; MESA: buffer_load_dword v0, off, s[12:15], 0
; HSA: v_mov_b32_e32 v0, s6
; HSA: v_mov_b32_e32 v1, s7
; HSA: flat_load_dword v0, v[0:1]
; MESA: s_mov_b32 s10, s14
; MESA: s_mov_b32 s11, s15
; MESA: buffer_load_dword v0, off, s[8:11], 0
; HSA: v_mov_b32_e32 v0, s8
; HSA: v_mov_b32_e32 v1, s9
; HSA: flat_load_dword v0, v[0:1]

; GCN: s_waitcnt vmcnt(0)
define void @func_kernarg_implicitarg_ptr() #0 {
  %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast.kernarg.segment.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
  %cast.implicitarg = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load0 = load volatile i32, i32 addrspace(4)* %cast.kernarg.segment.ptr
  %load1 = load volatile i32, i32 addrspace(4)* %cast.implicitarg
  ret void
}

; Non-kernel function that performs two volatile i32 loads: first through
; llvm.amdgcn.kernarg.segment.ptr, then through llvm.amdgcn.implicitarg.ptr.
; The expected instruction sequence is the same as for the non-opencl variant:
; the first load goes through s[6:7] and the second through s[8:9] (as a
; buffer_load_dword on mesa3d, as a flat_load_dword on amdhsa), ending with
; s_waitcnt vmcnt(0).
; NOTE(review): despite the opencl_ name this definition uses attribute group
; #0, not #1 -- confirm that this is intentional.
; GCN-LABEL: {{^}}opencl_func_kernarg_implicitarg_ptr:
; GCN: s_waitcnt
; MESA: s_mov_b64 s[12:13], s[6:7]
; MESA: s_mov_b32 s15, 0xf000
; MESA: s_mov_b32 s14, -1
; MESA: buffer_load_dword v0, off, s[12:15], 0
; HSA: v_mov_b32_e32 v0, s6
; HSA: v_mov_b32_e32 v1, s7
; HSA: flat_load_dword v0, v[0:1]
; MESA: s_mov_b32 s10, s14
; MESA: s_mov_b32 s11, s15
; MESA: buffer_load_dword v0, off, s[8:11], 0
; HSA: v_mov_b32_e32 v0, s8
; HSA: v_mov_b32_e32 v1, s9
; HSA: flat_load_dword v0, v[0:1]

; GCN: s_waitcnt vmcnt(0)
define void @opencl_func_kernarg_implicitarg_ptr() #0 {
  %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast.kernarg.segment.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
  %cast.implicitarg = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load0 = load volatile i32, i32 addrspace(4)* %cast.kernarg.segment.ptr
  %load1 = load volatile i32, i32 addrspace(4)* %cast.implicitarg
  ret void
}

; Kernel with a single unnamed [112 x i8] argument (attribute group #0) whose
; body only calls @func_kernarg_implicitarg_ptr.
; Expected: s[4:5] is copied into s[6:7]; s[8:9] is then formed with
; s_add_u32/s_addc_u32 as s[6:7] plus 0x70 (112) on amdhsa or plus 0x1c0 (448)
; on mesa3d, ahead of the s_swappc_b64 call.
; GCN-LABEL: {{^}}kernel_call_kernarg_implicitarg_ptr_func:
; GCN: s_mov_b64 s[6:7], s[4:5]
; HSA: s_add_u32 s8, s6, 0x70
; MESA: s_add_u32 s8, s6, 0x1c0
; GCN: s_addc_u32 s9, s7, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8]) #0 {
  call void @func_kernarg_implicitarg_ptr()
  ret void
}

; Intrinsics used by the tests in this file; both return an i8 pointer in
; address space 4.
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #2
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #2

; Attribute groups:
;   #0 - plain nounwind/noinline definitions.
;   #1 - as #0, plus "amdgpu-implicitarg-num-bytes"="32".
;   #2 - attributes of the intrinsic declarations.
attributes #0 = { nounwind noinline }
attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="32" }
attributes #2 = { nounwind readnone speculatable }