; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,CIVI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Callee loads a dword through the dispatch pointer; the checks expect the
; 64-bit address to be built from s6/s7.
; GCN-LABEL: {{^}}use_dispatch_ptr:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_dispatch_ptr() #1 {
  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; Kernel that only reaches the dispatch pointer through a call; the checks
; expect the dispatch-pointer SGPR to be enabled and s[4:5] copied to s[6:7].
; GCN-LABEL: {{^}}kern_indirect_use_dispatch_ptr:
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 {
  call void @use_dispatch_ptr()
  ret void
}

; Callee loads a dword through the queue pointer; the checks expect the
; 64-bit address to be built from s6/s7.
; GCN-LABEL: {{^}}use_queue_ptr:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_queue_ptr() #1 {
  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; Kernel that only reaches the queue pointer through a call; the checks expect
; the queue-pointer SGPR to be enabled and s[4:5] copied to s[6:7] ahead of the
; call instruction.
; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr:
; GCN: enable_sgpr_queue_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
  call void @use_queue_ptr()
  ret void
}

; Callee stores through a local (addrspace 3) pointer cast to a flat pointer.
; For the kaveri run the checks expect the high half of the address to be
; loaded from memory; for the gfx900 run they expect it to come from
; s_getreg_b32 instead.
; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
; CIVI: flat_load_dword v[[HI:[0-9]+]], v[0:1]
; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]]
; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
; GFX9: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_queue_ptr_addrspacecast() #1 {
  %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32*
  store volatile i32 0, i32* %asc
  ret void
}

; Kernel calling the addrspacecast user. Only the kaveri run is expected to
; enable the queue pointer and copy s[4:5] to s[6:7]; the gfx900 run must not
; emit any s_mov_b64 before the call.
; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr_addrspacecast:
; CIVI: enable_sgpr_queue_ptr = 1

; CIVI: s_mov_b64 s[6:7], s[4:5]
; GFX9-NOT: s_mov_b64
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
  call void @use_queue_ptr_addrspacecast()
  ret void
}

; Callee loads a dword through the kernarg segment pointer; the checks expect
; the 64-bit address to be built from s6/s7.
; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_kernarg_segment_ptr() #1 {
  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; Kernel that only reaches the kernarg segment pointer through a call; the
; checks expect the matching SGPR to be enabled and s[4:5] copied to s[6:7]
; ahead of the call instruction.
; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 {
  call void @use_kernarg_segment_ptr()
  ret void
}

; Callee consumes the 64-bit dispatch ID via inline asm; the checks expect it
; in s[6:7].
; GCN-LABEL: {{^}}use_dispatch_id:
; GCN: ; use s[6:7]
define void @use_dispatch_id() #1 {
  %id = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %id)
  ret void
}

; No kernarg segment so that there is a mov to check. With kernarg
; pointer enabled, it happens to end up in the right place anyway.

; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id:
; GCN: enable_sgpr_dispatch_id = 1

; GCN: s_mov_b64 s[6:7], s[4:5]
define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 {
  call void @use_dispatch_id()
  ret void
}

; Callee consumes workgroup ID X via inline asm; the checks expect it in s6.
; GCN-LABEL: {{^}}use_workgroup_id_x:
; GCN: s_waitcnt
; GCN: ; use s6
define void @use_workgroup_id_x() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; Same as use_workgroup_id_x but with a stack object: the checks expect s5 to
; be set from s32, the store to go through s5, and the ID still to be in s6.
; GCN-LABEL: {{^}}use_stack_workgroup_id_x:
; GCN: s_waitcnt
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v0, off, s[0:3], s5 offset:4
; GCN: ; use s6
; GCN: s_setpc_b64
define void @use_stack_workgroup_id_x() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; Callee consumes only workgroup ID Y via inline asm; the checks expect it in s6.
; GCN-LABEL: {{^}}use_workgroup_id_y:
; GCN: s_waitcnt
; GCN: ; use s6
define void @use_workgroup_id_y() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; Callee consumes only workgroup ID Z via inline asm; the checks expect it in s6.
; GCN-LABEL: {{^}}use_workgroup_id_z:
; GCN: s_waitcnt
; GCN: ; use s6
define void @use_workgroup_id_z() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; Callee consumes workgroup IDs X and Y; the checks expect them in s6 and s7.
; GCN-LABEL: {{^}}use_workgroup_id_xy:
; GCN: ; use s6
; GCN: ; use s7
define void @use_workgroup_id_xy() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; Callee consumes workgroup IDs X, Y and Z; the checks expect them in s6, s7
; and s8.
; GCN-LABEL: {{^}}use_workgroup_id_xyz:
; GCN: ; use s6
; GCN: ; use s7
; GCN: ; use s8
define void @use_workgroup_id_xyz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  call void asm sideeffect "; use $0", "s"(i32 %val2)
  ret void
}

; Callee consumes workgroup IDs X and Z (no Y); the checks expect them in s6
; and s7.
; GCN-LABEL: {{^}}use_workgroup_id_xz:
; GCN: ; use s6
; GCN: ; use s7
define void @use_workgroup_id_xz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; Callee consumes workgroup IDs Y and Z (no X); the checks expect them in s6
; and s7.
; GCN-LABEL: {{^}}use_workgroup_id_yz:
; GCN: ; use s6
; GCN: ; use s7
define void @use_workgroup_id_yz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; Kernel whose callee needs workgroup ID X. Only X is enabled, and the checks
; require that s6 is never mentioned between the s33/s4/s32 setup moves.
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-NOT: s6
; GCN: s_mov_b32 s33, s7
; GCN-NOT: s6
; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; Kernel whose callee needs workgroup ID Y. X and Y are enabled, and the checks
; expect s7 to be copied into s6 before the call.
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_y:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN: s_mov_b32 s33, s8
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; Kernel whose callee needs workgroup ID Z. X and Z are enabled, and the checks
; expect s7 to be copied into s6 before the call.
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_z:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}

; Kernel whose callee needs workgroup IDs X and Y. The checks require that
; neither s6 nor s7 is mentioned between the setup moves and the call.
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xy:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN: s_mov_b32 s33, s8
; GCN-NOT: s6
; GCN-NOT: s7
; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN-NOT: s7
; GCN: s_mov_b32 s32, s33
; GCN-NOT: s6
; GCN-NOT: s7
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 {
  call void @use_workgroup_id_xy()
  ret void
}

; Kernel whose callee needs workgroup IDs X, Y and Z. The checks require that
; s6, s7 and s8 are not mentioned between the setup moves and the call.
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xyz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s9

; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8

; GCN: s_mov_b32 s4, s33

; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8

; GCN: s_mov_b32 s32, s33

; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8

; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 {
  call void @use_workgroup_id_xyz()
  ret void
}

; Kernel whose callee needs workgroup IDs X and Z (Y disabled). The checks
; require that neither s6 nor s7 is mentioned between the setup moves and the
; call.
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN-NOT: s6
; GCN-NOT: s7

; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN-NOT: s7

; GCN: s_mov_b32 s32, s33
; GCN-NOT: s6
; GCN-NOT: s7

; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 {
  call void @use_workgroup_id_xz()
  ret void
}

; Kernel whose callee needs workgroup IDs Y and Z. All three IDs are enabled,
; and the checks expect s7 -> s6 and s8 -> s7 copies before the call.
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_yz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s9
; GCN: s_mov_b32 s6, s7
; GCN: s_mov_b32 s4, s33
; GCN: s_mov_b32 s7, s8
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 {
  call void @use_workgroup_id_yz()
  ret void
}

; Argument is in right place already
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
; GCN-NOT: s6
define void @func_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; Non-kernel caller of the workgroup ID Y user; the checks require that s6 is
; not mentioned in the caller.
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
; GCN-NOT: s6
define void @func_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; Non-kernel caller of the workgroup ID Z user; the checks require that s6 is
; not mentioned in the caller.
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
; GCN-NOT: s6
define void @func_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}

; Callee takes an ordinary i32 argument in addition to using workgroup ID X;
; the checks expect the argument in v0 and the ID in s6.
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s6
define void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; Callee takes an ordinary i32 argument in addition to using workgroup ID Y;
; the checks expect the argument in v0 and the ID in s6.
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s6
define void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; Callee takes an ordinary i32 argument in addition to using workgroup ID Z;
; the checks expect the argument in v0 and the ID in s6.
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s6
define void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; Kernel passing the constant 555 (0x22b) in v0 to a callee that also needs
; workgroup ID X; the checks require that s6 is not mentioned around the setup
; moves.
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_x:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-NOT: s6
; GCN-DAG: s_mov_b32 s33, s7
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s33
; GCN-NOT: s6
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 {
  call void @other_arg_use_workgroup_id_x(i32 555)
  ret void
}

; Kernel passing the constant 555 (0x22b) in v0 to a callee that also needs
; workgroup ID Y; the checks expect s7 to be copied into s6 before the call.
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_y:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-DAG: s_mov_b32 s33, s8
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 {
  call void @other_arg_use_workgroup_id_y(i32 555)
  ret void
}

; Kernel passing the constant 555 (0x22b) in v0 to a callee that also needs
; workgroup ID Z; the checks expect s7 to be copied into s6 before the call.
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_z:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7

; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 {
  call void @other_arg_use_workgroup_id_z(i32 555)
  ret void
}

; Callee that uses a stack slot plus every special SGPR input exercised by this
; file. The checks expect the three pointers in s[6:7], s[8:9] and s[10:11],
; the dispatch ID in s[12:13], and the workgroup IDs in s14, s15 and s16.
; GCN-LABEL: {{^}}use_every_sgpr_input:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s8
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s9
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s10
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s11
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: ; use s[12:13]
; GCN: ; use s14
; GCN: ; use s15
; GCN: ; use s16
define void @use_every_sgpr_input() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ret void
}

; Kernel calling use_every_sgpr_input. The checks expect every special SGPR
; input to be enabled and each 64-bit pair to be shifted up by two registers
; (s[4:5] -> s[6:7], ..., s[10:11] -> s[12:13]) before the call.
; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1
; GCN: enable_sgpr_workgroup_info = 0

; GCN: enable_sgpr_private_segment_buffer = 1
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: enable_sgpr_queue_ptr = 1
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; GCN: enable_sgpr_dispatch_id = 1
; GCN: enable_sgpr_flat_scratch_init = 1

; GCN: s_mov_b32 s33, s17
; GCN: s_mov_b64 s[12:13], s[10:11]
; GCN: s_mov_b64 s[10:11], s[8:9]
; GCN: s_mov_b64 s[8:9], s[6:7]
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_mov_b32 s4, s33
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}

; Non-kernel caller of use_every_sgpr_input; the checks require that none of
; s6-s13 (individually or as 64-bit pairs) is mentioned in the caller.
; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input:
; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8
; GCN-NOT: s9
; GCN-NOT: s10
; GCN-NOT: s11
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s[6:7]
; GCN-NOT: s[8:9]
; GCN-NOT: s[10:11]
; GCN-NOT: s[12:13]
define void @func_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}

; Function that uses every special SGPR input itself and then calls a callee
; needing only the workgroup IDs; the checks expect s14/s15/s16 to be copied
; into s6/s7/s8 before the call.
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz:
; GCN-DAG: s_mov_b32 s6, s14
; GCN-DAG: s_mov_b32 s7, s15
; GCN-DAG: s_mov_b32 s8, s16
; GCN: s_swappc_b64
define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  call void @use_workgroup_id_xyz()
  ret void
}

; Variant where the call comes first, so every special input has to stay live
; across it. The checks expect the workgroup IDs (s14/s15/s16) and the pointer
; pairs to be copied to other registers before the call, and those saved
; copies to be what is used afterwards. The SAVE_* patterns exclude s6, s7 and
; s8 respectively as the save register.
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill:
; GCN: s_mov_b32 s5, s32

; GCN-DAG: s_add_u32 s32, s32, 0x400

; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s14
; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-68-9][0-9]*]], s15
; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-79][0-9]*]], s16
; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[6:7]
; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[8:9]
; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[10:11]

; GCN-DAG: s_mov_b32 s6, s14
; GCN-DAG: s_mov_b32 s7, s15
; GCN-DAG: s_mov_b32 s8, s16

; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[6:7]
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[8:9]
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Z:[0-9]+]]{{\:}}[[HI_Z:[0-9]+]]{{\]}}, s[10:11]

; GCN: s_swappc_b64

; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
; GCN-DAG: v_mov_b32_e32 v[[LO1:[0-9]+]], s[[LO_X]]
; GCN-DAG: v_mov_b32_e32 v[[HI1:[0-9]+]], s[[HI_X]]
; GCN-DAG: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO1]]:[[HI1]]{{\]}}
; GCN-DAG: v_mov_b32_e32 v[[LO2:[0-9]+]], s[[LO_Y]]
; GCN-DAG: v_mov_b32_e32 v[[HI2:[0-9]+]], s[[HI_Y]]
; GCN-DAG: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO2]]:[[HI2]]{{\]}}
; GCN-DAG: v_mov_b32_e32 v[[LO3:[0-9]+]], s[[LO_Z]]
; GCN-DAG: v_mov_b32_e32 v[[HI3:[0-9]+]], s[[HI_Z]]
; GCN-DAG: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO3]]:[[HI3]]{{\]}}
; GCN: ; use
; GCN: ; use [[SAVE_X]]
; GCN: ; use [[SAVE_Y]]
; GCN: ; use [[SAVE_Z]]
define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  call void @use_workgroup_id_xyz()

  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ret void
}

; Intrinsics used by the functions in this file.
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0

; #0 is attached to the intrinsic declarations and their call sites; #1 is
; attached to every function definition (noinline keeps the calls visible).
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }