; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Callee reads the dispatch pointer out of the SGPR pair it is passed in (s[6:7]).
; GCN-LABEL: {{^}}use_dispatch_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
define void @use_dispatch_ptr() #1 {
  %dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
  %header_ptr = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
  %value = load volatile i32, i32 addrspace(2)* %header_ptr
  ret void
}

; Kernel must enable the dispatch pointer input and forward it to the callee's ABI register pair.
; GCN-LABEL: {{^}}kern_indirect_use_dispatch_ptr:
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 {
  call void @use_dispatch_ptr()
  ret void
}

; Callee reads the queue pointer from its incoming SGPR pair (s[6:7]).
; GCN-LABEL: {{^}}use_queue_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
define void @use_queue_ptr() #1 {
  %queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
  %header_ptr = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
  %value = load volatile i32, i32 addrspace(2)* %header_ptr
  ret void
}

; Kernel must enable the queue pointer input and copy it into the callee's register pair.
; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr:
; GCN: enable_sgpr_queue_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
  call void @use_queue_ptr()
  ret void
}

; Local-to-flat addrspacecast needs the shared-memory aperture: CI/VI read it from
; the queue pointer, GFX9 reads it from a hardware register (no queue pointer needed).
; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
; CIVI: s_load_dword [[APERTURE_LOAD:s[0-9]+]], s[6:7], 0x10
; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]]

; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
define void @use_queue_ptr_addrspacecast() #1 {
  %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32 addrspace(4)*
  store volatile i32 0, i32 addrspace(4)* %asc
  ret void
}

; Only CI/VI require the queue pointer to be enabled and forwarded for the cast.
; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr_addrspacecast:
; CIVI: enable_sgpr_queue_ptr = 1

; CIVI: s_mov_b64 s[6:7], s[4:5]
; GFX9-NOT: s_mov_b64
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
  call void @use_queue_ptr_addrspacecast()
  ret void
}

; Callee reads the kernarg segment pointer from its incoming SGPR pair (s[6:7]).
; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
define void @use_kernarg_segment_ptr() #1 {
  %kernarg_segment_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %header_ptr = bitcast i8 addrspace(2)* %kernarg_segment_ptr to i32 addrspace(2)*
  %value = load volatile i32, i32 addrspace(2)* %header_ptr
  ret void
}

; Kernel must enable the kernarg segment pointer and forward it before the call.
; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 {
  call void @use_kernarg_segment_ptr()
  ret void
}

; Callee consumes the 64-bit dispatch id from its incoming SGPR pair.
; GCN-LABEL: {{^}}use_dispatch_id:
; GCN: ; use s[6:7]
define void @use_dispatch_id() #1 {
  %id = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %id)
  ret void
}

; No kernarg segment so that there is a mov to check. With kernarg
; pointer enabled, it happens to end up in the right place anyway.

; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id:
; GCN: enable_sgpr_dispatch_id = 1

; GCN: s_mov_b64 s[6:7], s[4:5]
define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 {
  call void @use_dispatch_id()
  ret void
}

; Workgroup id x arrives in s6 for a plain callee.
; GCN-LABEL: {{^}}use_workgroup_id_x:
; GCN: s_waitcnt
; GCN: ; use s6
define void @use_workgroup_id_x() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; Same, but with a stack object so the frame setup is also checked.
; GCN-LABEL: {{^}}use_stack_workgroup_id_x:
; GCN: s_waitcnt
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v0, off, s[0:3], s5 offset:4
; GCN: ; use s6
; GCN: s_setpc_b64
define void @use_stack_workgroup_id_x() #1 {
  %alloca = alloca i32
  store volatile i32 0, i32* %alloca
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; A callee that only needs workgroup id y still receives it in s6 (first free input SGPR).
; GCN-LABEL: {{^}}use_workgroup_id_y:
; GCN: s_waitcnt
; GCN: ; use s6
define void @use_workgroup_id_y() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; Likewise for workgroup id z alone.
; GCN-LABEL: {{^}}use_workgroup_id_z:
; GCN: s_waitcnt
; GCN: ; use s6
define void @use_workgroup_id_z() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; Combinations of workgroup id inputs are packed into consecutive SGPRs starting at s6.
; GCN-LABEL: {{^}}use_workgroup_id_xy:
; GCN: ; use s6
; GCN: ; use s7
define void @use_workgroup_id_xy() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xyz:
; GCN: ; use s6
; GCN: ; use s7
; GCN: ; use s8
define void @use_workgroup_id_xyz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  call void asm sideeffect "; use $0", "s"(i32 %val2)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xz:
; GCN: ; use s6
; GCN: ; use s7
define void @use_workgroup_id_xz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_yz:
; GCN: ; use s6
; GCN: ; use s7
define void @use_workgroup_id_yz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; Kernels calling the single-id users must enable exactly the ids the callee needs,
; and the incoming workgroup ids must already be in (or be moved into) the callee's SGPRs.
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-NOT: s6
; GCN: s_mov_b32 s4, s7
; GCN-NOT: s6
; GCN: s_mov_b32 s32, s7
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_y:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN: s_mov_b32 s33, s8
; GCN: s_mov_b32 s32, s8
; GCN: s_mov_b32 s4, s33
; GCN: s_mov_b32 s6, s7
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_z:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN: s_mov_b32 s4, s33
; GCN: s_mov_b32 s6, s7
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}

; Multi-id kernels: the callee's id registers (s6/s7/s8) must not be clobbered
; between the frame/stack-pointer setup and the call.
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xy:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN: s_mov_b32 s33, s8
; GCN-NOT: s6
; GCN-NOT: s7
; GCN: s_mov_b32 s32, s8
; GCN-NOT: s6
; GCN-NOT: s7
; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN-NOT: s7
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 {
  call void @use_workgroup_id_xy()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xyz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8

; GCN: s_mov_b32 s4, s9

; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8

; GCN: s_mov_b32 s32, s9

; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8

; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 {
  call void @use_workgroup_id_xyz()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN-NOT: s6
; GCN-NOT: s7

; GCN: s_mov_b32 s32, s8
; GCN-NOT: s6
; GCN-NOT: s7

; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN-NOT: s7

; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 {
  call void @use_workgroup_id_xz()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_yz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s6, s7
; GCN: s_mov_b32 s4, s9
; GCN: s_mov_b32 s7, s8
; GCN: s_mov_b32 s32, s9
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 {
  call void @use_workgroup_id_yz()
  ret void
}

; Argument is in right place already
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
; GCN-NOT: s6
define void @func_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
; GCN-NOT: s6
define void @func_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
; GCN-NOT: s6
define void @func_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}

; A normal VGPR argument (v0) coexists with the workgroup id SGPR input (s6).
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s6
define void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s6
define void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s6
define void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; Kernels passing both a VGPR argument (0x22b == 555) and workgroup id inputs.
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_x:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-DAG: v_mov_b32_e32 v0, 0x22b

; GCN-NOT: s6
; GCN: s_mov_b32 s4, s7
; GCN-NOT: s6
; GCN-DAG: s_mov_b32 s32, s7
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 {
  call void @other_arg_use_workgroup_id_x(i32 555)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_y:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-DAG: s_mov_b32 s33, s8
; GCN-DAG: s_mov_b32 s32, s8
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 {
  call void @other_arg_use_workgroup_id_y(i32 555)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_z:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN: s_mov_b32 s32, s8
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7

; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 {
  call void @other_arg_use_workgroup_id_z(i32 555)
  ret void
}

; Callee consuming every special SGPR input at once: dispatch ptr (s[6:7]),
; queue ptr (s[8:9]), kernarg segment ptr (s[10:11]), dispatch id (s[12:13]),
; and workgroup ids x/y/z (s14/s15/s16), plus a stack object.
; GCN-LABEL: {{^}}use_every_sgpr_input:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
; GCN: s_load_dword s{{[0-9]+}}, s[10:11], 0x0
; GCN: ; use s[12:13]
; GCN: ; use s14
; GCN: ; use s15
; GCN: ; use s16
define void @use_every_sgpr_input() #1 {
  %alloca = alloca i32, align 4
  store volatile i32 0, i32* %alloca

  %dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
  %val0 = load volatile i32, i32 addrspace(2)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
  %val1 = load volatile i32, i32 addrspace(2)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(2)* %kernarg_segment_ptr to i32 addrspace(2)*
  %val2 = load volatile i32, i32 addrspace(2)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ret void
}

; Kernel must enable every special input and shift its own incoming pairs up
; into the callee's ABI registers before the call.
; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1
; GCN: enable_sgpr_workgroup_info = 0

; GCN: enable_sgpr_private_segment_buffer = 1
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: enable_sgpr_queue_ptr = 1
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; GCN: enable_sgpr_dispatch_id = 1
; GCN: enable_sgpr_flat_scratch_init = 1

; GCN: s_mov_b64 s[12:13], s[10:11]
; GCN: s_mov_b64 s[10:11], s[8:9]
; GCN: s_mov_b64 s[8:9], s[6:7]
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_mov_b32 s4, s17
; GCN: s_mov_b32 s32, s17
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}

; A function calling the all-inputs callee already has everything in place:
; no copies of any input register should be emitted.
; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input:
; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8
; GCN-NOT: s9
; GCN-NOT: s10
; GCN-NOT: s11
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s[6:7]
; GCN-NOT: s[8:9]
; GCN-NOT: s[10:11]
; GCN-NOT: s[12:13]
define void @func_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}

; Caller uses all inputs itself and then calls a callee needing only the workgroup
; ids, which must be moved down from s14/s15/s16 into s6/s7/s8.
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz:
; GCN-DAG: s_mov_b32 s6, s14
; GCN-DAG: s_mov_b32 s7, s15
; GCN-DAG: s_mov_b32 s8, s16
; GCN: s_swappc_b64
define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
  %alloca = alloca i32, align 4
  store volatile i32 0, i32* %alloca

  %dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
  %val0 = load volatile i32, i32 addrspace(2)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
  %val1 = load volatile i32, i32 addrspace(2)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(2)* %kernarg_segment_ptr to i32 addrspace(2)*
  %val2 = load volatile i32, i32 addrspace(2)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  call void @use_workgroup_id_xyz()
  ret void
}

; Here the call comes first, so the caller must save its own input registers
; (workgroup ids and pointer pairs) across the call before using them afterwards.
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill:
; GCN: s_mov_b32 s5, s32
; GCN: s_add_u32 s32, s32, 0x300

; GCN: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[6:7]
; GCN: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[10:11]
; GCN: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[8:9]

; GCN: s_mov_b32 s6, s14
; GCN: s_mov_b32 s7, s15
; GCN: s_mov_b32 s8, s16

; GCN: s_mov_b32 [[SAVE_Z:s[0-9]+]], s16
; GCN: s_mov_b32 [[SAVE_Y:s[0-9]+]], s15
; GCN: s_mov_b32 [[SAVE_X:s[0-9]+]], s14

; GCN: s_swappc_b64

; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
; GCN: s_load_dword s{{[0-9]+}},
; GCN: s_load_dword s{{[0-9]+}},
; GCN: s_load_dword s{{[0-9]+}},
; GCN: ; use
; GCN: ; use [[SAVE_X]]
; GCN: ; use [[SAVE_Y]]
; GCN: ; use [[SAVE_Z]]
define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 {
  %alloca = alloca i32, align 4
  call void @use_workgroup_id_xyz()

  store volatile i32 0, i32* %alloca

  %dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
  %val0 = load volatile i32, i32 addrspace(2)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
  %val1 = load volatile i32, i32 addrspace(2)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(2)* %kernarg_segment_ptr to i32 addrspace(2)*
  %val2 = load volatile i32, i32 addrspace(2)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ret void
}

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0
declare noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
declare noalias i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
declare noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }