; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,CIVI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Callee reads the dispatch ptr, which is passed in s[6:7] per the AMDGPU
; function ABI; the kernel caller must copy it there from its own input regs.
; GCN-LABEL: {{^}}use_dispatch_ptr:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_dispatch_ptr() #1 {
  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_dispatch_ptr:
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 {
  call void @use_dispatch_ptr()
  ret void
}

; Same pattern as above for the queue ptr.
; GCN-LABEL: {{^}}use_queue_ptr:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_queue_ptr() #1 {
  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr:
; GCN: enable_sgpr_queue_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
  call void @use_queue_ptr()
  ret void
}

; addrspacecast from LDS to flat needs the shared-memory aperture: CI/VI read
; it from the queue ptr (hence flat_load + enable_sgpr_queue_ptr below), while
; gfx9 reads it from a hardware register (s_getreg) and needs no queue ptr.
; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
; CIVI: flat_load_dword v[[HI:[0-9]+]], v[0:1]
; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]]
; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
; GFX9: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_queue_ptr_addrspacecast() #1 {
  %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32*
  store volatile i32 0, i32* %asc
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr_addrspacecast:
; CIVI: enable_sgpr_queue_ptr = 1

; CIVI: s_mov_b64 s[6:7], s[4:5]
; GFX9-NOT: s_mov_b64
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
  call void @use_queue_ptr_addrspacecast()
  ret void
}

; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_kernarg_segment_ptr() #1 {
  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 {
  call void @use_kernarg_segment_ptr()
  ret void
}

; GCN-LABEL: {{^}}use_dispatch_id:
; GCN: ; use s[6:7]
define void @use_dispatch_id() #1 {
  %id = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %id)
  ret void
}

; No kernarg segment so that there is a mov to check. With kernarg
; pointer enabled, it happens to end up in the right place anyway.

; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id:
; GCN: enable_sgpr_dispatch_id = 1

; GCN: s_mov_b64 s[6:7], s[4:5]
define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 {
  call void @use_dispatch_id()
  ret void
}

; Callees consuming workgroup IDs: the ABI passes them in SGPRs starting at
; s6, packed in x, y, z order for whichever IDs the function actually uses.
; GCN-LABEL: {{^}}use_workgroup_id_x:
; GCN: s_waitcnt
; GCN: ; use s6
define void @use_workgroup_id_x() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_stack_workgroup_id_x:
; GCN: s_waitcnt
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v0, off, s[0:3], s5 offset:4
; GCN: ; use s6
; GCN: s_setpc_b64
define void @use_stack_workgroup_id_x() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_y:
; GCN: s_waitcnt
; GCN: ; use s6
define void @use_workgroup_id_y() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_z:
; GCN: s_waitcnt
; GCN: ; use s6
define void @use_workgroup_id_z() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xy:
; GCN: ; use s6
; GCN: ; use s7
define void @use_workgroup_id_xy() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xyz:
; GCN: ; use s6
; GCN: ; use s7
; GCN: ; use s8
define void @use_workgroup_id_xyz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  call void asm sideeffect "; use $0", "s"(i32 %val2)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xz:
; GCN: ; use s6
; GCN: ; use s7
define void @use_workgroup_id_xz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_yz:
; GCN: ; use s6
; GCN: ; use s7
define void @use_workgroup_id_yz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; Kernels calling the workgroup-ID callees: check the kernel enables exactly
; the needed ID inputs and moves them into the callee's expected SGPRs.
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-NOT: s6
; GCN: s_mov_b32 s33, s7
; GCN-NOT: s6
; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_y:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN: s_mov_b32 s33, s8
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_z:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xy:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN: s_mov_b32 s33, s8
; GCN-NOT: s6
; GCN-NOT: s7
; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN-NOT: s7
; GCN: s_mov_b32 s32, s33
; GCN-NOT: s6
; GCN-NOT: s7
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 {
  call void @use_workgroup_id_xy()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xyz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s9

; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8

; GCN: s_mov_b32 s4, s33

; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8

; GCN: s_mov_b32 s32, s33

; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8

; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 {
  call void @use_workgroup_id_xyz()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN-NOT: s6
; GCN-NOT: s7

; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN-NOT: s7

; GCN: s_mov_b32 s32, s33
; GCN-NOT: s6
; GCN-NOT: s7

; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 {
  call void @use_workgroup_id_xz()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_yz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s9
; GCN: s_mov_b32 s6, s7
; GCN: s_mov_b32 s4, s33
; GCN: s_mov_b32 s7, s8
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 {
  call void @use_workgroup_id_yz()
  ret void
}

; Argument is in right place already
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
; GCN-NOT: s6
define void @func_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
; GCN-NOT: s6
define void @func_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
; GCN-NOT: s6
define void @func_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}

; Callees mixing a normal VGPR argument (v0) with an SGPR workgroup-ID input.
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s6
define void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s6
define void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s6
define void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; Kernels passing both an ordinary argument (555 = 0x22b in v0) and the
; workgroup-ID SGPR inputs to the callee.
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_x:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-DAG: s_mov_b32 s33, s7
; GCN-DAG: v_mov_b32_e32 v0, 0x22b

; GCN-NOT: s6
; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 {
  call void @other_arg_use_workgroup_id_x(i32 555)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_y:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-DAG: s_mov_b32 s33, s8
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 {
  call void @other_arg_use_workgroup_id_y(i32 555)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_z:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7

; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 {
  call void @other_arg_use_workgroup_id_z(i32 555)
  ret void
}

; Callee that consumes every special SGPR input at once: dispatch ptr (s[6:7]),
; queue ptr (s[8:9]), kernarg segment ptr (s[10:11]), dispatch id (s[12:13]),
; and workgroup IDs x/y/z (s14/s15/s16).
; GCN-LABEL: {{^}}use_every_sgpr_input:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s8
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s9
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s10
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s11
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: ; use s[12:13]
; GCN: ; use s14
; GCN: ; use s15
; GCN: ; use s16
define void @use_every_sgpr_input() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1
; GCN: enable_sgpr_workgroup_info = 0

; GCN: enable_sgpr_private_segment_buffer = 1
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: enable_sgpr_queue_ptr = 1
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; GCN: enable_sgpr_dispatch_id = 1
; GCN: enable_sgpr_flat_scratch_init = 1

; GCN: s_mov_b32 s33, s17
; GCN: s_mov_b64 s[12:13], s[10:11]
; GCN: s_mov_b64 s[10:11], s[8:9]
; GCN: s_mov_b64 s[8:9], s[6:7]
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_mov_b32 s4, s33
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input:
; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8
; GCN-NOT: s9
; GCN-NOT: s10
; GCN-NOT: s11
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s[6:7]
; GCN-NOT: s[8:9]
; GCN-NOT: s[10:11]
; GCN-NOT: s[12:13]
define void @func_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}

; Function that both uses every SGPR input itself and forwards the workgroup
; IDs (arriving in s14-s16) down into the callee's expected s6-s8.
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz:
; GCN-DAG: s_mov_b32 s6, s14
; GCN-DAG: s_mov_b32 s7, s15
; GCN-DAG: s_mov_b32 s8, s16
; GCN: s_swappc_b64
define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  call void @use_workgroup_id_xyz()
  ret void
}

; Like the previous test, but the SGPR inputs are used AFTER the call, so the
; incoming values must be saved to callee-saved SGPRs across s_swappc_b64.
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill:
; GCN: s_mov_b32 s5, s32

; GCN-DAG: s_add_u32 s32, s32, 0x400

; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s14
; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-68-9][0-9]*]], s15
; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-79][0-9]*]], s16
; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[6:7]
; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[8:9]
; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[10:11]

; GCN-DAG: s_mov_b32 s6, s14
; GCN-DAG: s_mov_b32 s7, s15
; GCN-DAG: s_mov_b32 s8, s16

; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[6:7]
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[8:9]
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Z:[0-9]+]]{{\:}}[[HI_Z:[0-9]+]]{{\]}}, s[10:11]

; GCN: s_swappc_b64

; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_X]]
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_X]]
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_Y]]
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_Y]]
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_Z]]
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_Z]]
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: ; use
; GCN: ; use [[SAVE_X]]
; GCN: ; use [[SAVE_Y]]
; GCN: ; use [[SAVE_Z]]
define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  call void @use_workgroup_id_xyz()

  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ret void
}

; Intrinsic declarations and shared attribute groups.
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }