; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Leaf function that reads workitem ID X and stores it (volatile) through an
; undef addrspace(1) pointer. The checks expect the stored value to come
; directly from v0.
; GCN-LABEL: {{^}}use_workitem_id_x:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_x() #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}
13
; Leaf function that reads workitem ID Y and stores it (volatile) through an
; undef addrspace(1) pointer. The checks expect the stored value to come
; directly from v0.
; GCN-LABEL: {{^}}use_workitem_id_y:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_y() #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}
24
; Leaf function that reads workitem ID Z and stores it (volatile) through an
; undef addrspace(1) pointer. The checks expect the stored value to come
; directly from v0.
; GCN-LABEL: {{^}}use_workitem_id_z:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_z() #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}
35
; Leaf function that reads workitem IDs X and Y and stores each one. The
; checks expect the two stores to take their data from v0 and then v1.
; GCN-LABEL: {{^}}use_workitem_id_xy:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xy() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  ret void
}
49
; Leaf function that reads workitem IDs X, Y and Z and stores each one. The
; checks expect the three stores to take their data from v0, v1 and v2, in
; that order.
; GCN-LABEL: {{^}}use_workitem_id_xyz:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v2
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xyz() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  store volatile i32 %val2, i32 addrspace(1)* undef
  ret void
}
66
; Leaf function that reads workitem IDs X and Z (Y is unused) and stores each
; one. The checks expect the two stores to take their data from v0 and then v1.
; GCN-LABEL: {{^}}use_workitem_id_xz:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xz() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  ret void
}
80
; Leaf function that reads workitem IDs Y and Z (X is unused) and stores each
; one. The checks expect the two stores to take their data from v0 and then v1.
; GCN-LABEL: {{^}}use_workitem_id_yz:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_yz() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.y()
  %val1 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  ret void
}
94
; Kernel whose only use of workitem ID X is through a call to
; use_workitem_id_x. The checks expect enable_vgpr_workitem_id = 0 and no
; mention of v0 on either side of the s_swappc_b64 call.
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x:
; GCN: enable_vgpr_workitem_id = 0

; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
  call void @use_workitem_id_x()
  ret void
}
105
; Kernel whose only use of workitem ID Y is through a call to
; use_workitem_id_y. The checks expect enable_vgpr_workitem_id = 1 and exactly
; one copy of v1 into v0 before the call, with no other mention of v0 or v1.
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:
; GCN: enable_vgpr_workitem_id = 1

; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_mov_b32_e32 v0, v1
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
  call void @use_workitem_id_y()
  ret void
}
119
; Kernel whose only use of workitem ID Z is through a call to
; use_workitem_id_z. The checks expect enable_vgpr_workitem_id = 2 and exactly
; one copy of v2 into v0 before the call, with no other mention of v0 or v2.
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:
; GCN: enable_vgpr_workitem_id = 2

; GCN-NOT: v0
; GCN-NOT: v2
; GCN: v_mov_b32_e32 v0, v2
; GCN-NOT: v0
; GCN-NOT: v2
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
  call void @use_workitem_id_z()
  ret void
}
133
; Non-kernel function that forwards to use_workitem_id_x. The checks expect no
; mention of v0 on either side of the s_swappc_b64 call.
; GCN-LABEL: {{^}}func_indirect_use_workitem_id_x:
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
define void @func_indirect_use_workitem_id_x() #1 {
  call void @use_workitem_id_x()
  ret void
}
142
; Non-kernel function that forwards to use_workitem_id_y. The checks expect no
; mention of v0 on either side of the s_swappc_b64 call.
; GCN-LABEL: {{^}}func_indirect_use_workitem_id_y:
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
define void @func_indirect_use_workitem_id_y() #1 {
  call void @use_workitem_id_y()
  ret void
}
151
; Non-kernel function that forwards to use_workitem_id_z. The checks expect no
; mention of v0 on either side of the s_swappc_b64 call.
; GCN-LABEL: {{^}}func_indirect_use_workitem_id_z:
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
define void @func_indirect_use_workitem_id_z() #1 {
  call void @use_workitem_id_z()
  ret void
}
160
; Function with one explicit i32 argument that also reads workitem ID X. The
; checks expect the explicit argument to be stored from v0 and the workitem ID
; from v1.
; GCN-LABEL: {{^}}other_arg_use_workitem_id_x:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}
171
; Function with one explicit i32 argument that also reads workitem ID Y. The
; checks expect the explicit argument to be stored from v0 and the workitem ID
; from v1.
; GCN-LABEL: {{^}}other_arg_use_workitem_id_y:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}
182
; Function with one explicit i32 argument that also reads workitem ID Z. The
; checks expect the explicit argument to be stored from v0 and the workitem ID
; from v1.
; GCN-LABEL: {{^}}other_arg_use_workitem_id_z:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}
193
194
; Kernel that calls other_arg_use_workitem_id_x with the constant 555. The
; checks expect v0 to be copied into v1 and 0x22b (555) to be materialized in
; v0 before the call.
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x:
; GCN: enable_vgpr_workitem_id = 0

; GCN: v_mov_b32_e32 v1, v0
; GCN: v_mov_b32_e32 v0, 0x22b
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
  call void @other_arg_use_workitem_id_x(i32 555)
  ret void
}
205
206
; Kernel that calls other_arg_use_workitem_id_y with the constant 555. The
; checks expect 0x22b (555) to be materialized in v0 with no mention of v1
; before the call, and no mention of v0 after it.
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y:
; GCN: enable_vgpr_workitem_id = 1

; GCN-NOT: v1
; GCN: v_mov_b32_e32 v0, 0x22b
; GCN-NOT: v1
; GCN: s_swappc_b64
; GCN-NOT: v0
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
  call void @other_arg_use_workitem_id_y(i32 555)
  ret void
}
219
; Kernel that calls other_arg_use_workitem_id_z with the constant 555. The
; checks expect 0x22b (555) to be materialized in v0 and v2 to be copied into
; v1 before the call, and no mention of v0 after it.
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z:
; GCN: enable_vgpr_workitem_id = 2

; GCN: v_mov_b32_e32 v0, 0x22b
; GCN: v_mov_b32_e32 v1, v2
; GCN: s_swappc_b64
; GCN-NOT: v0
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
  call void @other_arg_use_workitem_id_z(i32 555)
  ret void
}
231
; Function with 32 explicit i32 arguments that also reads workitem ID X. The
; checks expect v32 to be spilled at offset:8 from s5, the workitem ID to be
; loaded from offset:4 into v32 and stored, and v32 to be reloaded from
; offset:8 right before the return.
; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x:
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32

; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @too_many_args_use_workitem_id_x(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val, i32 addrspace(1)* undef

  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef

  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef

  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef

  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef
  store volatile i32 %arg31, i32 addrspace(1)* undef

  ret void
}
287
; Kernel that calls too_many_args_use_workitem_id_x with 32 constant
; arguments. The checks expect s32 and s33 to be initialized from s7, v0 to be
; stored at offset:8 from s32, and s4 to be copied from s33 before the call.
; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x:
; GCN: enable_vgpr_workitem_id = 0

; GCN: s_mov_b32 s32, s7
; GCN: s_mov_b32 s33, s7
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:8
; GCN: s_mov_b32 s4, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
  call void @too_many_args_use_workitem_id_x(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320)
  ret void
}
308
; Non-kernel function with one explicit argument that calls
; too_many_args_use_workitem_id_x with 32 constant arguments. The checks expect
; v1 to be stored at offset:8 from s32 before the call.
; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x:
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v1, off, s[0:3], s32 offset:8
; GCN: s_swappc_b64
define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void @too_many_args_use_workitem_id_x(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320)
  ret void
}
326
; Function with 32 explicit arguments that forwards all of them to
; too_many_args_use_workitem_id_x.
; Requires loading and storing to stack slot: the checks expect a value to be
; loaded from offset:4 of the incoming frame (s5) into v32 and stored to
; offset:8 of the outgoing area (s32), with s32 bumped by 0x400 around the call.
; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
; GCN: s_add_u32 s32, s32, 0x400{{$}}

; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:8{{$}}

; GCN: s_swappc_b64

; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Reload
; GCN: s_sub_u32 s32, s32, 0x400{{$}}
; GCN: s_setpc_b64
define void @too_many_args_call_too_many_args_use_workitem_id_x(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
  call void @too_many_args_use_workitem_id_x(
    i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
    i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
    i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
    i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31)
  ret void
}
352
; Function with 32 explicit i32 arguments plus a byval pointer argument that
; also reads workitem ID X. The checks expect the workitem ID to be loaded
; from offset:8, the byval value from offset:4, and v32 to be spilled and
; reloaded at offset:12 (all relative to s5).
;
; stack layout:
; frame[0] = emergency stack slot
; frame[1] = byval arg32
; frame[2] = stack passed workitem ID x
; frame[3] = VGPR spill slot

; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_byval:
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:12 ; 4-byte Folded Spill
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8
; GCN-NEXT: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32
; GCN: buffer_load_dword v0, off, s[0:3], s5 offset:4
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:12 ; 4-byte Folded Reload
; GCN: s_setpc_b64
define void @too_many_args_use_workitem_id_x_byval(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, i32* byval %arg32) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val, i32 addrspace(1)* undef

  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef

  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef

  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef

  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef
  store volatile i32 %arg31, i32 addrspace(1)* undef
  %private = load volatile i32, i32* %arg32
  ret void
}
413
; Kernel that stores 999 to a private alloca and passes it as the byval
; argument of too_many_args_use_workitem_id_x_byval, after 32 constant
; arguments.
;
; frame[0] = emergency stack slot
; frame[1] = %alloca (presumably; the checks store 999 at offset:4 from s7)

; sp[0] = callee emergency stack slot reservation
; sp[1] = byval
; sp[2] = ??
; sp[3] = stack passed workitem ID x

; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
; GCN: enable_vgpr_workitem_id = 0

; GCN: s_add_u32 s32, s7, 0x200{{$}}
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
; GCN: s_add_u32 s32, s32, 0x100{{$}}


; GCN: buffer_store_dword [[K]], off, s[0:3], s7 offset:4
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:12
; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s7 offset:4

; GCN: s_mov_b32 s33, s7
; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 {
  %alloca = alloca i32, align 4
  store volatile i32 999, i32* %alloca
  call void @too_many_args_use_workitem_id_x_byval(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320,
    i32* %alloca)
  ret void
}
453
; Non-kernel variant of the byval caller: stores 999 to a private alloca and
; passes it as the byval argument after 32 constant arguments. The checks
; expect 999 (0x3e7) to be stored at offset:4 from s5, v0 to be stored at
; offset:12 from s32, and the alloca value to be reloaded and copied to
; offset:4 from s32 before the call.
; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
; GCN: buffer_store_dword [[K]], off, s[0:3], s5 offset:4
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:12

; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s5 offset:4
; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
; GCN: s_swappc_b64
define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
  %alloca = alloca i32, align 4
  store volatile i32 999, i32* %alloca
  call void @too_many_args_use_workitem_id_x_byval(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320,
    i32* %alloca)
  ret void
}
478
; Function with 32 explicit i32 arguments that also reads workitem IDs X, Y
; and Z. The checks expect the three IDs to be loaded into v32 from offset:4,
; offset:8 and offset:12 (relative to s5) and stored in turn, with v32 spilled
; and reloaded at offset:16.
; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz:
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Spill
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:12{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32

; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Reload
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @too_many_args_use_workitem_id_xyz(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val0, i32 addrspace(1)* undef
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %val1, i32 addrspace(1)* undef
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val2, i32 addrspace(1)* undef

  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef

  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef

  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef

  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef
  store volatile i32 %arg31, i32 addrspace(1)* undef

  ret void
}
542
; Kernel that calls too_many_args_use_workitem_id_xyz with 32 constant
; arguments. The checks expect v0, v1 and v2 to be stored (in any order) at
; offset:8, offset:12 and offset:16 from s32 before the call.
;
; frame[0] = kernel emergency stack slot
; frame[1] = callee emergency stack slot
; frame[2] = ID X
; frame[3] = ID Y
; frame[4] = ID Z

; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz:
; GCN: enable_vgpr_workitem_id = 2

; GCN: s_mov_b32 s32, s7
; GCN: s_mov_b32 s33, s7

; GCN-DAG: buffer_store_dword v0, off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword v1, off, s[0:3], s32 offset:12
; GCN-DAG: buffer_store_dword v2, off, s[0:3], s32 offset:16
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
  call void @too_many_args_use_workitem_id_xyz(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320)
  ret void
}
571
; Function with 31 explicit i32 arguments that reads workitem IDs X, Y and Z.
; The checks expect X to be stored from v31, Y and Z to be loaded into v31 from
; offset:4 and offset:8 (relative to s5) and stored, and a ScratchSize of 12.
;
; workitem ID X in register, yz on stack
; v31 = workitem ID X
; frame[0] = emergency slot
; frame[1] = workitem Y
; frame[2] = workitem Z

; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_stack_yz:
; GCN: s_mov_b32 s5, s32
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31
; GCN: buffer_load_dword v31, off, s[0:3], s5 offset:4{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31
; GCN: buffer_load_dword v31, off, s[0:3], s5 offset:8{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31

; GCN: s_waitcnt
; GCN-NEXT: s_setpc_b64
; GCN: ScratchSize: 12
define void @too_many_args_use_workitem_id_x_stack_yz(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val0, i32 addrspace(1)* undef
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %val1, i32 addrspace(1)* undef
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val2, i32 addrspace(1)* undef

  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef

  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef

  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef

  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef

  ret void
}
638
; Kernel that calls too_many_args_use_workitem_id_x_stack_yz with 31 constant
; arguments. The checks expect v0 to be copied into v31 and v1/v2 to be stored
; at offset:8 and offset:12 from s32 (in any order) before the call.
;
; frame[0] = kernel emergency stack slot
; frame[1] = callee emergency stack slot
; frame[2] = ID Y
; frame[3] = ID Z

; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz:
; GCN: enable_vgpr_workitem_id = 2

; GCN: s_mov_b32 s32, s7
; GCN: s_mov_b32 s33, s7

; GCN-DAG: v_mov_b32_e32 v31, v0
; GCN-DAG: buffer_store_dword v1, off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword v2, off, s[0:3], s32 offset:12
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
  call void @too_many_args_use_workitem_id_x_stack_yz(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310)
  ret void
}
666
; Workitem ID intrinsics read by the functions in this file.
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

; #0 is applied to the intrinsic declarations; #1 to every function defined in
; this file.
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }