; blob: 139abbe104260dce13d7e59cebf02f092d114024
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA,HSA-NOENV %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa-opencl -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA,HSA-OPENCL %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MESA %s

; Kernel with no explicit arguments that volatile-loads one i32 through the
; pointer returned by llvm.amdgcn.implicitarg.ptr. The checks require the
; kernarg segment pointer SGPR to be enabled, pin the kernarg segment size for
; each tested triple (0, 32 and 16 bytes), and on the amdhsa triples expect
; the load to read at offset 0 from s[4:5].
; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty:
; GCN: enable_sgpr_kernarg_segment_ptr = 1

; HSA-NOENV: kernarg_segment_byte_size = 0
; HSA-OPENCL: kernarg_segment_byte_size = 32
; MESA: kernarg_segment_byte_size = 16

; HSA: s_load_dword s0, s[4:5], 0x0
define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  ; The result is unused; volatile keeps the load in the output.
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Same load as the empty-kernel case, but the kernel takes one unnamed
; 112-byte explicit argument. The expected kernarg segment sizes grow
; accordingly (112, 144 and 464 bytes) and the amdhsa load is expected at
; offset 0x1c from s[4:5].
; NOTE(review): 0x1c is 28, which presumably is a dword-scaled offset equal to
; the 112 explicit bytes -- confirm against the target's scalar-load encoding.
; GCN-LABEL: {{^}}kernel_implicitarg_ptr:
; GCN: enable_sgpr_kernarg_segment_ptr = 1

; HSA-NOENV: kernarg_segment_byte_size = 112
; HSA-OPENCL: kernarg_segment_byte_size = 144
; MESA: kernarg_segment_byte_size = 464

; HSA: s_load_dword s0, s[4:5], 0x1c
define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  ; The result is unused; volatile keeps the load in the output.
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Non-kernel function that volatile-loads one i32 through the implicit
; argument pointer. The checks expect the pointer to be read from s[6:7]:
; the mesa3d run copies it into a buffer resource and uses buffer_load_dword,
; the amdhsa runs move it into v[0:1] and use flat_load_dword. The function
; must then end with a wait followed directly by s_setpc_b64.
; GCN-LABEL: {{^}}func_implicitarg_ptr:
; GCN: s_waitcnt
; MESA: s_mov_b64 s[8:9], s[6:7]
; MESA: s_mov_b32 s11, 0xf000
; MESA: s_mov_b32 s10, -1
; MESA: buffer_load_dword v0, off, s[8:11], 0
; HSA: v_mov_b32_e32 v0, s6
; HSA: v_mov_b32_e32 v1, s7
; HSA: flat_load_dword v0, v[0:1]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @func_implicitarg_ptr() #1 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  ; The result is unused; volatile keeps the load in the output.
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Kernel with no explicit arguments that calls @func_implicitarg_ptr. The
; checks expect the kernarg segment pointer to be enabled, the same segment
; sizes as the other empty kernel (0, 32 and 16 bytes), and a plain copy of
; s[4:5] into s[6:7] ahead of the call (s_swappc_b64).
; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; HSA-NOENV: kernarg_segment_byte_size = 0
; HSA-OPENCL: kernarg_segment_byte_size = 32
; MESA: kernarg_segment_byte_size = 16
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
  call void @func_implicitarg_ptr()
  ret void
}

; Kernel with one unnamed 112-byte explicit argument that calls
; @func_implicitarg_ptr. Instead of a plain copy, the checks expect a 64-bit
; add of an offset to s[4:5], written to s[6:7], before the call: 0x70 (112)
; on the amdhsa triples and 0x1c0 (448) on mesa3d, with the carry folded in by
; s_addc_u32 whose immediate must be exactly 0.
; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; HSA-NOENV: kernarg_segment_byte_size = 112
; HSA-OPENCL: kernarg_segment_byte_size = 144
; MESA: kernarg_segment_byte_size = 464

; HSA: s_add_u32 s6, s4, 0x70
; MESA: s_add_u32 s6, s4, 0x1c0

; GCN: s_addc_u32 s7, s5, 0{{$}}
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
  call void @func_implicitarg_ptr()
  ret void
}

; Non-kernel function that calls @func_implicitarg_ptr. The negative checks
; require that the output for this function never mentions s6, s7 or s[6:7].
; NOTE(review): presumably this shows the incoming implicit argument pointer
; is passed through to the callee untouched -- confirm against the ABI.
; GCN-LABEL: {{^}}func_call_implicitarg_ptr_func:
; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s[6:7]
define void @func_call_implicitarg_ptr_func() #1 {
  call void @func_implicitarg_ptr()
  ret void
}

; Non-kernel function that volatile-loads one i32 through each of the kernarg
; segment pointer and the implicit argument pointer. The checks expect the
; first load to take its address from s[6:7] and the second from s[8:9]; the
; mesa3d run uses buffer loads (reusing the resource words in s14/s15 for the
; second descriptor), the amdhsa runs use flat loads through v[0:1].
; GCN-LABEL: {{^}}func_kernarg_implicitarg_ptr:
; GCN: s_waitcnt
; MESA: s_mov_b64 s[12:13], s[6:7]
; MESA: s_mov_b32 s15, 0xf000
; MESA: s_mov_b32 s14, -1
; MESA: buffer_load_dword v0, off, s[12:15], 0
; HSA: v_mov_b32_e32 v0, s6
; HSA: v_mov_b32_e32 v1, s7
; HSA: flat_load_dword v0, v[0:1]
; MESA: s_mov_b32 s10, s14
; MESA: s_mov_b32 s11, s15
; MESA: buffer_load_dword v0, off, s[8:11], 0
; HSA: v_mov_b32_e32 v0, s8
; HSA: v_mov_b32_e32 v1, s9
; HSA: flat_load_dword v0, v[0:1]

; GCN: s_waitcnt vmcnt(0)
define void @func_kernarg_implicitarg_ptr() #1 {
  %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast.kernarg.segment.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
  %cast.implicitarg = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  ; Both results are unused; volatile keeps the two loads in the output.
  %load0 = load volatile i32, i32 addrspace(4)* %cast.kernarg.segment.ptr
  %load1 = load volatile i32, i32 addrspace(4)* %cast.implicitarg
  ret void
}

; Kernel with one unnamed 112-byte explicit argument that calls
; @func_kernarg_implicitarg_ptr. Before the call the checks expect s[4:5] to
; be copied to s[6:7], and s[8:9] to be formed by adding an offset to that
; copy: 0x70 (112) on the amdhsa triples and 0x1c0 (448) on mesa3d.
; GCN-LABEL: {{^}}kernel_call_kernarg_implicitarg_ptr_func:
; GCN: s_mov_b64 s[6:7], s[4:5]
; HSA: s_add_u32 s8, s6, 0x70
; MESA: s_add_u32 s8, s6, 0x1c0
; GCN: s_addc_u32 s9, s7, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8]) #0 {
  call void @func_kernarg_implicitarg_ptr()
  ret void
}

; The two intrinsics exercised above; each takes no arguments and returns an
; i8 pointer in address space 4.
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #2
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #2

; #0 is attached to the kernels, #1 to the non-kernel functions and #2 to the
; intrinsic declarations. noinline presumably keeps the calls, and therefore
; the checked s_swappc_b64 instructions, from being inlined away.
attributes #0 = { nounwind noinline }
attributes #1 = { nounwind noinline }
attributes #2 = { nounwind readnone speculatable }