; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA -check-prefix=CI %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA -check-prefix=GFX9 %s

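; A group or private pointer equal to -1 (the 32-bit null value for those
; address spaces) casts to the flat null pointer (0); any other value keeps
; its low 32 bits and takes the aperture base as the high half. On CI the
; aperture bases are loaded from the queue pointer, so enable_sgpr_queue_ptr
; is expected to be set; gfx9 reads them from the src_shared_base and
; src_private_base registers instead.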
; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; CI: enable_sgpr_queue_ptr = 1
; GFX9: enable_sgpr_queue_ptr = 0

; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]

; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_shared_base

; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]

; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]]
; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7

; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]

; At most 2 digits. Make sure src_shared_base is not counted as a high
; number SGPR.

; CI: NumSgprs: {{[0-9][0-9]+}}
; GFX9: NumSgprs: {{[0-9]+}}
define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
  store volatile i32 7, i32 addrspace(4)* %stof
  ret void
}

; HSA-LABEL: {{^}}use_private_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; CI: enable_sgpr_queue_ptr = 1
; GFX9: enable_sgpr_queue_ptr = 0

; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}
; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]

; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base

; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]

; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]]
; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7

; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]

; CI: NumSgprs: {{[0-9][0-9]+}}
; GFX9: NumSgprs: {{[0-9]+}}
define void @use_private_to_flat_addrspacecast(i32* %ptr) #0 {
  %stof = addrspacecast i32* %ptr to i32 addrspace(4)*
  store volatile i32 7, i32 addrspace(4)* %stof
  ret void
}

; No-op cast: global pointers share the flat 64-bit address space, so the
; pointer value is used unchanged and no queue pointer is needed.
; HSA-LABEL: {{^}}use_global_to_flat_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
define void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)*
  store volatile i32 7, i32 addrspace(4)* %stof
  ret void
}

; No-op cast: constant pointers are also flat addresses, so the value is
; used directly by the flat load.
; HSA-LABEL: {{^}}use_constant_to_flat_addrspacecast:
; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA: flat_load_dword v{{[0-9]+}}, v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}
define void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %stof
  ret void
}

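; Casting flat to group or private only needs the null check in the other
; direction: a flat pointer equal to 0 becomes the 32-bit null (-1), and any
; other pointer keeps just its low 32 bits, so no aperture (and no queue
; pointer) is required.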
; HSA-LABEL: {{^}}use_flat_to_group_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: ds_write_b32 [[CASTPTR]], v[[K]]
define void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #0 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

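; The flat-to-private lowering uses the same select; the resulting 32-bit
; value is then used as the per-thread offset of a scratch buffer access.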
; HSA-LABEL: {{^}}use_flat_to_private_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
define void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #0 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32*
  store volatile i32 0, i32* %ftos
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_global_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0
; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
define void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #0 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)*
  store volatile i32 0, i32 addrspace(1)* %ftos
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_constant_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
; HSA: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, 0x0
define void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #0 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)*
  load volatile i32, i32 addrspace(2)* %ftos
  ret void
}

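; The constant-folded cases below follow the same null rule: -1 in the group
; or private address space is the 32-bit null and folds to the flat null
; pointer, while an IR null (0) there is an ordinary address that still gets
; rebased on the aperture; going the other way, a flat null folds to -1.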
; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], src_shared_base

; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define void @cast_0_group_to_flat_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(3)* null to i32 addrspace(4)*
  store volatile i32 7, i32 addrspace(4)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_0_flat_to_group_addrspacecast:
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define void @cast_0_flat_to_group_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(3)*
  store volatile i32 7, i32 addrspace(3)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_neg1_group_to_flat_addrspacecast:
; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define void @cast_neg1_group_to_flat_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32 addrspace(4)*
  store volatile i32 7, i32 addrspace(4)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_neg1_flat_to_group_addrspacecast:
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define void @cast_neg1_flat_to_group_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(4)* inttoptr (i64 -1 to i32 addrspace(4)*) to i32 addrspace(3)*
  store volatile i32 7, i32 addrspace(3)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]

; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], src_private_base

; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define void @cast_0_private_to_flat_addrspacecast() #0 {
  %cast = addrspacecast i32* null to i32 addrspace(4)*
  store volatile i32 7, i32 addrspace(4)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_0_flat_to_private_addrspacecast:
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
define void @cast_0_flat_to_private_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(0)*
  store volatile i32 7, i32* %cast
  ret void
}

; Disable optimizations in case there are optimizations added that
; specialize away generic pointer accesses.

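; The phi below merges flat pointers that were cast from different address
; spaces, so the access cannot be specialized and must stay a flat store.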
; HSA-LABEL: {{^}}branch_use_flat_i32:
; HSA: flat_store_dword {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}
; HSA: s_endpgm
define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
entry:
  %cmp = icmp ne i32 %c, 0
  br i1 %cmp, label %local, label %global

local:
  %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)*
  br label %end

global:
  %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
  br label %end

end:
  %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
  store volatile i32 %x, i32 addrspace(4)* %fptr, align 4
; %val = load i32, i32 addrspace(4)* %fptr, align 4
; store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Check for prologue initializing special SGPRs pointing to scratch.
; HSA-LABEL: {{^}}store_flat_scratch:
; CI-DAG: s_mov_b32 flat_scratch_lo, s9
; CI-DAG: s_add_u32 [[ADD:s[0-9]+]], s8, s11
; CI: s_lshr_b32 flat_scratch_hi, [[ADD]], 8

; GFX9: s_add_u32 flat_scratch_lo, s6, s9
; GFX9: s_addc_u32 flat_scratch_hi, s7, 0

; HSA: flat_store_dword
; HSA: s_barrier
; HSA: flat_load_dword
define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
  %alloca = alloca i32, i32 9, align 4
  %x = call i32 @llvm.amdgcn.workitem.id.x() #2
  %pptr = getelementptr i32, i32* %alloca, i32 %x
  %fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
  store volatile i32 %x, i32 addrspace(4)* %fptr
  ; Dummy call
  call void @llvm.amdgcn.s.barrier() #1
  %reload = load volatile i32, i32 addrspace(4)* %fptr, align 4
  store volatile i32 %reload, i32 addrspace(1)* %out, align 4
  ret void
}

declare void @llvm.amdgcn.s.barrier() #1
declare i32 @llvm.amdgcn.workitem.id.x() #2

attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }
attributes #2 = { nounwind readnone }