; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA %s

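; Check how addrspacecast is lowered on amdhsa. Casts between the flat
; address space (4) and the group (3) or private (0) segments are
; non-trivial: the high half of the 64-bit flat pointer comes from an
; aperture base read off the queue pointer, and the invalid pointer
; values (null and -1) must be remapped. Casts involving the global (1)
; and constant (2) address spaces are no-ops.
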
; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 1

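; The group aperture, read from the queue pointer (s[4:5]), supplies
; the high 32 bits of the flat pointer; the group offset supplies the
; low 32 bits. A -1 (invalid) group pointer must become the null flat
; pointer, hence the compare against -1 and the selects against 0.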
; HSA-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
; HSA-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}

; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]

; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]]
; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7

; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]
define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
  store volatile i32 7, i32 addrspace(4)* %stof
  ret void
}

; HSA-LABEL: {{^}}use_private_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 1

; HSA-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
; HSA-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}

; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]

; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]]
; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7

; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]
define void @use_private_to_flat_addrspacecast(i32* %ptr) #0 {
  %stof = addrspacecast i32* %ptr to i32 addrspace(4)*
  store volatile i32 7, i32 addrspace(4)* %stof
  ret void
}

; The cast is a no-op: a global pointer is already a valid flat
; address, so no aperture (queue pointer) is needed.
; HSA-LABEL: {{^}}use_global_to_flat_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
define void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)*
  store volatile i32 7, i32 addrspace(4)* %stof
  ret void
}

; Also a no-op: a constant pointer is already a valid flat address.
; HSA-LABEL: {{^}}use_constant_to_flat_addrspacecast:
; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA: flat_load_dword v{{[0-9]+}}, v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}
define void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %stof
  ret void
}

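; The reverse direction needs no aperture: the cast keeps only the low
; 32 bits of the flat pointer, and a null flat pointer must become the
; invalid (-1) group or private pointer, hence the 64-bit compare
; against 0 and the select.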
; HSA-LABEL: {{^}}use_flat_to_group_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: ds_write_b32 [[CASTPTR]], v[[K]]
define void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #0 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_private_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
define void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #0 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32*
  store volatile i32 0, i32* %ftos
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_global_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0
; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
define void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #0 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)*
  store volatile i32 0, i32 addrspace(1)* %ftos
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_constant_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
; HSA: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, 0x0
define void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #0 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)*
  load volatile i32, i32 addrspace(2)* %ftos
  ret void
}

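; The special pointer values are constant-folded: a null group or
; private pointer casts to the corresponding aperture base, a -1 group
; pointer casts to the null flat pointer, and a null flat pointer casts
; to -1 in the segment address spaces.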
; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
; HSA: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define void @cast_0_group_to_flat_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(3)* null to i32 addrspace(4)*
  store i32 7, i32 addrspace(4)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_0_flat_to_group_addrspacecast:
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define void @cast_0_flat_to_group_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(3)*
  store i32 7, i32 addrspace(3)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_neg1_group_to_flat_addrspacecast:
; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define void @cast_neg1_group_to_flat_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32 addrspace(4)*
  store i32 7, i32 addrspace(4)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_neg1_flat_to_group_addrspacecast:
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define void @cast_neg1_flat_to_group_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(4)* inttoptr (i64 -1 to i32 addrspace(4)*) to i32 addrspace(3)*
  store i32 7, i32 addrspace(3)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
; HSA: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define void @cast_0_private_to_flat_addrspacecast() #0 {
  %cast = addrspacecast i32* null to i32 addrspace(4)*
  store i32 7, i32 addrspace(4)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_0_flat_to_private_addrspacecast:
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
define void @cast_0_flat_to_private_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(4)* null to i32*
  store i32 7, i32* %cast
  ret void
}

; Disable optimizations in case optimizations are later added that
; specialize away the generic pointer accesses.

; HSA-LABEL: {{^}}branch_use_flat_i32:
; HSA: flat_store_dword {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}
; HSA: s_endpgm
define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
entry:
  %cmp = icmp ne i32 %c, 0
  br i1 %cmp, label %local, label %global

local:
  %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)*
  br label %end

global:
  %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
  br label %end

end:
  %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
  store i32 %x, i32 addrspace(4)* %fptr, align 4
; %val = load i32, i32 addrspace(4)* %fptr, align 4
; store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Check for prologue initializing special SGPRs pointing to scratch.
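; flat_scratch_lo and flat_scratch_hi must be written from the scratch
; setup SGPRs before the flat store below can reach the private alloca.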
; HSA-LABEL: {{^}}store_flat_scratch:
; HSA-DAG: s_mov_b32 flat_scratch_lo, s9
; HSA-DAG: s_add_u32 [[ADD:s[0-9]+]], s8, s11
; HSA: s_lshr_b32 flat_scratch_hi, [[ADD]], 8
; HSA: flat_store_dword
; HSA: s_barrier
; HSA: flat_load_dword
define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
  %alloca = alloca i32, i32 9, align 4
  %x = call i32 @llvm.amdgcn.workitem.id.x() #2
  %pptr = getelementptr i32, i32* %alloca, i32 %x
  %fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
  store i32 %x, i32 addrspace(4)* %fptr
  ; Dummy call
  call void @llvm.amdgcn.s.barrier() #1
  %reload = load i32, i32 addrspace(4)* %fptr, align 4
  store i32 %reload, i32 addrspace(1)* %out, align 4
  ret void
}

declare void @llvm.amdgcn.s.barrier() #1
declare i32 @llvm.amdgcn.workitem.id.x() #2

attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }
attributes #2 = { nounwind readnone }