; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s

; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; CI: enable_sgpr_queue_ptr = 1
; GFX9: enable_sgpr_queue_ptr = 0

; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]

; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(15, 16, 16)
; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16
; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_SHARED_BASE]]

; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_shared_base
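; The gfx9 aperture does not come from the queue pointer: hwreg 15 is
; SH_MEM_BASES, and the 16 bits at offset 16 hold the shared aperture base,
; shifted left by 16 to form the high 32 bits of the flat address. The
; disabled GFX9-XXX lines sketch the preferred form, reading the
; src_shared_base register directly.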

; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]

; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]]
; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7

; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]
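; The cast compares the 32-bit group pointer against -1 (the group null
; value) and selects either the flat null pointer or the aperture base
; paired with the original pointer as the low half.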

; At most 2 digits. Make sure src_shared_base is not counted as a
; high-numbered SGPR.

; CI: NumSgprs: {{[0-9][0-9]+}}
; GFX9: NumSgprs: {{[0-9]+}}
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
  store volatile i32 7, i32 addrspace(4)* %stof
  ret void
}

; HSA-LABEL: {{^}}use_private_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; CI: enable_sgpr_queue_ptr = 1
; GFX9: enable_sgpr_queue_ptr = 0

; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}
; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]

; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
; GFX9-DAG: s_getreg_b32 [[SSRC_PRIVATE:s[0-9]+]], hwreg(15, 0, 16)
; GFX9-DAG: s_lshl_b32 [[SSRC_PRIVATE_BASE:s[0-9]+]], [[SSRC_PRIVATE]], 16
; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_PRIVATE_BASE]]

; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base
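; Same sequence as the group case, except the private aperture is at hwreg
; offset 0 (queue offset 0x11 on CI), and the private null value is 0, so
; the compare below is against 0 instead of -1.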

; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]

; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], 0
; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]]
; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7

; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]

; CI: NumSgprs: {{[0-9][0-9]+}}
; GFX9: NumSgprs: {{[0-9]+}}
define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #0 {
  %stof = addrspacecast i32* %ptr to i32 addrspace(4)*
  store volatile i32 7, i32 addrspace(4)* %stof
  ret void
}

; no-op
; HSA-LABEL: {{^}}use_global_to_flat_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)*
  store volatile i32 7, i32 addrspace(4)* %stof
  ret void
}

; no-op
; HSA-LABEL: {{^}}use_constant_to_flat_addrspacecast:
; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA: flat_load_dword v{{[0-9]+}}, v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %stof
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_group_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: ds_write_b32 [[CASTPTR]], v[[K]]
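; Flat-to-group needs no aperture: the 64-bit flat pointer is compared
; against flat null, and either -1 (group null) or the low 32 bits of the
; pointer is selected.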
define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #0 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_private_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], 0, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
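; Same as flat-to-group, but a null flat pointer selects 0, the private
; null value.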
define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #0 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32*
  store volatile i32 0, i32* %ftos
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_global_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0
; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #0 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)*
  store volatile i32 0, i32 addrspace(1)* %ftos
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_constant_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
; HSA: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, 0x0
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #0 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)*
  load volatile i32, i32 addrspace(2)* %ftos
  ret void
}

; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(15, 16, 16)
; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16
; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SSRC_SHARED_BASE]]

; GFX9-XXX: v_mov_b32_e32 v[[HI:[0-9]+]], src_shared_base

; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
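; Since the group null value is -1, a literal 0 group pointer is a valid
; address, and the cast still produces the aperture base in the high half
; with 0 in the low half.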
define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(3)* null to i32 addrspace(4)*
  store volatile i32 7, i32 addrspace(4)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_0_flat_to_group_addrspacecast:
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(3)*
  store volatile i32 7, i32 addrspace(3)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_neg1_group_to_flat_addrspacecast:
; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
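; Conversely, -1 is the group null value, so the cast folds to the flat
; null pointer: both halves are 0.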
define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32 addrspace(4)*
  store volatile i32 7, i32 addrspace(4)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_neg1_flat_to_group_addrspacecast:
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(4)* inttoptr (i64 -1 to i32 addrspace(4)*) to i32 addrspace(3)*
  store volatile i32 7, i32 addrspace(3)* %cast
  ret void
}

; FIXME: Shouldn't need to enable queue ptr
; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
; CI: enable_sgpr_queue_ptr = 1
; GFX9: enable_sgpr_queue_ptr = 0

; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
  %cast = addrspacecast i32* null to i32 addrspace(4)*
  store volatile i32 7, i32 addrspace(4)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_0_flat_to_private_addrspacecast:
; HSA: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(0)*
  store volatile i32 7, i32* %cast
  ret void
}

; Disable optimizations in case there are optimizations added that
; specialize away generic pointer accesses.

; HSA-LABEL: {{^}}branch_use_flat_i32:
; HSA: flat_store_dword {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}
; HSA: s_endpgm
define amdgpu_kernel void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
entry:
  %cmp = icmp ne i32 %c, 0
  br i1 %cmp, label %local, label %global

local:
  %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)*
  br label %end

global:
  %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
  br label %end

end:
  %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
  store volatile i32 %x, i32 addrspace(4)* %fptr, align 4
; %val = load i32, i32 addrspace(4)* %fptr, align 4
; store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Check for prologue initializing special SGPRs pointing to scratch.
; HSA-LABEL: {{^}}store_flat_scratch:
; CI-DAG: s_mov_b32 flat_scratch_lo, s9
; CI-DAG: s_add_u32 [[ADD:s[0-9]+]], s8, s11
; CI: s_lshr_b32 flat_scratch_hi, [[ADD]], 8

; GFX9: s_add_u32 flat_scratch_lo, s6, s9
; GFX9: s_addc_u32 flat_scratch_hi, s7, 0
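; The shift by 8 reflects flat_scratch's 256-byte granularity on CI; gfx9
; programs a plain byte address, so a 64-bit add of base and offset
; suffices.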

; HSA: flat_store_dword
; HSA: s_barrier
; HSA: flat_load_dword
define amdgpu_kernel void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
  %alloca = alloca i32, i32 9, align 4
  %x = call i32 @llvm.amdgcn.workitem.id.x() #2
  %pptr = getelementptr i32, i32* %alloca, i32 %x
  %fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
  store volatile i32 %x, i32 addrspace(4)* %fptr
  ; Dummy call
  call void @llvm.amdgcn.s.barrier() #1
  %reload = load volatile i32, i32 addrspace(4)* %fptr, align 4
  store volatile i32 %reload, i32 addrspace(1)* %out, align 4
  ret void
}

declare void @llvm.amdgcn.s.barrier() #1
declare i32 @llvm.amdgcn.workitem.id.x() #2

attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }
attributes #2 = { nounwind readnone }