; Source blob: d17179e96bc74601c3d913a60bebe37151c2ff3e
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
3
; Atomic-decrement intrinsics exercised by this test, overloaded on the value
; width (i32 / i64) and on the pointer address space (p3 = addrspace(3),
; p1 = addrspace(1)).
declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2
declare i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* nocapture, i64) #2
declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32) #2
declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64) #2

; Workitem id, used by the addr64 and shl_base tests to build a per-lane index.
declare i32 @llvm.amdgcn.workitem.id.x() #1
11
; Returning i32 decrement through an addrspace(3) pointer. The constant 42 is
; expected in a VGPR that becomes the data operand of the returning DS op.
; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %old = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42)
  ; Storing the result keeps the returning form of the instruction alive.
  store i32 %old, i32 addrspace(1)* %out
  ret void
}
20
; Same as the plain returning i32 case, but the pointer is advanced by four
; i32 elements; the checks expect that to appear as an immediate offset of 16.
; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %ptr.off = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %old = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr.off, i32 42)
  store i32 %old, i32 addrspace(1)* %out
  ret void
}
30
; Non-returning i32 decrement through an addrspace(3) pointer. The result of
; the intrinsic is unused, so the checks expect the no-return DS form, with the
; pointer copied from an SGPR kernel argument and 42 as the data operand.
; The label uses the GCN prefix because the RUN lines enable only GCN plus one
; per-target prefix; a label under any other prefix is never checked.
; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32:
; GCN: s_load_dword [[SPTR:s[0-9]+]],
; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 42{{$}}
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_dec_u32 [[VPTR]], [[DATA]]
define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %unused = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42)
  ret void
}
40
; Non-returning i32 decrement on an addrspace(3) pointer advanced by four i32
; elements; the checks expect the no-return DS form with an offset of 16.
; The label uses the GCN prefix so that it is actually checked by both RUN
; lines.
; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_dec_u32 v{{[0-9]+}}, [[K]] offset:16
define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %ptr.off = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %unused = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr.off, i32 42)
  ret void
}
49
; Returning i32 decrement through an addrspace(1) pointer. The checks expect a
; buffer atomic with no VGPR address ("off"), no immediate offset, and the glc
; bit set.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
define void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %old = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
  store i32 %old, i32 addrspace(1)* %out
  ret void
}
58
; Returning i32 decrement on an addrspace(1) pointer advanced by four i32
; elements; the checks expect the 16-byte displacement as an immediate offset.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
define void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %ptr.off = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %old = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr.off, i32 42)
  store i32 %old, i32 addrspace(1)* %out
  ret void
}
68
; Non-returning i32 decrement through an addrspace(1) pointer. The checks
; expect a buffer atomic without glc and without an immediate offset.
; K is captured inside this function so the data operand is tied to the 42
; materialized here rather than to a register captured by an earlier test.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
define void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) nounwind {
  %unused = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
  ret void
}
75
; Non-returning i32 decrement on an addrspace(1) pointer advanced by four i32
; elements; the checks expect offset 16 and no glc.
; The label uses the GCN prefix so that it is actually checked by both RUN
; lines.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
define void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
  %ptr.off = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %unused = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr.off, i32 42)
  ret void
}
84
; Returning i32 decrement where the address depends on the workitem id plus a
; constant five-element displacement. The bonaire run expects an addr64 buffer
; atomic carrying offset 20; the tonga run expects a flat atomic with no
; trailing offset. Both expect glc.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
; VI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define void @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.ptr = getelementptr i32, i32 addrspace(1)* %ptr, i32 %lane
  %lane.out = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %ptr.off = getelementptr i32, i32 addrspace(1)* %lane.ptr, i32 5
  %old = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr.off, i32 42)
  store i32 %old, i32 addrspace(1)* %lane.out
  ret void
}
98
; Non-returning variant of the workitem-indexed i32 case. The result is
; unused, so neither target's expected instruction carries glc.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
; VI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define void @global_atomic_dec_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.ptr = getelementptr i32, i32 addrspace(1)* %ptr, i32 %lane
  %ptr.off = getelementptr i32, i32 addrspace(1)* %lane.ptr, i32 5
  %unused = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr.off, i32 42)
  ret void
}
110
; addrspace(3) array indexed by the i32 shl_base test below.
@lds0 = addrspace(3) global [512 x i32] undef

; Index is workitem id + 2 into an i32 array. The checks expect the +2 to be
; folded into the DS immediate offset (8 bytes) while the address register is
; just the id shifted left by 2. The index is also stored to %add_use so the
; add has a second user besides the address computation.
; Checks use the GCN prefix (the only common prefix enabled by the RUN lines)
; and include the data operand, matching the i64 variant of this test.
; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0:
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_dec_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
define void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
  %val0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9)
  store i32 %idx.0, i32 addrspace(1)* %add_use
  store i32 %val0, i32 addrspace(1)* %out
  ret void
}
125
; Returning i64 decrement through an addrspace(3) pointer. The 64-bit constant
; 42 is expected as a VGPR pair (low half 42, high half 0) used as the data
; operand, with no immediate offset on the instruction.
; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %old = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42)
  store i64 %old, i64 addrspace(1)* %out
  ret void
}
135
; Returning i64 decrement on an addrspace(3) pointer advanced by four i64
; elements; the checks expect the displacement as an immediate offset of 32.
; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %ptr.off = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %old = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr.off, i64 42)
  store i64 %old, i64 addrspace(1)* %out
  ret void
}
146
; Non-returning i64 decrement through an addrspace(3) pointer; the checks
; expect the no-return DS form with the 42/0 VGPR pair as data and no offset.
; The label uses the GCN prefix so that it is actually checked by both RUN
; lines.
; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %unused = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42)
  ret void
}
155
; Non-returning i64 decrement on an addrspace(3) pointer advanced by four i64
; elements; the checks expect the no-return DS form with an offset of 32.
; The label uses the GCN prefix so that it is actually checked by both RUN
; lines.
; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %ptr.off = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %unused = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr.off, i64 42)
  ret void
}
165
; Returning i64 decrement through an addrspace(1) pointer. The checks expect
; the x2 buffer atomic with the 42/0 VGPR pair as data, no offset, and glc.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
define void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %old = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
  store i64 %old, i64 addrspace(1)* %out
  ret void
}
175
; Returning i64 decrement on an addrspace(1) pointer advanced by four i64
; elements; the checks expect offset 32 together with glc.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
define void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %ptr.off = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %old = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr.off, i64 42)
  store i64 %old, i64 addrspace(1)* %out
  ret void
}
186
; Non-returning i64 decrement through an addrspace(1) pointer; the checks
; expect the x2 buffer atomic without glc and without an immediate offset.
; The label uses the GCN prefix so that it is actually checked by both RUN
; lines.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
define void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) nounwind {
  %unused = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
  ret void
}
195
; Non-returning i64 decrement on an addrspace(1) pointer advanced by four i64
; elements; the checks expect offset 32 and no glc.
; The label uses the GCN prefix so that it is actually checked by both RUN
; lines.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
define void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
  %ptr.off = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %unused = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr.off, i64 42)
  ret void
}
205
; Returning i64 decrement where the address depends on the workitem id plus a
; constant five-element displacement. The bonaire run expects an addr64 x2
; buffer atomic carrying offset 40; the tonga run expects a flat x2 atomic with
; no trailing offset. Both expect glc.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset_addr64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.ptr = getelementptr i64, i64 addrspace(1)* %ptr, i32 %lane
  %lane.out = getelementptr i64, i64 addrspace(1)* %out, i32 %lane
  %ptr.off = getelementptr i64, i64 addrspace(1)* %lane.ptr, i32 5
  %old = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr.off, i64 42)
  store i64 %old, i64 addrspace(1)* %lane.out
  ret void
}
220
; Non-returning variant of the workitem-indexed i64 case. The result is
; unused, so neither target's expected instruction carries glc.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset_addr64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.ptr = getelementptr i64, i64 addrspace(1)* %ptr, i32 %lane
  %ptr.off = getelementptr i64, i64 addrspace(1)* %lane.ptr, i32 5
  %unused = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr.off, i64 42)
  ret void
}
233
; addrspace(3) array of i64 indexed by the shl_base test below.
@lds1 = addrspace(3) global [512 x i64] undef, align 8

; Index is workitem id + 2 into an i64 array. The checks expect the +2 to be
; folded into the DS immediate offset (16 bytes) while the address register is
; the id shifted left by 3. The index is also stored to %add_use so the add has
; a second user besides the address computation.
; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0_i64:
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
define void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %lane = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %slot = add nsw i32 %lane, 2
  %slot.ptr = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %slot
  %old = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %slot.ptr, i64 9)
  store i32 %slot, i32 addrspace(1)* %add_use
  store i64 %old, i64 addrspace(1)* %out
  ret void
}
248
; #0 is applied to test functions, #1 to the workitem-id intrinsic, and #2 to
; the atomic-dec intrinsic declarations.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind argmemonly }