; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Codegen checks for the llvm.amdgcn.atomic.dec intrinsics on addrspace(1),
; addrspace(3) and addrspace(4) pointers, for i32 and i64 operands.
; Only the GCN, CI and VI check prefixes are enabled by the invocations above.

declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2
declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
declare i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* nocapture, i32, i32, i32, i1) #2

declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* nocapture, i64, i32, i32, i1) #2

declare i32 @llvm.amdgcn.workitem.id.x() #1

; Make sure there is no crash when the third intrinsic operand (%order.var
; here) is a non-constant value. Only the label is checked.
; GCN-LABEL: {{^}}invalid_variable_order_lds_atomic_dec_ret_i32:
define amdgpu_kernel void @invalid_variable_order_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %order.var) #0 {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %order.var, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; Make sure there is no crash when the fourth intrinsic operand (%scope.var
; here) is a non-constant value. Only the label is checked.
; GCN-LABEL: {{^}}invalid_variable_scope_lds_atomic_dec_ret_i32:
define amdgpu_kernel void @invalid_variable_scope_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %scope.var) #0 {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %scope.var, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; Make sure there is no crash when the fifth (i1) intrinsic operand
; (%volatile.var here) is a non-constant value. Only the label is checked.
; GCN-LABEL: {{^}}invalid_variable_volatile_lds_atomic_dec_ret_i32:
define amdgpu_kernel void @invalid_variable_volatile_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i1 %volatile.var) #0 {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %volatile.var)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; addrspace(3) i32 atomic dec whose result is stored: expects the returning
; ds_dec_rtn_u32 with the constant 42 materialized in a VGPR.
; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
define amdgpu_kernel void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; Same as the plain returning case, but the pointer is advanced by 4 i32
; elements; the check expects that to appear as offset:16 on the instruction.
; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; addrspace(3) i32 atomic dec whose result is unused: expects the
; non-returning ds_dec_u32, with the pointer copied from the loaded SGPR into
; a VGPR and the data operand holding the constant 42.
; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32:
; GCN: s_load_dword [[SPTR:s[0-9]+]],
; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 42
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_dec_u32 [[VPTR]], [[DATA]]
define amdgpu_kernel void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; Unused-result addrspace(3) i32 atomic dec on a pointer advanced by 4 i32
; elements: expects ds_dec_u32 with offset:16.
; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_dec_u32 v{{[0-9]+}}, [[K]] offset:16
define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; addrspace(1) i32 atomic dec whose result is stored: expects
; buffer_atomic_dec with the glc modifier and no offset.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
define amdgpu_kernel void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; Returning addrspace(1) i32 atomic dec on a pointer advanced by 4 i32
; elements: expects buffer_atomic_dec with offset:16 and glc.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; Unused-result addrspace(1) i32 atomic dec: expects buffer_atomic_dec
; without glc. The constant capture is defined here so the check does not
; depend on a value captured in an earlier function.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
define amdgpu_kernel void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; Unused-result addrspace(1) i32 atomic dec on a pointer advanced by 4 i32
; elements: expects buffer_atomic_dec with offset:16 and no glc.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; Returning addrspace(1) i32 atomic dec at a workitem-indexed address plus 5
; elements. The CI check expects the addr64 buffer form with offset:20; the VI
; check expects a flat atomic with no offset modifier.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
; VI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out.gep
  ret void
}

; Unused-result variant of the workitem-indexed addrspace(1) i32 case: the
; same instructions are expected, without the glc modifier.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
; VI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; addrspace(4) i32 atomic dec whose result is stored: expects flat_atomic_dec
; with the glc modifier.
; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define amdgpu_kernel void @flat_atomic_dec_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %ptr, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(4)* %out
  ret void
}

; Returning addrspace(4) i32 atomic dec on a pointer advanced by 4 i32
; elements. The check expects no offset modifier on the flat instruction.
; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
  %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(4)* %out
  ret void
}

; Unused-result addrspace(4) i32 atomic dec: expects flat_atomic_dec without
; glc. The constant capture is defined here so the check does not depend on a
; value captured in an earlier function.
; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define amdgpu_kernel void @flat_atomic_dec_noret_i32(i32 addrspace(4)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; Unused-result addrspace(4) i32 atomic dec on a pointer advanced by 4 i32
; elements: expects flat_atomic_dec with neither glc nor an offset modifier.
; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; Returning addrspace(4) i32 atomic dec at a workitem-indexed address plus 5
; elements: expects flat_atomic_dec with glc.
; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id
  %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 %id
  %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(4)* %out.gep
  ret void
}

; Unused-result addrspace(4) i32 atomic dec at a workitem-indexed address plus
; 5 elements: expects flat_atomic_dec without glc.
; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(i32 addrspace(4)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id
  %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; addrspace(4) i64 atomic dec whose result is stored: expects
; flat_atomic_dec_x2 with glc, the 64-bit constant 42 held in a VGPR pair.
; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(4)* %out
  ret void
}

; Returning addrspace(4) i64 atomic dec on a pointer advanced by 4 i64
; elements. The check expects no offset modifier on the flat instruction.
; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
  %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(4)* %out
  ret void
}

; Unused-result addrspace(4) i64 atomic dec: expects flat_atomic_dec_x2
; without glc.
; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64 addrspace(4)* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; Unused-result addrspace(4) i64 atomic dec on a pointer advanced by 4 i64
; elements: expects flat_atomic_dec_x2 with neither glc nor an offset modifier.
; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64 addrspace(4)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; Returning addrspace(4) i64 atomic dec at a workitem-indexed address plus 5
; elements: expects flat_atomic_dec_x2 with glc.
; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64_offset_addr64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id
  %out.gep = getelementptr i64, i64 addrspace(4)* %out, i32 %id
  %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(4)* %out.gep
  ret void
}

; Unused-result addrspace(4) i64 atomic dec at a workitem-indexed address plus
; 5 elements: expects flat_atomic_dec_x2 without glc.
; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64_offset_addr64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64 addrspace(4)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id
  %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

@lds0 = addrspace(3) global [512 x i32] undef

; The address is @lds0 indexed by (workitem id + 2), and the index is also
; stored so it has a second use. The checks expect the pointer to be formed by
; a shift left of 2 and the +2 elements to appear as offset:8 on the DS
; instruction.
; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0:
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_dec_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
define amdgpu_kernel void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
  %val0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9, i32 0, i32 0, i1 false)
  store i32 %idx.0, i32 addrspace(1)* %add_use
  store i32 %val0, i32 addrspace(1)* %out
  ret void
}

; addrspace(3) i64 atomic dec whose result is stored: expects the returning
; ds_dec_rtn_u64 with the 64-bit constant 42 in a VGPR pair and no offset.
; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; Returning addrspace(3) i64 atomic dec on a pointer advanced by 4 i64
; elements: expects ds_dec_rtn_u64 with offset:32.
; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; Unused-result addrspace(3) i64 atomic dec: expects the non-returning
; ds_dec_u64 with no offset.
; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; Unused-result addrspace(3) i64 atomic dec on a pointer advanced by 4 i64
; elements: expects ds_dec_u64 with offset:32.
; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; addrspace(1) i64 atomic dec whose result is stored: expects
; buffer_atomic_dec_x2 with glc and no offset.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
define amdgpu_kernel void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; Returning addrspace(1) i64 atomic dec on a pointer advanced by 4 i64
; elements: expects buffer_atomic_dec_x2 with offset:32 and glc.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; Unused-result addrspace(1) i64 atomic dec: expects buffer_atomic_dec_x2
; without glc.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
define amdgpu_kernel void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; Unused-result addrspace(1) i64 atomic dec on a pointer advanced by 4 i64
; elements: expects buffer_atomic_dec_x2 with offset:32 and no glc.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; Returning addrspace(1) i64 atomic dec at a workitem-indexed address plus 5
; elements. The CI check expects the addr64 buffer form with offset:40; the VI
; check expects a flat atomic with no offset modifier.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset_addr64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out.gep
  ret void
}

; Unused-result variant of the workitem-indexed addrspace(1) i64 case: the
; same instructions are expected, without the glc modifier.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset_addr64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

@lds1 = addrspace(3) global [512 x i64] undef, align 8

; The address is @lds1 indexed by (workitem id + 2), and the index is also
; stored so it has a second use. The checks expect the pointer to be formed by
; a shift left of 3 and the +2 elements to appear as offset:16 on the DS
; instruction.
; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0_i64:
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
  %val0 = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9, i32 0, i32 0, i1 false)
  store i32 %idx.0, i32 addrspace(1)* %add_use
  store i64 %val0, i64 addrspace(1)* %out
  ret void
}

; Attribute groups referenced by the kernels (#0), the workitem-id
; declaration/calls (#1), and the atomic.dec intrinsic declarations (#2).
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind argmemonly }