; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s

;;;==========================================================================;;;
;; 16-bit integer comparisons
;;;==========================================================================;;;

; GCN-LABEL: {{^}}i16_eq:
; VI: v_cmp_eq_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_eq_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_eq(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %b = load i16, i16 addrspace(1)* %b.gep
  %tmp0 = icmp eq i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}i16_ne:
; VI: v_cmp_ne_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ne_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_ne(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %b = load i16, i16 addrspace(1)* %b.gep
  %tmp0 = icmp ne i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}i16_ugt:
; VI: v_cmp_gt_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_gt_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_ugt(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %b = load i16, i16 addrspace(1)* %b.gep
  %tmp0 = icmp ugt i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}i16_uge:
; VI: v_cmp_ge_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ge_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_uge(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %b = load i16, i16 addrspace(1)* %b.gep
  %tmp0 = icmp uge i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}i16_ult:
; VI: v_cmp_lt_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_lt_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_ult(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %b = load i16, i16 addrspace(1)* %b.gep
  %tmp0 = icmp ult i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}i16_ule:
; VI: v_cmp_le_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_le_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_ule(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %b = load i16, i16 addrspace(1)* %b.gep
  %tmp0 = icmp ule i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}i16_sgt:
; VI: v_cmp_gt_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_gt_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_sgt(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %b = load i16, i16 addrspace(1)* %b.gep
  %tmp0 = icmp sgt i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}i16_sge:
; VI: v_cmp_ge_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ge_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_sge(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %b = load i16, i16 addrspace(1)* %b.gep
  %tmp0 = icmp sge i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}i16_slt:
; VI: v_cmp_lt_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_lt_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_slt(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %b = load i16, i16 addrspace(1)* %b.gep
  %tmp0 = icmp slt i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}i16_sle:
; VI: v_cmp_le_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_le_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_sle(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %b = load i16, i16 addrspace(1)* %b.gep
  %tmp0 = icmp sle i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

; These should be commuted to reduce code size
; GCN-LABEL: {{^}}i16_eq_v_s:
; VI: v_cmp_eq_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_eq_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_eq_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %tmp0 = icmp eq i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}i16_ne_v_s:
; VI: v_cmp_ne_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ne_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_ne_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %tmp0 = icmp ne i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}i16_ugt_v_s:
; VI: v_cmp_lt_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_lt_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_ugt_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %tmp0 = icmp ugt i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}i16_uge_v_s:
; VI: v_cmp_le_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_le_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_uge_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %tmp0 = icmp uge i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}i16_ult_v_s:
; VI: v_cmp_gt_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_gt_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_ult_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %tmp0 = icmp ult i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}i16_ule_v_s:
; VI: v_cmp_ge_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ge_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_ule_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %tmp0 = icmp ule i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}i16_sgt_v_s:
; VI: v_cmp_lt_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_lt_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_sgt_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %tmp0 = icmp sgt i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}i16_sge_v_s:
; VI: v_cmp_le_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_sge_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %tmp0 = icmp sge i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}i16_slt_v_s:
; VI: v_cmp_gt_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_gt_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_slt_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %tmp0 = icmp slt i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}i16_sle_v_s:
; VI: v_cmp_ge_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ge_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @i16_sle_v_s(i32 addrspace(1)* %out, i16 addrspace(1)* %a.ptr, i16 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load i16, i16 addrspace(1)* %a.gep
  %tmp0 = icmp sle i16 %a, %b
  %tmp1 = sext i1 %tmp0 to i32
  store i32 %tmp1, i32 addrspace(1)* %out.gep
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }