; Integer signed/unsigned min pattern-matching test for the AMDGPU (SI/VI/GFX9)
; and R600 (EG) backends: icmp+select pairs should combine into min instructions.
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; FUNC-LABEL: {{^}}v_test_imin_sle_i32:
; GCN: v_min_i32_e32

; EG: MIN_INT
; Per-lane loads: icmp sle + select on i32 should fold to a single vector min.
define amdgpu_kernel void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %a.gep, align 4
  %b = load i32, i32 addrspace(1)* %b.gep, align 4
  %cmp = icmp sle i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out.gep, align 4
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_i32:
; GCN: s_min_i32

; EG: MIN_INT
; Scalar (SGPR) kernel-argument operands: expect the scalar min form.
define amdgpu_kernel void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %cmp = icmp sle i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_v1i32:
; GCN: s_min_i32

; EG: MIN_INT
; <1 x i32> degenerates to the scalar case.
define amdgpu_kernel void @s_test_imin_sle_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 {
  %cmp = icmp sle <1 x i32> %a, %b
  %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
  store <1 x i32> %val, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_v4i32:
; GCN: s_min_i32
; GCN: s_min_i32
; GCN: s_min_i32
; GCN: s_min_i32

; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
; Vector min scalarizes: one min per element.
define amdgpu_kernel void @s_test_imin_sle_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) #0 {
  %cmp = icmp sle <4 x i32> %a, %b
  %val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %val, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_i8:
; GCN: s_load_dword
; GCN: s_load_dword
; GCN: s_sext_i32_i8
; GCN: s_sext_i32_i8
; GCN: s_min_i32
; i8 operands ([8 x i32] padding forces separate dword arg slots): sign-extend
; each byte, then a single scalar min.
define amdgpu_kernel void @s_test_imin_sle_i8(i8 addrspace(1)* %out, [8 x i32], i8 %a, [8 x i32], i8 %b) #0 {
  %cmp = icmp sle i8 %a, %b
  %val = select i1 %cmp, i8 %a, i8 %b
  store i8 %val, i8 addrspace(1)* %out
  ret void
}

; FIXME: Why vector and sdwa for last element?
; FUNC-LABEL: {{^}}s_test_imin_sle_v4i8:
; GCN: s_load_dword s
; GCN: s_load_dword s
; GCN-NOT: _load_

; SI: s_min_i32
; SI: s_min_i32
; SI: s_min_i32
; SI: s_min_i32

; VI: s_min_i32
; VI: s_min_i32
; VI: s_min_i32
; VI: v_min_i32_sdwa

; GFX9: v_min_i16
; GFX9: v_min_i16
; GFX9: v_min_i16
; GFX9: v_min_i16

; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
; Packed <4 x i8> args arrive as single dwords; each target unpacks differently.
define amdgpu_kernel void @s_test_imin_sle_v4i8(<4 x i8> addrspace(1)* %out, [8 x i32], <4 x i8> %a, [8 x i32], <4 x i8> %b) #0 {
  %cmp = icmp sle <4 x i8> %a, %b
  %val = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
  store <4 x i8> %val, <4 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_v2i16:
; GCN: s_load_dword s
; GCN: s_load_dword s

; SI: s_ashr_i32
; SI: s_sext_i32_i16
; SI: s_ashr_i32
; SI: s_sext_i32_i16
; SI: s_min_i32
; SI: s_min_i32

; VI: s_sext_i32_i16
; VI: s_sext_i32_i16
; VI: s_min_i32
; VI: s_min_i32

; GFX9: v_pk_min_i16
; SI/VI sign-extend the halves and use 32-bit min; GFX9 has a packed 16-bit min.

; EG: MIN_INT
; EG: MIN_INT
define amdgpu_kernel void @s_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #0 {
  %cmp = icmp sle <2 x i16> %a, %b
  %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
  store <2 x i16> %val, <2 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_v4i16:
; SI-NOT: buffer_load
; SI: s_min_i32
; SI: s_min_i32
; SI: s_min_i32
; SI: s_min_i32

; VI: s_min_i32
; VI: s_min_i32
; VI: s_min_i32
; VI: s_min_i32

; GFX9: v_pk_min_i16
; GFX9: v_pk_min_i16

; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
; EG: MIN_INT
; Four halves: SI/VI scalarize to four s_min_i32; GFX9 uses two packed mins.
define amdgpu_kernel void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) #0 {
  %cmp = icmp sle <4 x i16> %a, %b
  %val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b
  store <4 x i16> %val, <4 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: @v_test_imin_slt_i32
; GCN: v_min_i32_e32

; EG: MIN_INT
; Strict (slt) compare also folds to min.
define amdgpu_kernel void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %a.gep, align 4
  %b = load i32, i32 addrspace(1)* %b.gep, align 4
  %cmp = icmp slt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out.gep, align 4
  ret void
}

; FUNC-LABEL: @v_test_imin_slt_i16
; SI: v_min_i32_e32

; GFX89: v_min_i16_e32

; EG: MIN_INT
; i16 min: SI has no 16-bit min and widens to 32-bit; VI/GFX9 use v_min_i16.
define amdgpu_kernel void @v_test_imin_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid

  %a = load i16, i16 addrspace(1)* %a.gep
  %b = load i16, i16 addrspace(1)* %b.gep
  %cmp = icmp slt i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %out.gep
  ret void
}

; FUNC-LABEL: @s_test_imin_slt_i32
; GCN: s_min_i32

; EG: MIN_INT
; Scalar slt + select folds to s_min_i32.
define amdgpu_kernel void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %cmp = icmp slt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_slt_v2i32:
; GCN: s_min_i32
; GCN: s_min_i32

; EG: MIN_INT
; EG: MIN_INT
; Two-element vector scalarizes to two scalar mins.
define amdgpu_kernel void @s_test_imin_slt_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
  %cmp = icmp slt <2 x i32> %a, %b
  %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b
  store <2 x i32> %val, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i32:
; GCN: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
; Immediate operand should be used directly as the inline constant 8.

; EG: MIN_INT {{.*}}literal.{{[xyzw]}}
define amdgpu_kernel void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) #0 {
  %cmp = icmp slt i32 %a, 8
  %val = select i1 %cmp, i32 %a, i32 8
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_imm_i32:
; GCN: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
; Same as the slt case but with a non-strict compare.

; EG: MIN_INT {{.*}}literal.{{[xyzw]}}
define amdgpu_kernel void @s_test_imin_sle_imm_i32(i32 addrspace(1)* %out, i32 %a) #0 {
  %cmp = icmp sle i32 %a, 8
  %val = select i1 %cmp, i32 %a, i32 8
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @v_test_umin_ule_i32
; GCN: v_min_u32_e32

; EG: MIN_UINT
; Unsigned compare + select folds to the unsigned min.
define amdgpu_kernel void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %a.gep, align 4
  %b = load i32, i32 addrspace(1)* %b.gep, align 4
  %cmp = icmp ule i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out.gep, align 4
  ret void
}

; FUNC-LABEL: @v_test_umin_ule_v3i32
; GCN: v_min_u32_e32
; GCN: v_min_u32_e32
; GCN: v_min_u32_e32
; GCN-NOT: v_min_u32_e32
; GCN: s_endpgm
; Exactly three mins for the three elements — no extra min for padding.

; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %a.ptr, <3 x i32> addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid

  %a = load <3 x i32>, <3 x i32> addrspace(1)* %a.gep
  %b = load <3 x i32>, <3 x i32> addrspace(1)* %b.gep
  %cmp = icmp ule <3 x i32> %a, %b
  %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b
  store <3 x i32> %val, <3 x i32> addrspace(1)* %out.gep
  ret void
}

; FIXME: Reduce unused packed component to scalar
; FUNC-LABEL: @v_test_umin_ule_v3i16{{$}}
; SI: v_min_u32_e32
; SI: v_min_u32_e32
; SI: v_min_u32_e32
; SI-NOT: v_min_u32_e32

; VI: v_min_u16_e32
; VI: v_min_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI: v_min_u16_e32
; VI-NOT: v_min_u16

; GFX9: v_pk_min_u16
; GFX9: v_pk_min_u16

; GCN: s_endpgm

; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ule_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %a.ptr, <3 x i16> addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %out, i32 %tid

  %a = load <3 x i16>, <3 x i16> addrspace(1)* %a.gep
  %b = load <3 x i16>, <3 x i16> addrspace(1)* %b.gep
  %cmp = icmp ule <3 x i16> %a, %b
  %val = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store <3 x i16> %val, <3 x i16> addrspace(1)* %out.gep
  ret void
}

; FUNC-LABEL: @s_test_umin_ule_i32
; GCN: s_min_u32

; EG: MIN_UINT
; Scalar unsigned min.
define amdgpu_kernel void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %cmp = icmp ule i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @v_test_umin_ult_i32
; GCN: v_min_u32_e32

; EG: MIN_UINT
; Strict (ult) unsigned compare also folds to min.
define amdgpu_kernel void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %a.gep, align 4
  %b = load i32, i32 addrspace(1)* %b.gep, align 4
  %cmp = icmp ult i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out.gep, align 4
  ret void
}

; FUNC-LABEL: {{^}}v_test_umin_ult_i8:
; SI: {{buffer|flat|global}}_load_ubyte
; SI: {{buffer|flat|global}}_load_ubyte
; SI: v_min_u32_e32

; GFX89: {{flat|global}}_load_ubyte
; GFX89: {{flat|global}}_load_ubyte
; GFX89: v_min_u16_e32

; EG: MIN_UINT
; i8 loads zero-extend, so SI can use the 32-bit min; VI/GFX9 use v_min_u16.
define amdgpu_kernel void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %a.ptr, i8 addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds i8, i8 addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds i8, i8 addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i32 %tid

  %a = load i8, i8 addrspace(1)* %a.gep, align 1
  %b = load i8, i8 addrspace(1)* %b.gep, align 1
  %cmp = icmp ult i8 %a, %b
  %val = select i1 %cmp, i8 %a, i8 %b
  store i8 %val, i8 addrspace(1)* %out.gep, align 1
  ret void
}

; FUNC-LABEL: @s_test_umin_ult_i32
; GCN: s_min_u32

; EG: MIN_UINT
; Scalar ult + select folds to s_min_u32.
define amdgpu_kernel void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %cmp = icmp ult i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @v_test_umin_ult_i32_multi_use
; SI-NOT: v_min
; GCN: v_cmp_lt_u32
; SI-NEXT: v_cndmask_b32
; SI-NOT: v_min
; GCN: s_endpgm
; The compare result is stored separately, so the min combine must NOT fire;
; the compare + cndmask pair must be kept.

; EG-NOT: MIN_UINT
define amdgpu_kernel void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 {
  %a = load i32, i32 addrspace(1)* %aptr, align 4
  %b = load i32, i32 addrspace(1)* %bptr, align 4
  %cmp = icmp ult i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out0, align 4
  store i1 %cmp, i1 addrspace(1)* %out1
  ret void
}

; FUNC-LABEL: @v_test_umin_ult_i16_multi_use
; GCN-NOT: v_min
; GCN: v_cmp_lt_u32
; GCN-NEXT: v_cndmask_b32
; GCN-NOT: v_min
; GCN: s_endpgm
; i16 variant of the multi-use case: compare has a second user, so no min forms.

; EG-NOT: MIN_UINT
define amdgpu_kernel void @v_test_umin_ult_i16_multi_use(i16 addrspace(1)* %out0, i1 addrspace(1)* %out1, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
  %a = load i16, i16 addrspace(1)* %aptr, align 2
  %b = load i16, i16 addrspace(1)* %bptr, align 2
  %cmp = icmp ult i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %out0, align 2
  store i1 %cmp, i1 addrspace(1)* %out1
  ret void
}


; FUNC-LABEL: @s_test_umin_ult_v1i32
; GCN: s_min_u32

; EG: MIN_UINT
; <1 x i32> degenerates to the scalar unsigned case.
define amdgpu_kernel void @s_test_umin_ult_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) #0 {
  %cmp = icmp ult <1 x i32> %a, %b
  %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
  store <1 x i32> %val, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_umin_ult_v8i32:
; GCN: s_min_u32
; GCN: s_min_u32
; GCN: s_min_u32
; GCN: s_min_u32
; GCN: s_min_u32
; GCN: s_min_u32
; GCN: s_min_u32
; GCN: s_min_u32

; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; Wide vector fully scalarizes: eight mins.
define amdgpu_kernel void @s_test_umin_ult_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) #0 {
  %cmp = icmp ult <8 x i32> %a, %b
  %val = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b
  store <8 x i32> %val, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_umin_ult_v8i16:
; GCN-NOT: {{buffer|flat|global}}_load
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32
; SI: s_min_u32

; VI: s_min_u32
; VI: s_min_u32
; VI: s_min_u32
; VI: s_min_u32
; VI: s_min_u32
; VI: s_min_u32
; VI: s_min_u32
; VI: s_min_u32

; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; EG: MIN_UINT
; SGPR args must be consumed directly — no vector loads should be emitted.
define amdgpu_kernel void @s_test_umin_ult_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) #0 {
  %cmp = icmp ult <8 x i16> %a, %b
  %val = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
  store <8 x i16> %val, <8 x i16> addrspace(1)* %out
  ret void
}

; Make sure the redundant 'and' (mask with 0xffff) is removed.
; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umin_ult_i16:
; GCN-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xa|0x28}}
; GCN-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
; GCN: s_min_u32 [[MIN:s[0-9]+]], [[A]], [[B]]
; GCN: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
; GCN: buffer_store_dword [[VMIN]]

; EG: MIN_UINT
; zeroext args already have clear high bits, so min-of-zext needs no final mask.
define amdgpu_kernel void @simplify_demanded_bits_test_umin_ult_i16(i32 addrspace(1)* %out, [8 x i32], i16 zeroext %a, [8 x i32], i16 zeroext %b) #0 {
  %a.ext = zext i16 %a to i32
  %b.ext = zext i16 %b to i32
  %cmp = icmp ult i32 %a.ext, %b.ext
  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
  %mask = and i32 %val, 65535
  store i32 %mask, i32 addrspace(1)* %out
  ret void
}

; Make sure the redundant sign_extend_inreg (shl 16 / ashr 16) is removed.

; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16:
; GCN-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xa|0x28}}
; GCN-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
; GCN-DAG: s_sext_i32_i16 [[EXT_A:s[0-9]+]], [[A]]
; GCN-DAG: s_sext_i32_i16 [[EXT_B:s[0-9]+]], [[B]]

; GCN: s_min_i32 [[MIN:s[0-9]+]], [[EXT_A]], [[EXT_B]]
; GCN: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
; GCN: buffer_store_dword [[VMIN]]

; EG: MIN_INT
; min of two sign-extended values is already sign-extended; no extra shl/ashr.
define amdgpu_kernel void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, [8 x i32], i16 signext %a, [8 x i32], i16 signext %b) #0 {
  %a.ext = sext i16 %a to i32
  %b.ext = sext i16 %b to i32
  %cmp = icmp slt i32 %a.ext, %b.ext
  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
  %shl = shl i32 %val, 16
  %sextinreg = ashr i32 %shl, 16
  store i32 %sextinreg, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_test_imin_sle_i16:
; GCN: s_min_i32

; EG: MIN_INT
; Scalar i16 min widens to the 32-bit scalar min.
define amdgpu_kernel void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) #0 {
  %cmp = icmp sle i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %out
  ret void
}

; 64 bit
; FUNC-LABEL: {{^}}test_umin_ult_i64
; GCN: s_endpgm
; No 64-bit min instruction on GCN; only check the kernel compiles to completion.

; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @test_umin_ult_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
  %tmp = icmp ult i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}test_umin_ule_i64
; GCN: s_endpgm

; EG: MIN_UINT
; EG: MIN_UINT
; EG lowers the 64-bit unsigned min with two 32-bit MIN_UINTs.
define amdgpu_kernel void @test_umin_ule_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
  %tmp = icmp ule i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}test_imin_slt_i64
; GCN: s_endpgm

; EG-DAG: MIN_UINT
; EG-DAG: MIN_INT
; 64-bit signed min on EG: signed min on the high half, unsigned on the low.
define amdgpu_kernel void @test_imin_slt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
  %tmp = icmp slt i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}test_imin_sle_i64
; GCN: s_endpgm

; EG-DAG: MIN_UINT
; EG-DAG: MIN_INT
; Non-strict variant of the 64-bit signed min.
define amdgpu_kernel void @test_imin_sle_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 {
  %tmp = icmp sle i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_test_imin_sle_v2i16:
; SI: v_min_i32
; SI: v_min_i32

; VI: v_min_i16
; VI: v_min_i16

; GFX9: v_pk_min_i16
; Loaded <2 x i16>: SI widens per element, VI uses two 16-bit mins, GFX9 packs.

; EG: MIN_INT
; EG: MIN_INT
define amdgpu_kernel void @v_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
  %a = load <2 x i16>, <2 x i16> addrspace(1)* %a.gep
  %b = load <2 x i16>, <2 x i16> addrspace(1)* %b.gep
  %cmp = icmp sle <2 x i16> %a, %b
  %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
  store <2 x i16> %val, <2 x i16> addrspace(1)* %out.gep
  ret void
}

; FIXME: i16 min
; FUNC-LABEL: {{^}}v_test_imin_ule_v2i16:
; SI: v_min_u32
; SI: v_min_u32

; VI: v_min_u16
; VI: v_min_u16

; GFX9: v_pk_min_u16
; Unsigned counterpart of v_test_imin_sle_v2i16.

; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @v_test_imin_ule_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
  %b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
  %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
  %a = load <2 x i16>, <2 x i16> addrspace(1)* %a.gep
  %b = load <2 x i16>, <2 x i16> addrspace(1)* %b.gep
  %cmp = icmp ule <2 x i16> %a, %b
  %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b
  store <2 x i16> %val, <2 x i16> addrspace(1)* %out.gep
  ret void
}

; Workitem id.x intrinsic used by the v_* tests to form per-lane addresses.
declare i32 @llvm.r600.read.tidig.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }