; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL
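; The six RUN lines exercise one ISA level each (SSE2, SSE4.1, AVX1, AVX2,
; and AVX-512 with and without VL), sharing the ALL/SSE/AVX/AVX512 prefixes
; where the generated code coincides.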

;
; vXi64
;
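; Pre-AVX-512 x86 has no unsigned vector compare and no 64-bit unsigned min
; instruction, so the lowerings below flip the sign bits with PXOR and
; compare signed instead: 9223372036854775808 is 0x8000000000000000 (the
; i64 sign bit, paired with vpcmpgtq), and 9223372039002259456 is
; 0x8000000080000000 (the sign bit of each i32 half, used by the SSE
; pcmpgtd/pcmpeqd emulation of a 64-bit compare). AVX-512 lowers straight
; to vpminuq.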

define i64 @test_v2i64(<2 x i64> %a0) {
; SSE2-LABEL: test_v2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pxor %xmm2, %xmm3
; SSE2-NEXT: pxor %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT: pand %xmm5, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT: por %xmm2, %xmm3
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm3
; SSE2-NEXT: por %xmm0, %xmm3
; SSE2-NEXT: movq %xmm3, %rax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v2i64:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: pxor %xmm0, %xmm3
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm5, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT: movq %xmm2, %rax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm2
; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
;
; AVX512BW-LABEL: test_v2i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: test_v2i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512VL-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: retq
  %1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> %a0)
  ret i64 %1
}

define i64 @test_v4i64(<4 x i64> %a0) {
; SSE2-LABEL: test_v4i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pxor %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: pxor %xmm2, %xmm4
; SSE2-NEXT: movdqa %xmm4, %xmm5
; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm3, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT: pand %xmm6, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; SSE2-NEXT: por %xmm3, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm4
; SSE2-NEXT: por %xmm0, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; SSE2-NEXT: movdqa %xmm4, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm3
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-NEXT: pand %xmm5, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm4
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: por %xmm4, %xmm2
; SSE2-NEXT: movq %xmm2, %rax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v4i64:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: pxor %xmm3, %xmm0
; SSE41-NEXT: movdqa %xmm1, %xmm4
; SSE41-NEXT: pxor %xmm3, %xmm4
; SSE41-NEXT: movdqa %xmm4, %xmm5
; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE41-NEXT: pand %xmm6, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm3, %xmm0
; SSE41-NEXT: pxor %xmm2, %xmm3
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm0, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE41-NEXT: pand %xmm5, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT: movq %xmm2, %rax
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512BW-LABEL: test_v4i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: test_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vpminuq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512VL-NEXT: vpminuq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> %a0)
  ret i64 %1
}

define i64 @test_v8i64(<8 x i64> %a0) {
; SSE2-LABEL: test_v8i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT: movdqa %xmm1, %xmm5
; SSE2-NEXT: pxor %xmm4, %xmm5
; SSE2-NEXT: movdqa %xmm3, %xmm6
; SSE2-NEXT: pxor %xmm4, %xmm6
; SSE2-NEXT: movdqa %xmm6, %xmm7
; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE2-NEXT: pand %xmm8, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; SSE2-NEXT: por %xmm6, %xmm5
; SSE2-NEXT: pand %xmm5, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm5
; SSE2-NEXT: por %xmm1, %xmm5
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm3
; SSE2-NEXT: pxor %xmm4, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm6
; SSE2-NEXT: pcmpgtd %xmm1, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE2-NEXT: pand %xmm7, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm3
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm3
; SSE2-NEXT: por %xmm0, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm0
; SSE2-NEXT: pxor %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm5, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT: pand %xmm6, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-NEXT: por %xmm0, %xmm1
; SSE2-NEXT: pand %xmm1, %xmm3
; SSE2-NEXT: pandn %xmm5, %xmm1
; SSE2-NEXT: por %xmm3, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pxor %xmm4, %xmm2
; SSE2-NEXT: pxor %xmm0, %xmm4
; SSE2-NEXT: movdqa %xmm4, %xmm3
; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; SSE2-NEXT: pand %xmm5, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT: por %xmm2, %xmm3
; SSE2-NEXT: pand %xmm3, %xmm1
; SSE2-NEXT: pandn %xmm0, %xmm3
; SSE2-NEXT: por %xmm1, %xmm3
; SSE2-NEXT: movq %xmm3, %rax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8i64:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm8
; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm5, %xmm0
; SSE41-NEXT: movdqa %xmm3, %xmm6
; SSE41-NEXT: pxor %xmm5, %xmm6
; SSE41-NEXT: movdqa %xmm6, %xmm7
; SSE41-NEXT: pcmpgtd %xmm0, %xmm7
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm0, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE41-NEXT: pand %xmm4, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
; SSE41-NEXT: por %xmm6, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
; SSE41-NEXT: movdqa %xmm8, %xmm0
; SSE41-NEXT: pxor %xmm5, %xmm0
; SSE41-NEXT: movdqa %xmm2, %xmm1
; SSE41-NEXT: pxor %xmm5, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm4
; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT: pand %xmm6, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm2
; SSE41-NEXT: movapd %xmm2, %xmm0
; SSE41-NEXT: xorpd %xmm5, %xmm0
; SSE41-NEXT: movapd %xmm3, %xmm1
; SSE41-NEXT: xorpd %xmm5, %xmm1
; SSE41-NEXT: movapd %xmm1, %xmm4
; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT: pand %xmm6, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
; SSE41-NEXT: movdqa %xmm3, %xmm0
; SSE41-NEXT: pxor %xmm5, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm5
; SSE41-NEXT: movdqa %xmm5, %xmm2
; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm0, %xmm5
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE41-NEXT: pand %xmm4, %xmm5
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT: por %xmm5, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1
; SSE41-NEXT: movq %xmm1, %rax
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_v8i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm4
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm5
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm4, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm4
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> %a0)
  ret i64 %1
}

define i64 @test_v16i64(<16 x i64> %a0) {
; SSE2-LABEL: test_v16i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT: movdqa %xmm2, %xmm9
; SSE2-NEXT: pxor %xmm8, %xmm9
; SSE2-NEXT: movdqa %xmm6, %xmm10
; SSE2-NEXT: pxor %xmm8, %xmm10
; SSE2-NEXT: movdqa %xmm10, %xmm11
; SSE2-NEXT: pcmpgtd %xmm9, %xmm11
; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm9, %xmm10
; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3]
; SSE2-NEXT: pand %xmm12, %xmm10
; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm11[1,1,3,3]
; SSE2-NEXT: por %xmm10, %xmm9
; SSE2-NEXT: pand %xmm9, %xmm2
; SSE2-NEXT: pandn %xmm6, %xmm9
; SSE2-NEXT: por %xmm2, %xmm9
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pxor %xmm8, %xmm2
; SSE2-NEXT: movdqa %xmm4, %xmm6
; SSE2-NEXT: pxor %xmm8, %xmm6
; SSE2-NEXT: movdqa %xmm6, %xmm10
; SSE2-NEXT: pcmpgtd %xmm2, %xmm10
; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE2-NEXT: pand %xmm11, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm10[1,1,3,3]
; SSE2-NEXT: por %xmm6, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn %xmm4, %xmm2
; SSE2-NEXT: por %xmm0, %xmm2
; SSE2-NEXT: movdqa %xmm3, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movdqa %xmm7, %xmm4
; SSE2-NEXT: pxor %xmm8, %xmm4
; SSE2-NEXT: movdqa %xmm4, %xmm6
; SSE2-NEXT: pcmpgtd %xmm0, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm0, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE2-NEXT: pand %xmm10, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: pand %xmm0, %xmm3
; SSE2-NEXT: pandn %xmm7, %xmm0
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: pxor %xmm8, %xmm3
; SSE2-NEXT: movdqa %xmm5, %xmm4
; SSE2-NEXT: pxor %xmm8, %xmm4
; SSE2-NEXT: movdqa %xmm4, %xmm6
; SSE2-NEXT: pcmpgtd %xmm3, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm3, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT: pand %xmm7, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm6[1,1,3,3]
; SSE2-NEXT: por %xmm3, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pandn %xmm5, %xmm4
; SSE2-NEXT: por %xmm1, %xmm4
; SSE2-NEXT: movdqa %xmm4, %xmm1
; SSE2-NEXT: pxor %xmm8, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pxor %xmm8, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: pcmpgtd %xmm1, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT: pand %xmm6, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
; SSE2-NEXT: por %xmm3, %xmm1
; SSE2-NEXT: pand %xmm1, %xmm4
; SSE2-NEXT: pandn %xmm0, %xmm1
; SSE2-NEXT: por %xmm4, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movdqa %xmm9, %xmm3
; SSE2-NEXT: pxor %xmm8, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE2-NEXT: pand %xmm5, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT: por %xmm0, %xmm3
; SSE2-NEXT: pand %xmm3, %xmm2
; SSE2-NEXT: pandn %xmm9, %xmm3
; SSE2-NEXT: por %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pxor %xmm8, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT: pand %xmm5, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; SSE2-NEXT: por %xmm0, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm3
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm3, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: pxor %xmm8, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm8
; SSE2-NEXT: movdqa %xmm8, %xmm3
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm1, %xmm8
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm8[1,1,3,3]
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm3
; SSE2-NEXT: pand %xmm3, %xmm2
; SSE2-NEXT: pandn %xmm0, %xmm3
; SSE2-NEXT: por %xmm2, %xmm3
; SSE2-NEXT: movq %xmm3, %rax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16i64:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm8
; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: movdqa %xmm2, %xmm10
; SSE41-NEXT: pxor %xmm9, %xmm10
; SSE41-NEXT: movdqa %xmm6, %xmm0
; SSE41-NEXT: pxor %xmm9, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm11
; SSE41-NEXT: pcmpgtd %xmm10, %xmm11
; SSE41-NEXT: pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm12, %xmm10
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm11[1,1,3,3]
; SSE41-NEXT: por %xmm10, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm6
; SSE41-NEXT: movdqa %xmm8, %xmm0
; SSE41-NEXT: pxor %xmm9, %xmm0
; SSE41-NEXT: movdqa %xmm4, %xmm2
; SSE41-NEXT: pxor %xmm9, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm10
; SSE41-NEXT: pcmpgtd %xmm0, %xmm10
; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm0, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT: pand %xmm11, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm10[1,1,3,3]
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm4
; SSE41-NEXT: movdqa %xmm3, %xmm0
; SSE41-NEXT: pxor %xmm9, %xmm0
; SSE41-NEXT: movdqa %xmm7, %xmm2
; SSE41-NEXT: pxor %xmm9, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm8
; SSE41-NEXT: pcmpgtd %xmm0, %xmm8
; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm8[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm0, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT: pand %xmm10, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm8[1,1,3,3]
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm7
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm9, %xmm0
; SSE41-NEXT: movdqa %xmm5, %xmm2
; SSE41-NEXT: pxor %xmm9, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm3
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm3[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm0, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT: pand %xmm8, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm5
; SSE41-NEXT: movapd %xmm5, %xmm0
; SSE41-NEXT: xorpd %xmm9, %xmm0
; SSE41-NEXT: movapd %xmm7, %xmm1
; SSE41-NEXT: xorpd %xmm9, %xmm1
; SSE41-NEXT: movapd %xmm1, %xmm2
; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm7
; SSE41-NEXT: movapd %xmm4, %xmm0
; SSE41-NEXT: xorpd %xmm9, %xmm0
; SSE41-NEXT: movapd %xmm6, %xmm1
; SSE41-NEXT: xorpd %xmm9, %xmm1
; SSE41-NEXT: movapd %xmm1, %xmm2
; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm6
; SSE41-NEXT: movapd %xmm6, %xmm0
; SSE41-NEXT: xorpd %xmm9, %xmm0
; SSE41-NEXT: movapd %xmm7, %xmm1
; SSE41-NEXT: xorpd %xmm9, %xmm1
; SSE41-NEXT: movapd %xmm1, %xmm2
; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm7
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm7[2,3,0,1]
; SSE41-NEXT: movdqa %xmm7, %xmm0
; SSE41-NEXT: pxor %xmm9, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm9
; SSE41-NEXT: movdqa %xmm9, %xmm2
; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm0, %xmm9
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm9[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm1
; SSE41-NEXT: movq %xmm1, %rax
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_v16i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vpxor %xmm4, %xmm5, %xmm5
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm6
; AVX1-NEXT: vpxor %xmm4, %xmm6, %xmm6
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm6
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm7
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5
; AVX1-NEXT: vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
; AVX1-NEXT: vpxor %xmm4, %xmm5, %xmm5
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm5, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm6
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vxorpd %xmm4, %xmm0, %xmm3
; AVX1-NEXT: vxorpd %xmm4, %xmm1, %xmm5
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm5, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vxorpd %xmm4, %xmm0, %xmm2
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vxorpd %xmm4, %xmm0, %xmm2
; AVX1-NEXT: vxorpd %xmm4, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm5
; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm6
; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5
; AVX2-NEXT: vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vpxor %ymm4, %ymm0, %ymm3
; AVX2-NEXT: vpxor %ymm4, %ymm2, %ymm5
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3
; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vxorpd %ymm4, %ymm0, %ymm2
; AVX2-NEXT: vxorpd %ymm4, %ymm1, %ymm3
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT: vxorpd %ymm4, %ymm0, %ymm2
; AVX2-NEXT: vxorpd %ymm4, %ymm1, %ymm3
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vxorpd %ymm4, %ymm0, %ymm2
; AVX2-NEXT: vxorpd %ymm4, %ymm1, %ymm3
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v16i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> %a0)
  ret i64 %1
}

;
; vXi32
;
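; pminud only arrives with SSE4.1, so SSE2 again flips sign bits
; (2147483648 is 0x80000000) and selects through a signed pcmpgtd, while
; SSE4.1 and later reduce with pminud/vpminud on shuffled halves.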

define i32 @test_v4i32(<4 x i32> %a0) {
; SSE2-LABEL: test_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pxor %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: pxor %xmm2, %xmm4
; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm4
; SSE2-NEXT: por %xmm0, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,2,3]
; SSE2-NEXT: movdqa %xmm4, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm2
; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm4
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: por %xmm4, %xmm2
; SSE2-NEXT: movd %xmm2, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT: pminud %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE41-NEXT: pminud %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
  %1 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %a0)
  ret i32 %1
}

define i32 @test_v8i32(<8 x i32> %a0) {
; SSE2-LABEL: test_v8i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pxor %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: pxor %xmm2, %xmm4
; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm4
; SSE2-NEXT: por %xmm0, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; SSE2-NEXT: movdqa %xmm4, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pxor %xmm2, %xmm3
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: pand %xmm3, %xmm4
; SSE2-NEXT: pandn %xmm0, %xmm3
; SSE2-NEXT: por %xmm4, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
; SSE2-NEXT: movdqa %xmm3, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm2
; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm3
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: por %xmm3, %xmm2
; SSE2-NEXT: movd %xmm2, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pminud %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT: pminud %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE41-NEXT: pminud %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> %a0)
  ret i32 %1
}

define i32 @test_v16i32(<16 x i32> %a0) {
; SSE2-LABEL: test_v16i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm1, %xmm5
; SSE2-NEXT: pxor %xmm4, %xmm5
; SSE2-NEXT: movdqa %xmm3, %xmm6
; SSE2-NEXT: pxor %xmm4, %xmm6
; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
; SSE2-NEXT: pand %xmm6, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm6
; SSE2-NEXT: por %xmm1, %xmm6
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm3
; SSE2-NEXT: pxor %xmm4, %xmm3
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm3
; SSE2-NEXT: por %xmm0, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm0
; SSE2-NEXT: pxor %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm6, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pand %xmm1, %xmm3
; SSE2-NEXT: pandn %xmm6, %xmm1
; SSE2-NEXT: por %xmm3, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pxor %xmm4, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pxor %xmm4, %xmm3
; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
; SSE2-NEXT: pand %xmm3, %xmm1
; SSE2-NEXT: pandn %xmm0, %xmm3
; SSE2-NEXT: por %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
; SSE2-NEXT: movdqa %xmm3, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm4
; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm3
; SSE2-NEXT: pandn %xmm0, %xmm4
; SSE2-NEXT: por %xmm3, %xmm4
; SSE2-NEXT: movd %xmm4, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pminud %xmm3, %xmm1
; SSE41-NEXT: pminud %xmm2, %xmm0
; SSE41-NEXT: pminud %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT: pminud %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE41-NEXT: pminud %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_v16i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminud %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> %a0)
  ret i32 %1
}

define i32 @test_v32i32(<32 x i32> %a0) {
; SSE2-LABEL: test_v32i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm2, %xmm10
; SSE2-NEXT: pxor %xmm8, %xmm10
; SSE2-NEXT: movdqa %xmm6, %xmm9
; SSE2-NEXT: pxor %xmm8, %xmm9
; SSE2-NEXT: pcmpgtd %xmm10, %xmm9
; SSE2-NEXT: pand %xmm9, %xmm2
; SSE2-NEXT: pandn %xmm6, %xmm9
; SSE2-NEXT: por %xmm2, %xmm9
; SSE2-NEXT: movdqa %xmm0, %xmm6
; SSE2-NEXT: pxor %xmm8, %xmm6
; SSE2-NEXT: movdqa %xmm4, %xmm2
; SSE2-NEXT: pxor %xmm8, %xmm2
; SSE2-NEXT: pcmpgtd %xmm6, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn %xmm4, %xmm2
; SSE2-NEXT: por %xmm0, %xmm2
; SSE2-NEXT: movdqa %xmm3, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movdqa %xmm7, %xmm4
; SSE2-NEXT: pxor %xmm8, %xmm4
; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm3
; SSE2-NEXT: pandn %xmm7, %xmm4
; SSE2-NEXT: por %xmm3, %xmm4
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movdqa %xmm5, %xmm3
; SSE2-NEXT: pxor %xmm8, %xmm3
; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; SSE2-NEXT: pand %xmm3, %xmm1
; SSE2-NEXT: pandn %xmm5, %xmm3
; SSE2-NEXT: por %xmm1, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movdqa %xmm4, %xmm1
; SSE2-NEXT: pxor %xmm8, %xmm1
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pand %xmm1, %xmm3
; SSE2-NEXT: pandn %xmm4, %xmm1
; SSE2-NEXT: por %xmm3, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movdqa %xmm9, %xmm3
; SSE2-NEXT: pxor %xmm8, %xmm3
; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; SSE2-NEXT: pand %xmm3, %xmm2
; SSE2-NEXT: pandn %xmm9, %xmm3
; SSE2-NEXT: por %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pxor %xmm8, %xmm2
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm3
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm3, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: pxor %xmm8, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pxor %xmm8, %xmm3
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: pand %xmm3, %xmm2
; SSE2-NEXT: pandn %xmm0, %xmm3
; SSE2-NEXT: por %xmm2, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
; SSE2-NEXT: movdqa %xmm3, %xmm1
; SSE2-NEXT: pxor %xmm8, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm8
; SSE2-NEXT: pcmpgtd %xmm1, %xmm8
; SSE2-NEXT: pand %xmm8, %xmm3
; SSE2-NEXT: pandn %xmm0, %xmm8
; SSE2-NEXT: por %xmm3, %xmm8
; SSE2-NEXT: movd %xmm8, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v32i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pminud %xmm6, %xmm2
; SSE41-NEXT: pminud %xmm4, %xmm0
; SSE41-NEXT: pminud %xmm2, %xmm0
; SSE41-NEXT: pminud %xmm7, %xmm3
; SSE41-NEXT: pminud %xmm5, %xmm1
; SSE41-NEXT: pminud %xmm3, %xmm1
; SSE41-NEXT: pminud %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE41-NEXT: pminud %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE41-NEXT: pminud %xmm0, %xmm1
; SSE41-NEXT: movd %xmm1, %eax
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_v32i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpminud %xmm3, %xmm1, %xmm4
; AVX1-NEXT: vpminud %xmm2, %xmm0, %xmm5
; AVX1-NEXT: vpminud %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpminud %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpminud %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminud %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v32i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminud %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpminud %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v32i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> %a0)
  ret i32 %1
}

;
; vXi16
;
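; SSE2 has a signed pminsw but no unsigned pminuw, so the SSE2 checks
; XOR the values with 32768 (0x8000) around each pminsw to get unsigned
; semantics. SSE4.1 reduces with phminposuw, which yields the horizontal
; unsigned minimum of eight words (and its index) in one instruction.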

define i16 @test_v8i16(<8 x i16> %a0) {
; SSE2-LABEL: test_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pminsw %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
  %1 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %a0)
  ret i16 %1
}

define i16 @test_v16i16(<16 x i16> %a0) {
; SSE2-LABEL: test_v16i16:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pminsw %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pminsw %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pminuw %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> %a0)
  ret i16 %1
}

define i16 @test_v32i16(<32 x i16> %a0) {
; SSE2-LABEL: test_v32i16:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE2-NEXT: pxor %xmm4, %xmm3
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: pminsw %xmm3, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm2
; SSE2-NEXT: pxor %xmm4, %xmm0
; SSE2-NEXT: pminsw %xmm2, %xmm0
; SSE2-NEXT: pminsw %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm4, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: pxor %xmm4, %xmm0
; SSE2-NEXT: pminsw %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v32i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pminuw %xmm3, %xmm1
; SSE41-NEXT: pminuw %xmm2, %xmm0
; SSE41-NEXT: pminuw %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_v32i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminuw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v32i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v32i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> %a0)
  ret i16 %1
}
1330define i16 @test_v64i16(<64 x i16> %a0) {
1331; SSE2-LABEL: test_v64i16:
1332; SSE2: # %bb.0:
1333; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [32768,32768,32768,32768,32768,32768,32768,32768]
Simon Pilgrim57d32402018-04-05 17:25:40 +00001334; SSE2-NEXT: pxor %xmm8, %xmm6
1335; SSE2-NEXT: pxor %xmm8, %xmm2
1336; SSE2-NEXT: pminsw %xmm6, %xmm2
; SSE2-NEXT: pxor %xmm8, %xmm4
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: pminsw %xmm4, %xmm0
; SSE2-NEXT: pminsw %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm7
; SSE2-NEXT: pxor %xmm8, %xmm3
; SSE2-NEXT: pminsw %xmm7, %xmm3
; SSE2-NEXT: pxor %xmm8, %xmm5
; SSE2-NEXT: pxor %xmm8, %xmm1
; SSE2-NEXT: pminsw %xmm5, %xmm1
; SSE2-NEXT: pminsw %xmm3, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: pminsw %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm8, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE2-NEXT: pxor %xmm8, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: pminsw %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v64i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pminuw %xmm7, %xmm3
; SSE41-NEXT: pminuw %xmm5, %xmm1
; SSE41-NEXT: pminuw %xmm3, %xmm1
; SSE41-NEXT: pminuw %xmm6, %xmm2
; SSE41-NEXT: pminuw %xmm4, %xmm0
; SSE41-NEXT: pminuw %xmm2, %xmm0
; SSE41-NEXT: pminuw %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_v64i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vpminuw %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
; AVX1-NEXT: vpminuw %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vpminuw %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpminuw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpminuw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminuw %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v64i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminuw %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpminuw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v64i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminuw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> %a0)
  ret i16 %1
}
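
; SSE2 has no unsigned i16 min instruction, so the v64i16 expansion above
; relies on the sign-flip identity umin(a, b) == smin(a ^ 0x8000, b ^ 0x8000)
; ^ 0x8000: the pxor instructions toggle the sign bit (the splat constant held
; in %xmm8) around each signed pminsw. A minimal sketch of that identity in
; generic IR; the helper name is illustrative and not part of the generated
; checks:

define <8 x i16> @umin_v8i16_signflip_sketch(<8 x i16> %a, <8 x i16> %b) {
  ; flip the sign bits so unsigned order becomes signed order
  %af = xor <8 x i16> %a, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
  %bf = xor <8 x i16> %b, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
  ; signed min of the flipped values (what pminsw computes)
  %c = icmp slt <8 x i16> %af, %bf
  %m = select <8 x i1> %c, <8 x i16> %af, <8 x i16> %bf
  ; flip back to recover the unsigned minimum
  %r = xor <8 x i16> %m, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
  ret <8 x i16> %r
}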

;
; vXi8
;

define i8 @test_v16i8(<16 x i8> %a0) {
; SSE2-LABEL: test_v16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: pminub %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: pminub %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pminub %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: psrlw $8, %xmm0
; SSE2-NEXT: pminub %xmm1, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
  %1 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %a0)
  ret i8 %1
}
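
; phminposuw returns the minimum unsigned i16 lane (in element 0), so the
; SSE4.1+ i8 reductions above first fold byte pairs into words: shifting each
; word right by 8 brings its high byte down with a zero high byte, and pminub
; then leaves min(lo, hi) in the low byte and 0 in the high byte, making the
; word minimum equal to the byte minimum. A sketch of that pairwise step in
; generic IR; the helper name is illustrative and not part of the generated
; checks:

define <8 x i16> @v16i8_pairwise_min_sketch(<16 x i8> %v) {
  ; view the bytes as words and shift each word's high byte into its low byte
  %w = bitcast <16 x i8> %v to <8 x i16>
  %s = lshr <8 x i16> %w, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %h = bitcast <8 x i16> %s to <16 x i8>
  ; unsigned byte min (what pminub computes): low byte = min(lo, hi),
  ; high byte = min(hi, 0) = 0
  %c = icmp ult <16 x i8> %v, %h
  %m = select <16 x i1> %c, <16 x i8> %v, <16 x i8> %h
  ; each word now holds one byte-pair minimum, ready for phminposuw
  %r = bitcast <16 x i8> %m to <8 x i16>
  ret <8 x i16> %r
}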

define i8 @test_v32i8(<32 x i8> %a0) {
; SSE2-LABEL: test_v32i8:
; SSE2: # %bb.0:
; SSE2-NEXT: pminub %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: pminub %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: pminub %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pminub %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: psrlw $8, %xmm0
; SSE2-NEXT: pminub %xmm1, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v32i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> %a0)
  ret i8 %1
}
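
; The v32i8 test above and the wider tests below all share one shape: fold
; vector halves with pminub (extracting the high 128/256 bits first on AVX
; targets) until a single XMM register remains, then finish with the v16i8
; tail. One halving step in generic IR; the helper name is illustrative and
; not part of the generated checks:

define <16 x i8> @v32i8_fold_high_half_sketch(<32 x i8> %v) {
  ; split the vector into its low and high 128-bit halves
  %lo = shufflevector <32 x i8> %v, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %hi = shufflevector <32 x i8> %v, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ; lane-wise unsigned min of the halves (vextract + vpminub in the checks)
  %c = icmp ult <16 x i8> %lo, %hi
  %m = select <16 x i1> %c, <16 x i8> %lo, <16 x i8> %hi
  ret <16 x i8> %m
}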

define i8 @test_v64i8(<64 x i8> %a0) {
; SSE2-LABEL: test_v64i8:
; SSE2: # %bb.0:
; SSE2-NEXT: pminub %xmm3, %xmm1
; SSE2-NEXT: pminub %xmm2, %xmm0
; SSE2-NEXT: pminub %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: pminub %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: pminub %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pminub %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: psrlw $8, %xmm0
; SSE2-NEXT: pminub %xmm1, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v64i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pminub %xmm3, %xmm1
; SSE41-NEXT: pminub %xmm2, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_v64i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v64i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v64i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> %a0)
  ret i8 %1
}

define i8 @test_v128i8(<128 x i8> %a0) {
; SSE2-LABEL: test_v128i8:
; SSE2: # %bb.0:
; SSE2-NEXT: pminub %xmm6, %xmm2
; SSE2-NEXT: pminub %xmm4, %xmm0
; SSE2-NEXT: pminub %xmm2, %xmm0
; SSE2-NEXT: pminub %xmm7, %xmm3
; SSE2-NEXT: pminub %xmm5, %xmm1
; SSE2-NEXT: pminub %xmm3, %xmm1
; SSE2-NEXT: pminub %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE2-NEXT: pminub %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT: pminub %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pminub %xmm1, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $8, %xmm1
; SSE2-NEXT: pminub %xmm0, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v128i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pminub %xmm7, %xmm3
; SSE41-NEXT: pminub %xmm5, %xmm1
; SSE41-NEXT: pminub %xmm3, %xmm1
; SSE41-NEXT: pminub %xmm6, %xmm2
; SSE41-NEXT: pminub %xmm4, %xmm0
; SSE41-NEXT: pminub %xmm2, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_v128i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vpminub %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
; AVX1-NEXT: vpminub %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vpminub %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpminub %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminub %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v128i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminub %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpminub %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v128i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminub %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> %a0)
  ret i8 %1
}

declare i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64>)
declare i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64>)
declare i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64>)

declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>)
declare i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32>)
declare i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>)
declare i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32>)

declare i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>)
declare i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>)
declare i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16>)
declare i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16>)

declare i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>)
declare i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>)
declare i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>)
declare i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>)