; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE2
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE42
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX1
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE42
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX512

;
; 128-bit Vectors
;

; Signed-minimum horizontal reduction of a <2 x i64> vector.
; Lane 1 is shuffled down to lane 0, compared against the input with
; 'icmp slt' and merged with 'select'; lane 0 of the result is returned.
; The check lines below are autogenerated - regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v2i64:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    pxor %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm4
; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm5, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm2, %xmm3
; X86-SSE2-NEXT:    pand %xmm3, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm3
; X86-SSE2-NEXT:    por %xmm0, %xmm3
; X86-SSE2-NEXT:    movd %xmm3, %eax
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
; X86-SSE2-NEXT:    movd %xmm0, %edx
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v2i64:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; X86-SSE42-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT:    movd %xmm2, %eax
; X86-SSE42-NEXT:    pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v2i64:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v2i64:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    pxor %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm4
; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm5, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm2, %xmm3
; X64-SSE2-NEXT:    pand %xmm3, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm3
; X64-SSE2-NEXT:    por %xmm0, %xmm3
; X64-SSE2-NEXT:    movq %xmm3, %rax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v2i64:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; X64-SSE42-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT:    movq %xmm2, %rax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v2i64:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT:    vmovq %xmm0, %rax
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v2i64:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT:    vmovq %xmm0, %rax
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v2i64:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovq %xmm0, %rax
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %2 = icmp slt <2 x i64> %a0, %1
  %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1
  %4 = extractelement <2 x i64> %3, i32 0
  ret i64 %4
}

; Signed-minimum horizontal reduction of a <4 x i32> vector.
; Two halving steps: lanes 2-3 are folded onto lanes 0-1, then lane 1 onto
; lane 0, each step being shufflevector + 'icmp slt' + 'select'. Lane 0 of
; the final vector is returned.
; The check lines below are autogenerated - regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define i32 @test_reduce_v4i32(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i32:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
; X86-SSE2-NEXT:    por %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v4i32:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE42-NEXT:    pminsd %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE42-NEXT:    pminsd %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v4i32:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v4i32:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
; X64-SSE2-NEXT:    por %xmm0, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v4i32:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE42-NEXT:    pminsd %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE42-NEXT:    pminsd %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v4i32:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp slt <4 x i32> %a0, %1
  %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <4 x i32> %3, %4
  %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4
  %7 = extractelement <4 x i32> %6, i32 0
  ret i32 %7
}

; Signed-minimum horizontal reduction of an <8 x i16> vector.
; Three halving steps (lanes 4-7, then 2-3, then 1 folded toward lane 0),
; each being shufflevector + 'icmp slt' + 'select'. Lane 0 of the final
; vector is returned.
; The check lines below are autogenerated - regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v8i16:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v8i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v8i16:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT:    retq
  %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp slt <8 x i16> %a0, %1
  %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1
  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <8 x i16> %3, %4
  %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4
  %7 = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp slt <8 x i16> %6, %7
  %9 = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7
  %10 = extractelement <8 x i16> %9, i32 0
  ret i16 %10
}

; Signed-minimum horizontal reduction of a <16 x i8> vector.
; Four halving steps (lanes 8-15, then 4-7, then 2-3, then 1 folded toward
; lane 0), each being shufflevector + 'icmp slt' + 'select'. Lane 0 of the
; final vector is returned.
; The check lines below are autogenerated - regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
; X86-SSE2-NEXT:    por %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE42-NEXT:    psrlw $8, %xmm2
; X86-SSE42-NEXT:    pminub %xmm0, %xmm2
; X86-SSE42-NEXT:    phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm2
; X86-AVX-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpextrb $0, %xmm0, %eax
; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v16i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
; X64-SSE2-NEXT:    por %xmm0, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE42-NEXT:    psrlw $8, %xmm2
; X64-SSE42-NEXT:    pminub %xmm0, %xmm2
; X64-SSE42-NEXT:    phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v16i8:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm2
; X64-AVX-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpextrb $0, %xmm0, %eax
; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT:    retq
  %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp slt <16 x i8> %a0, %1
  %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <16 x i8> %3, %4
  %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4
  %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp slt <16 x i8> %6, %7
  %9 = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7
  %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp slt <16 x i8> %9, %10
  %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10
  %13 = extractelement <16 x i8> %12, i32 0
  ret i8 %13
}

;
; 256-bit Vectors
;

; Signed-minimum horizontal reduction of a 256-bit <4 x i64> vector.
; Two halving steps: lanes 2-3 are folded onto lanes 0-1, then lane 1 onto
; lane 0, each step being shufflevector + 'icmp slt' + 'select'. Lane 0 of
; the final vector is returned.
; The check lines below are autogenerated - regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i64:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X86-SSE2-NEXT:    pxor %xmm2, %xmm4
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm5
; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm6, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm3, %xmm4
; X86-SSE2-NEXT:    pand %xmm4, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm4
; X86-SSE2-NEXT:    por %xmm0, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm5, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm4
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm4, %xmm2
; X86-SSE2-NEXT:    movd %xmm2, %eax
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X86-SSE2-NEXT:    movd %xmm0, %edx
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v4i64:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; X86-SSE42-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT:    movd %xmm2, %eax
; X86-SSE42-NEXT:    pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v4i64:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm3
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X86-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm3
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
; X86-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
; X86-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v4i64:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X64-SSE2-NEXT:    pxor %xmm2, %xmm4
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm5
; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm6, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm3, %xmm4
; X64-SSE2-NEXT:    pand %xmm4, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm4
; X64-SSE2-NEXT:    por %xmm0, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm5, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm4
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm4, %xmm2
; X64-SSE2-NEXT:    movq %xmm2, %rax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v4i64:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; X64-SSE42-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT:    movq %xmm2, %rax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v4i64:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm3
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X64-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm3
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT:    vmovq %xmm0, %rax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v4i64:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
; X64-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
; X64-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT:    vmovq %xmm0, %rax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v4i64:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminsq %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT:    vpminsq %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vmovq %xmm0, %rax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp slt <4 x i64> %a0, %1
  %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1
  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <4 x i64> %3, %4
  %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4
  %7 = extractelement <4 x i64> %6, i32 0
  ret i64 %7
}

; Signed-minimum reduction of an <8 x i32> vector down to a single i32.
; The IR body halves the number of live lanes three times (8 -> 4 -> 2 -> 1)
; using shufflevector + icmp slt + select, then returns element 0.
; The assertion comments between the define line and the IR body are
; autogenerated (see the NOTE on the first line of this file); regenerate
; them with the update script rather than editing them by hand.
594define i32 @test_reduce_v8i32(<8 x i32> %a0) {
595; X86-SSE2-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000596; X86-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000597; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
598; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
599; X86-SSE2-NEXT: pand %xmm2, %xmm0
600; X86-SSE2-NEXT: pandn %xmm1, %xmm2
601; X86-SSE2-NEXT: por %xmm0, %xmm2
602; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
603; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
604; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
605; X86-SSE2-NEXT: pand %xmm1, %xmm2
606; X86-SSE2-NEXT: pandn %xmm0, %xmm1
607; X86-SSE2-NEXT: por %xmm2, %xmm1
608; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
609; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
610; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
611; X86-SSE2-NEXT: pand %xmm2, %xmm1
612; X86-SSE2-NEXT: pandn %xmm0, %xmm2
613; X86-SSE2-NEXT: por %xmm1, %xmm2
614; X86-SSE2-NEXT: movd %xmm2, %eax
615; X86-SSE2-NEXT: retl
616;
617; X86-SSE42-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000618; X86-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000619; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
620; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
621; X86-SSE42-NEXT: pminsd %xmm0, %xmm1
622; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
623; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
624; X86-SSE42-NEXT: movd %xmm0, %eax
625; X86-SSE42-NEXT: retl
626;
627; X86-AVX1-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000628; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000629; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
630; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
631; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
632; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
633; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
634; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
635; X86-AVX1-NEXT: vmovd %xmm0, %eax
636; X86-AVX1-NEXT: vzeroupper
637; X86-AVX1-NEXT: retl
638;
639; X86-AVX2-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000640; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000641; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
642; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
643; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
644; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
645; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
646; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
647; X86-AVX2-NEXT: vmovd %xmm0, %eax
648; X86-AVX2-NEXT: vzeroupper
649; X86-AVX2-NEXT: retl
650;
651; X64-SSE2-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000652; X64-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000653; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
654; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
655; X64-SSE2-NEXT: pand %xmm2, %xmm0
656; X64-SSE2-NEXT: pandn %xmm1, %xmm2
657; X64-SSE2-NEXT: por %xmm0, %xmm2
658; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
659; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
660; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
661; X64-SSE2-NEXT: pand %xmm1, %xmm2
662; X64-SSE2-NEXT: pandn %xmm0, %xmm1
663; X64-SSE2-NEXT: por %xmm2, %xmm1
664; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
665; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
666; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
667; X64-SSE2-NEXT: pand %xmm2, %xmm1
668; X64-SSE2-NEXT: pandn %xmm0, %xmm2
669; X64-SSE2-NEXT: por %xmm1, %xmm2
670; X64-SSE2-NEXT: movd %xmm2, %eax
671; X64-SSE2-NEXT: retq
672;
673; X64-SSE42-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000674; X64-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000675; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
676; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
677; X64-SSE42-NEXT: pminsd %xmm0, %xmm1
678; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
679; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
680; X64-SSE42-NEXT: movd %xmm0, %eax
681; X64-SSE42-NEXT: retq
682;
683; X64-AVX1-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000684; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000685; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
686; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
687; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
688; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
689; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
690; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
691; X64-AVX1-NEXT: vmovd %xmm0, %eax
692; X64-AVX1-NEXT: vzeroupper
693; X64-AVX1-NEXT: retq
694;
695; X64-AVX2-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000696; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000697; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
698; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
699; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
700; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
701; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
702; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
703; X64-AVX2-NEXT: vmovd %xmm0, %eax
704; X64-AVX2-NEXT: vzeroupper
705; X64-AVX2-NEXT: retq
706;
707; X64-AVX512-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000708; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000709; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
710; X64-AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
711; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
712; X64-AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
713; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
714; X64-AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
715; X64-AVX512-NEXT: vmovd %xmm0, %eax
716; X64-AVX512-NEXT: vzeroupper
717; X64-AVX512-NEXT: retq
  ; Step 1 (8 -> 4 lanes): move lanes 4-7 into lanes 0-3 (remaining lanes are
  ; undef) and keep, per lane, whichever of the two values is smaller (signed).
718 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
719 %2 = icmp slt <8 x i32> %a0, %1
720 %3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1
  ; Step 2 (4 -> 2 lanes): move lanes 2-3 into lanes 0-1 and take the signed minimum.
721 %4 = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
722 %5 = icmp slt <8 x i32> %3, %4
723 %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4
  ; Step 3 (2 -> 1 lane): move lane 1 into lane 0 and take the signed minimum.
724 %7 = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
725 %8 = icmp slt <8 x i32> %6, %7
726 %9 = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7
  ; Only lane 0 carries the fully reduced value; return it.
727 %10 = extractelement <8 x i32> %9, i32 0
728 ret i32 %10
729}
730
; Signed-minimum reduction of a <16 x i16> vector down to a single i16.
; The IR body halves the number of live lanes four times
; (16 -> 8 -> 4 -> 2 -> 1) using shufflevector + icmp slt + select, then
; returns element 0.
; The assertion comments between the define line and the IR body are
; autogenerated (see the NOTE on the first line of this file); regenerate
; them with the update script rather than editing them by hand.
731define i16 @test_reduce_v16i16(<16 x i16> %a0) {
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000732; X86-SSE2-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000733; X86-SSE2: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000734; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
735; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
736; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
737; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
738; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
739; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
740; X86-SSE2-NEXT: psrld $16, %xmm1
741; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
742; X86-SSE2-NEXT: movd %xmm1, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000743; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000744; X86-SSE2-NEXT: retl
745;
746; X86-SSE42-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000747; X86-SSE42: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000748; X86-SSE42-NEXT: pminsw %xmm1, %xmm0
749; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
750; X86-SSE42-NEXT: pxor %xmm1, %xmm0
751; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
752; X86-SSE42-NEXT: pxor %xmm1, %xmm0
753; X86-SSE42-NEXT: movd %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000754; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000755; X86-SSE42-NEXT: retl
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000756;
757; X86-AVX1-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000758; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000759; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
760; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000761; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
762; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
763; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
764; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000765; X86-AVX1-NEXT: vmovd %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000766; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000767; X86-AVX1-NEXT: vzeroupper
768; X86-AVX1-NEXT: retl
769;
770; X86-AVX2-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000771; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000772; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000773; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
774; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
775; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
776; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
777; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000778; X86-AVX2-NEXT: vmovd %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000779; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000780; X86-AVX2-NEXT: vzeroupper
781; X86-AVX2-NEXT: retl
782;
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000783; X64-SSE2-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000784; X64-SSE2: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000785; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
786; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
787; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
788; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
789; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
790; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
791; X64-SSE2-NEXT: psrld $16, %xmm1
792; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
793; X64-SSE2-NEXT: movd %xmm1, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000794; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000795; X64-SSE2-NEXT: retq
796;
797; X64-SSE42-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000798; X64-SSE42: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000799; X64-SSE42-NEXT: pminsw %xmm1, %xmm0
800; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
801; X64-SSE42-NEXT: pxor %xmm1, %xmm0
802; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
803; X64-SSE42-NEXT: pxor %xmm1, %xmm0
804; X64-SSE42-NEXT: movd %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000805; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000806; X64-SSE42-NEXT: retq
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000807;
808; X64-AVX1-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000809; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000810; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
811; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000812; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
813; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
814; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
815; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000816; X64-AVX1-NEXT: vmovd %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000817; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000818; X64-AVX1-NEXT: vzeroupper
819; X64-AVX1-NEXT: retq
820;
821; X64-AVX2-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000822; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000823; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000824; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
825; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
826; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
827; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
828; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000829; X64-AVX2-NEXT: vmovd %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000830; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000831; X64-AVX2-NEXT: vzeroupper
832; X64-AVX2-NEXT: retq
833;
834; X64-AVX512-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000835; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000836; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000837; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
838; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
839; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
840; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
841; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000842; X64-AVX512-NEXT: vmovd %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000843; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000844; X64-AVX512-NEXT: vzeroupper
845; X64-AVX512-NEXT: retq
  ; Step 1 (16 -> 8 lanes): move lanes 8-15 into lanes 0-7 (remaining lanes are
  ; undef) and keep, per lane, whichever of the two values is smaller (signed).
846 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
847 %2 = icmp slt <16 x i16> %a0, %1
848 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
  ; Step 2 (8 -> 4 lanes): move lanes 4-7 into lanes 0-3 and take the signed minimum.
849 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
850 %5 = icmp slt <16 x i16> %3, %4
851 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
  ; Step 3 (4 -> 2 lanes): move lanes 2-3 into lanes 0-1 and take the signed minimum.
852 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
853 %8 = icmp slt <16 x i16> %6, %7
854 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7
  ; Step 4 (2 -> 1 lane): move lane 1 into lane 0 and take the signed minimum.
855 %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
856 %11 = icmp slt <16 x i16> %9, %10
857 %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10
  ; Only lane 0 carries the fully reduced value; return it.
858 %13 = extractelement <16 x i16> %12, i32 0
859 ret i16 %13
860}
861
862define i8 @test_reduce_v32i8(<32 x i8> %a0) {
863; X86-SSE2-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000864; X86-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000865; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
866; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
867; X86-SSE2-NEXT: pand %xmm2, %xmm0
868; X86-SSE2-NEXT: pandn %xmm1, %xmm2
869; X86-SSE2-NEXT: por %xmm0, %xmm2
870; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
871; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
872; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
873; X86-SSE2-NEXT: pand %xmm1, %xmm2
874; X86-SSE2-NEXT: pandn %xmm0, %xmm1
875; X86-SSE2-NEXT: por %xmm2, %xmm1
876; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
877; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
878; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
879; X86-SSE2-NEXT: pand %xmm2, %xmm1
880; X86-SSE2-NEXT: pandn %xmm0, %xmm2
881; X86-SSE2-NEXT: por %xmm1, %xmm2
882; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
883; X86-SSE2-NEXT: psrld $16, %xmm0
884; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
885; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
886; X86-SSE2-NEXT: pand %xmm1, %xmm2
887; X86-SSE2-NEXT: pandn %xmm0, %xmm1
888; X86-SSE2-NEXT: por %xmm2, %xmm1
889; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
890; X86-SSE2-NEXT: psrlw $8, %xmm0
891; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
892; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
893; X86-SSE2-NEXT: pand %xmm2, %xmm1
894; X86-SSE2-NEXT: pandn %xmm0, %xmm2
895; X86-SSE2-NEXT: por %xmm1, %xmm2
896; X86-SSE2-NEXT: movd %xmm2, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000897; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000898; X86-SSE2-NEXT: retl
899;
900; X86-SSE42-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000901; X86-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000902; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +0000903; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
904; X86-SSE42-NEXT: pxor %xmm1, %xmm0
905; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
906; X86-SSE42-NEXT: psrlw $8, %xmm2
907; X86-SSE42-NEXT: pminub %xmm0, %xmm2
908; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
909; X86-SSE42-NEXT: pxor %xmm1, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000910; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000911; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000912; X86-SSE42-NEXT: retl
913;
914; X86-AVX1-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000915; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000916; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
917; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +0000918; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
919; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
920; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
921; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
922; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
923; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000924; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000925; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000926; X86-AVX1-NEXT: vzeroupper
927; X86-AVX1-NEXT: retl
928;
929; X86-AVX2-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000930; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000931; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +0000932; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
933; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
934; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
935; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
936; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
937; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
938; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000939; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000940; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000941; X86-AVX2-NEXT: vzeroupper
942; X86-AVX2-NEXT: retl
943;
944; X64-SSE2-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000945; X64-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000946; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
947; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
948; X64-SSE2-NEXT: pand %xmm2, %xmm0
949; X64-SSE2-NEXT: pandn %xmm1, %xmm2
950; X64-SSE2-NEXT: por %xmm0, %xmm2
951; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
952; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
953; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
954; X64-SSE2-NEXT: pand %xmm1, %xmm2
955; X64-SSE2-NEXT: pandn %xmm0, %xmm1
956; X64-SSE2-NEXT: por %xmm2, %xmm1
957; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
958; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
959; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
960; X64-SSE2-NEXT: pand %xmm2, %xmm1
961; X64-SSE2-NEXT: pandn %xmm0, %xmm2
962; X64-SSE2-NEXT: por %xmm1, %xmm2
963; X64-SSE2-NEXT: movdqa %xmm2, %xmm0
964; X64-SSE2-NEXT: psrld $16, %xmm0
965; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
966; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
967; X64-SSE2-NEXT: pand %xmm1, %xmm2
968; X64-SSE2-NEXT: pandn %xmm0, %xmm1
969; X64-SSE2-NEXT: por %xmm2, %xmm1
970; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
971; X64-SSE2-NEXT: psrlw $8, %xmm0
972; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
973; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
974; X64-SSE2-NEXT: pand %xmm2, %xmm1
975; X64-SSE2-NEXT: pandn %xmm0, %xmm2
976; X64-SSE2-NEXT: por %xmm1, %xmm2
977; X64-SSE2-NEXT: movd %xmm2, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000978; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000979; X64-SSE2-NEXT: retq
980;
981; X64-SSE42-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000982; X64-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000983; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +0000984; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
985; X64-SSE42-NEXT: pxor %xmm1, %xmm0
986; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
987; X64-SSE42-NEXT: psrlw $8, %xmm2
988; X64-SSE42-NEXT: pminub %xmm0, %xmm2
989; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
990; X64-SSE42-NEXT: pxor %xmm1, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000991; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000992; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000993; X64-SSE42-NEXT: retq
994;
995; X64-AVX1-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000996; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000997; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
998; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +0000999; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1000; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1001; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
1002; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
1003; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1004; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001005; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001006; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001007; X64-AVX1-NEXT: vzeroupper
1008; X64-AVX1-NEXT: retq
1009;
1010; X64-AVX2-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001011; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001012; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001013; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1014; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1015; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
1016; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
1017; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
1018; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1019; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001020; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001021; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001022; X64-AVX2-NEXT: vzeroupper
1023; X64-AVX2-NEXT: retq
1024;
1025; X64-AVX512-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001026; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001027; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001028; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1029; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1030; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
1031; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
1032; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
1033; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
1034; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001035; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001036; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001037; X64-AVX512-NEXT: vzeroupper
1038; X64-AVX512-NEXT: retq
1039 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1040 %2 = icmp slt <32 x i8> %a0, %1
1041 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
1042 %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1043 %5 = icmp slt <32 x i8> %3, %4
1044 %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
1045 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1046 %8 = icmp slt <32 x i8> %6, %7
1047 %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
1048 %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1049 %11 = icmp slt <32 x i8> %9, %10
1050 %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
1051 %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1052 %14 = icmp slt <32 x i8> %12, %13
1053 %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13
1054 %16 = extractelement <32 x i8> %15, i32 0
1055 ret i8 %16
1056}
1057
1058;
1059; 512-bit Vectors
1060;
1061
1062define i64 @test_reduce_v8i64(<8 x i64> %a0) {
1063; X86-SSE2-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001064; X86-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001065; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001066; X86-SSE2-NEXT: movdqa %xmm1, %xmm5
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001067; X86-SSE2-NEXT: pxor %xmm4, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001068; X86-SSE2-NEXT: movdqa %xmm3, %xmm6
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001069; X86-SSE2-NEXT: pxor %xmm4, %xmm6
1070; X86-SSE2-NEXT: movdqa %xmm6, %xmm7
1071; X86-SSE2-NEXT: pcmpgtd %xmm5, %xmm7
1072; X86-SSE2-NEXT: pcmpeqd %xmm5, %xmm6
1073; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
1074; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1075; X86-SSE2-NEXT: pand %xmm5, %xmm6
1076; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1077; X86-SSE2-NEXT: por %xmm6, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001078; X86-SSE2-NEXT: pand %xmm5, %xmm1
1079; X86-SSE2-NEXT: pandn %xmm3, %xmm5
1080; X86-SSE2-NEXT: por %xmm1, %xmm5
1081; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001082; X86-SSE2-NEXT: pxor %xmm4, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001083; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
1084; X86-SSE2-NEXT: pxor %xmm4, %xmm3
1085; X86-SSE2-NEXT: movdqa %xmm3, %xmm6
1086; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm6
1087; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm3
1088; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[0,0,2,2]
1089; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1090; X86-SSE2-NEXT: pand %xmm1, %xmm3
1091; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3]
1092; X86-SSE2-NEXT: por %xmm3, %xmm1
1093; X86-SSE2-NEXT: pand %xmm1, %xmm0
1094; X86-SSE2-NEXT: pandn %xmm2, %xmm1
1095; X86-SSE2-NEXT: por %xmm0, %xmm1
1096; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
1097; X86-SSE2-NEXT: pxor %xmm4, %xmm0
1098; X86-SSE2-NEXT: movdqa %xmm5, %xmm2
1099; X86-SSE2-NEXT: pxor %xmm4, %xmm2
1100; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
1101; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3
1102; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm2
1103; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
1104; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1105; X86-SSE2-NEXT: pand %xmm0, %xmm2
1106; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
1107; X86-SSE2-NEXT: por %xmm2, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001108; X86-SSE2-NEXT: pand %xmm0, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001109; X86-SSE2-NEXT: pandn %xmm5, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001110; X86-SSE2-NEXT: por %xmm1, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001111; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1112; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1113; X86-SSE2-NEXT: pxor %xmm4, %xmm2
1114; X86-SSE2-NEXT: pxor %xmm1, %xmm4
1115; X86-SSE2-NEXT: movdqa %xmm4, %xmm3
1116; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm3
1117; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm4
1118; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
1119; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1120; X86-SSE2-NEXT: pand %xmm2, %xmm4
1121; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
1122; X86-SSE2-NEXT: por %xmm4, %xmm2
1123; X86-SSE2-NEXT: pand %xmm2, %xmm0
1124; X86-SSE2-NEXT: pandn %xmm1, %xmm2
1125; X86-SSE2-NEXT: por %xmm0, %xmm2
1126; X86-SSE2-NEXT: movd %xmm2, %eax
1127; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
1128; X86-SSE2-NEXT: movd %xmm0, %edx
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001129; X86-SSE2-NEXT: retl
1130;
1131; X86-SSE42-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001132; X86-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001133; X86-SSE42-NEXT: movdqa %xmm0, %xmm4
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001134; X86-SSE42-NEXT: movdqa %xmm2, %xmm0
1135; X86-SSE42-NEXT: pcmpgtq %xmm4, %xmm0
1136; X86-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001137; X86-SSE42-NEXT: movdqa %xmm3, %xmm0
1138; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001139; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
1140; X86-SSE42-NEXT: movapd %xmm3, %xmm0
1141; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
1142; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
1143; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
1144; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
1145; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
1146; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
1147; X86-SSE42-NEXT: movd %xmm1, %eax
1148; X86-SSE42-NEXT: pextrd $1, %xmm1, %edx
1149; X86-SSE42-NEXT: retl
1150;
1151; X86-AVX1-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001152; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001153; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1154; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1155; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1156; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
1157; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1158; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1159; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1160; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
1161; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
1162; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
1163; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1164; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
1165; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
1166; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1167; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
1168; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
1169; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1170; X86-AVX1-NEXT: vmovd %xmm0, %eax
1171; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
1172; X86-AVX1-NEXT: vzeroupper
1173; X86-AVX1-NEXT: retl
1174;
1175; X86-AVX2-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001176; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001177; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1178; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1179; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
1180; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1181; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1182; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
1183; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1184; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1185; X86-AVX2-NEXT: vmovd %xmm0, %eax
1186; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
1187; X86-AVX2-NEXT: vzeroupper
1188; X86-AVX2-NEXT: retl
1189;
1190; X64-SSE2-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001191; X64-SSE2: ## %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001192; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
1193; X64-SSE2-NEXT: movdqa %xmm1, %xmm5
1194; X64-SSE2-NEXT: pxor %xmm4, %xmm5
1195; X64-SSE2-NEXT: movdqa %xmm3, %xmm6
1196; X64-SSE2-NEXT: pxor %xmm4, %xmm6
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001197; X64-SSE2-NEXT: movdqa %xmm6, %xmm7
1198; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm7
1199; X64-SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
1200; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm6
1201; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1202; X64-SSE2-NEXT: pand %xmm8, %xmm6
1203; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1204; X64-SSE2-NEXT: por %xmm6, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001205; X64-SSE2-NEXT: pand %xmm5, %xmm1
1206; X64-SSE2-NEXT: pandn %xmm3, %xmm5
1207; X64-SSE2-NEXT: por %xmm1, %xmm5
1208; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1209; X64-SSE2-NEXT: pxor %xmm4, %xmm1
1210; X64-SSE2-NEXT: movdqa %xmm2, %xmm3
1211; X64-SSE2-NEXT: pxor %xmm4, %xmm3
1212; X64-SSE2-NEXT: movdqa %xmm3, %xmm6
1213; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm6
1214; X64-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1215; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm3
1216; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
1217; X64-SSE2-NEXT: pand %xmm7, %xmm1
1218; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
1219; X64-SSE2-NEXT: por %xmm1, %xmm3
1220; X64-SSE2-NEXT: pand %xmm3, %xmm0
1221; X64-SSE2-NEXT: pandn %xmm2, %xmm3
1222; X64-SSE2-NEXT: por %xmm0, %xmm3
1223; X64-SSE2-NEXT: movdqa %xmm3, %xmm0
1224; X64-SSE2-NEXT: pxor %xmm4, %xmm0
1225; X64-SSE2-NEXT: movdqa %xmm5, %xmm1
1226; X64-SSE2-NEXT: pxor %xmm4, %xmm1
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001227; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
1228; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001229; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2]
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001230; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1
1231; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001232; X64-SSE2-NEXT: pand %xmm6, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001233; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
1234; X64-SSE2-NEXT: por %xmm0, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001235; X64-SSE2-NEXT: pand %xmm1, %xmm3
1236; X64-SSE2-NEXT: pandn %xmm5, %xmm1
1237; X64-SSE2-NEXT: por %xmm3, %xmm1
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001238; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
1239; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001240; X64-SSE2-NEXT: pxor %xmm4, %xmm2
1241; X64-SSE2-NEXT: pxor %xmm0, %xmm4
1242; X64-SSE2-NEXT: movdqa %xmm4, %xmm3
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001243; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00001244; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
1245; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm4
1246; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1247; X64-SSE2-NEXT: pand %xmm5, %xmm2
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001248; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1249; X64-SSE2-NEXT: por %xmm2, %xmm3
1250; X64-SSE2-NEXT: pand %xmm3, %xmm1
1251; X64-SSE2-NEXT: pandn %xmm0, %xmm3
1252; X64-SSE2-NEXT: por %xmm1, %xmm3
1253; X64-SSE2-NEXT: movq %xmm3, %rax
1254; X64-SSE2-NEXT: retq
1255;
1256; X64-SSE42-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001257; X64-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001258; X64-SSE42-NEXT: movdqa %xmm0, %xmm4
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001259; X64-SSE42-NEXT: movdqa %xmm2, %xmm0
1260; X64-SSE42-NEXT: pcmpgtq %xmm4, %xmm0
1261; X64-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001262; X64-SSE42-NEXT: movdqa %xmm3, %xmm0
1263; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001264; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
1265; X64-SSE42-NEXT: movapd %xmm3, %xmm0
1266; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
1267; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
1268; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
1269; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
1270; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
1271; X64-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
1272; X64-SSE42-NEXT: movq %xmm1, %rax
1273; X64-SSE42-NEXT: retq
1274;
1275; X64-AVX1-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001276; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001277; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1278; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1279; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1280; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
1281; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1282; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1283; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1284; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
1285; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
1286; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
1287; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1288; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
1289; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
1290; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1291; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
1292; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
1293; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1294; X64-AVX1-NEXT: vmovq %xmm0, %rax
1295; X64-AVX1-NEXT: vzeroupper
1296; X64-AVX1-NEXT: retq
1297;
1298; X64-AVX2-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001299; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001300; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1301; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1302; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
1303; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1304; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1305; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
1306; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1307; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1308; X64-AVX2-NEXT: vmovq %xmm0, %rax
1309; X64-AVX2-NEXT: vzeroupper
1310; X64-AVX2-NEXT: retq
1311;
1312; X64-AVX512-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001313; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001314; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1315; X64-AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
1316; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1317; X64-AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
1318; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1319; X64-AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
1320; X64-AVX512-NEXT: vmovq %xmm0, %rax
1321; X64-AVX512-NEXT: vzeroupper
1322; X64-AVX512-NEXT: retq
1323 %1 = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1324 %2 = icmp slt <8 x i64> %a0, %1
1325 %3 = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1
1326 %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1327 %5 = icmp slt <8 x i64> %3, %4
1328 %6 = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4
1329 %7 = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1330 %8 = icmp slt <8 x i64> %6, %7
1331 %9 = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7
1332 %10 = extractelement <8 x i64> %9, i32 0
1333 ret i64 %10
1334}
1335
; Signed-minimum reduction of a <16 x i32> vector: four shuffle/icmp slt/select
; steps halve the number of live lanes (8, 4, 2, 1) and lane 0 is returned.
; Expected lowering per the autogenerated assertions in this function: SSE2
; emulates each min with pcmpgtd + pand/pandn/por, while the SSE4.2, AVX and
; AVX512 runs use (v)pminsd directly.
1336define i32 @test_reduce_v16i32(<16 x i32> %a0) {
1337; X86-SSE2-LABEL: test_reduce_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001338; X86-SSE2: ## %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001339; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
1340; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
1341; X86-SSE2-NEXT: pand %xmm4, %xmm0
1342; X86-SSE2-NEXT: pandn %xmm2, %xmm4
1343; X86-SSE2-NEXT: por %xmm0, %xmm4
1344; X86-SSE2-NEXT: movdqa %xmm3, %xmm0
1345; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm0
1346; X86-SSE2-NEXT: pand %xmm0, %xmm1
1347; X86-SSE2-NEXT: pandn %xmm3, %xmm0
1348; X86-SSE2-NEXT: por %xmm1, %xmm0
1349; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1350; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm1
1351; X86-SSE2-NEXT: pand %xmm1, %xmm4
1352; X86-SSE2-NEXT: pandn %xmm0, %xmm1
1353; X86-SSE2-NEXT: por %xmm4, %xmm1
1354; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
1355; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1356; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
1357; X86-SSE2-NEXT: pand %xmm2, %xmm1
1358; X86-SSE2-NEXT: pandn %xmm0, %xmm2
1359; X86-SSE2-NEXT: por %xmm1, %xmm2
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001360; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
1361; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1362; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
1363; X86-SSE2-NEXT: pand %xmm1, %xmm2
1364; X86-SSE2-NEXT: pandn %xmm0, %xmm1
1365; X86-SSE2-NEXT: por %xmm2, %xmm1
1366; X86-SSE2-NEXT: movd %xmm1, %eax
1367; X86-SSE2-NEXT: retl
1368;
1369; X86-SSE42-LABEL: test_reduce_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001370; X86-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001371; X86-SSE42-NEXT: pminsd %xmm3, %xmm1
1372; X86-SSE42-NEXT: pminsd %xmm2, %xmm0
1373; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
1374; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1375; X86-SSE42-NEXT: pminsd %xmm0, %xmm1
1376; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
1377; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
1378; X86-SSE42-NEXT: movd %xmm0, %eax
1379; X86-SSE42-NEXT: retl
1380;
1381; X86-AVX1-LABEL: test_reduce_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001382; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001383; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1384; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1385; X86-AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2
1386; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1387; X86-AVX1-NEXT: vpminsd %xmm2, %xmm0, %xmm0
1388; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1389; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1390; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1391; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1392; X86-AVX1-NEXT: vmovd %xmm0, %eax
1393; X86-AVX1-NEXT: vzeroupper
1394; X86-AVX1-NEXT: retl
1395;
1396; X86-AVX2-LABEL: test_reduce_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001397; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001398; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1399; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1400; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1401; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1402; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1403; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1404; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1405; X86-AVX2-NEXT: vmovd %xmm0, %eax
1406; X86-AVX2-NEXT: vzeroupper
1407; X86-AVX2-NEXT: retl
1408;
1409; X64-SSE2-LABEL: test_reduce_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001410; X64-SSE2: ## %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001411; X64-SSE2-NEXT: movdqa %xmm2, %xmm4
1412; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
1413; X64-SSE2-NEXT: pand %xmm4, %xmm0
1414; X64-SSE2-NEXT: pandn %xmm2, %xmm4
1415; X64-SSE2-NEXT: por %xmm0, %xmm4
1416; X64-SSE2-NEXT: movdqa %xmm3, %xmm0
1417; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm0
1418; X64-SSE2-NEXT: pand %xmm0, %xmm1
1419; X64-SSE2-NEXT: pandn %xmm3, %xmm0
1420; X64-SSE2-NEXT: por %xmm1, %xmm0
1421; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1422; X64-SSE2-NEXT: pcmpgtd %xmm4, %xmm1
1423; X64-SSE2-NEXT: pand %xmm1, %xmm4
1424; X64-SSE2-NEXT: pandn %xmm0, %xmm1
1425; X64-SSE2-NEXT: por %xmm4, %xmm1
1426; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
1427; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
1428; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
1429; X64-SSE2-NEXT: pand %xmm2, %xmm1
1430; X64-SSE2-NEXT: pandn %xmm0, %xmm2
1431; X64-SSE2-NEXT: por %xmm1, %xmm2
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001432; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
1433; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1434; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
1435; X64-SSE2-NEXT: pand %xmm1, %xmm2
1436; X64-SSE2-NEXT: pandn %xmm0, %xmm1
1437; X64-SSE2-NEXT: por %xmm2, %xmm1
1438; X64-SSE2-NEXT: movd %xmm1, %eax
1439; X64-SSE2-NEXT: retq
1440;
1441; X64-SSE42-LABEL: test_reduce_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001442; X64-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001443; X64-SSE42-NEXT: pminsd %xmm3, %xmm1
1444; X64-SSE42-NEXT: pminsd %xmm2, %xmm0
1445; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
1446; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1447; X64-SSE42-NEXT: pminsd %xmm0, %xmm1
1448; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
1449; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
1450; X64-SSE42-NEXT: movd %xmm0, %eax
1451; X64-SSE42-NEXT: retq
1452;
1453; X64-AVX1-LABEL: test_reduce_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001454; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001455; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1456; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1457; X64-AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2
1458; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1459; X64-AVX1-NEXT: vpminsd %xmm2, %xmm0, %xmm0
1460; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1461; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1462; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1463; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1464; X64-AVX1-NEXT: vmovd %xmm0, %eax
1465; X64-AVX1-NEXT: vzeroupper
1466; X64-AVX1-NEXT: retq
1467;
1468; X64-AVX2-LABEL: test_reduce_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001469; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001470; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1471; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1472; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1473; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1474; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1475; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1476; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1477; X64-AVX2-NEXT: vmovd %xmm0, %eax
1478; X64-AVX2-NEXT: vzeroupper
1479; X64-AVX2-NEXT: retq
1480;
1481; X64-AVX512-LABEL: test_reduce_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001482; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001483; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1484; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
1485; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1486; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
1487; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1488; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
1489; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1490; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
1491; X64-AVX512-NEXT: vmovd %xmm0, %eax
1492; X64-AVX512-NEXT: vzeroupper
1493; X64-AVX512-NEXT: retq
; Step 1: bring lanes 8-15 down to lanes 0-7 and keep the smaller (signed) of each pair.
1494 %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1495 %2 = icmp slt <16 x i32> %a0, %1
1496 %3 = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1
; Step 2: fold lanes 4-7 of the partial result onto lanes 0-3.
1497 %4 = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1498 %5 = icmp slt <16 x i32> %3, %4
1499 %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4
; Step 3: fold lanes 2-3 onto lanes 0-1.
1500 %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1501 %8 = icmp slt <16 x i32> %6, %7
1502 %9 = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7
; Step 4: fold lane 1 onto lane 0.
1503 %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1504 %11 = icmp slt <16 x i32> %9, %10
1505 %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10
; Only lane 0 is defined at this point; it is the value returned.
1506 %13 = extractelement <16 x i32> %12, i32 0
1507 ret i32 %13
1508}
1509
1510define i16 @test_reduce_v32i16(<32 x i16> %a0) {
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001511; X86-SSE2-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001512; X86-SSE2: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001513; X86-SSE2-NEXT: pminsw %xmm3, %xmm1
1514; X86-SSE2-NEXT: pminsw %xmm2, %xmm0
1515; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
1516; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1517; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
1518; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
1519; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
1520; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1521; X86-SSE2-NEXT: psrld $16, %xmm1
1522; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
1523; X86-SSE2-NEXT: movd %xmm1, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001524; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001525; X86-SSE2-NEXT: retl
1526;
1527; X86-SSE42-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001528; X86-SSE42: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001529; X86-SSE42-NEXT: pminsw %xmm3, %xmm1
1530; X86-SSE42-NEXT: pminsw %xmm2, %xmm0
1531; X86-SSE42-NEXT: pminsw %xmm1, %xmm0
1532; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
1533; X86-SSE42-NEXT: pxor %xmm1, %xmm0
1534; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
1535; X86-SSE42-NEXT: pxor %xmm1, %xmm0
1536; X86-SSE42-NEXT: movd %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001537; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001538; X86-SSE42-NEXT: retl
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001539;
1540; X86-AVX1-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001541; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001542; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1543; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1544; X86-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
1545; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1546; X86-AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001547; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
1548; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1549; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1550; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001551; X86-AVX1-NEXT: vmovd %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001552; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001553; X86-AVX1-NEXT: vzeroupper
1554; X86-AVX1-NEXT: retl
1555;
1556; X86-AVX2-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001557; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001558; X86-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
1559; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001560; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1561; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
1562; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
1563; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1564; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001565; X86-AVX2-NEXT: vmovd %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001566; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001567; X86-AVX2-NEXT: vzeroupper
1568; X86-AVX2-NEXT: retl
1569;
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001570; X64-SSE2-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001571; X64-SSE2: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001572; X64-SSE2-NEXT: pminsw %xmm3, %xmm1
1573; X64-SSE2-NEXT: pminsw %xmm2, %xmm0
1574; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
1575; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1576; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
1577; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
1578; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
1579; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1580; X64-SSE2-NEXT: psrld $16, %xmm1
1581; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
1582; X64-SSE2-NEXT: movd %xmm1, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001583; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001584; X64-SSE2-NEXT: retq
1585;
1586; X64-SSE42-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001587; X64-SSE42: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001588; X64-SSE42-NEXT: pminsw %xmm3, %xmm1
1589; X64-SSE42-NEXT: pminsw %xmm2, %xmm0
1590; X64-SSE42-NEXT: pminsw %xmm1, %xmm0
1591; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
1592; X64-SSE42-NEXT: pxor %xmm1, %xmm0
1593; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
1594; X64-SSE42-NEXT: pxor %xmm1, %xmm0
1595; X64-SSE42-NEXT: movd %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001596; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001597; X64-SSE42-NEXT: retq
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001598;
1599; X64-AVX1-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001600; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001601; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1602; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1603; X64-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
1604; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1605; X64-AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001606; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
1607; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1608; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1609; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001610; X64-AVX1-NEXT: vmovd %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001611; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001612; X64-AVX1-NEXT: vzeroupper
1613; X64-AVX1-NEXT: retq
1614;
1615; X64-AVX2-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001616; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001617; X64-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
1618; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001619; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1620; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
1621; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
1622; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1623; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001624; X64-AVX2-NEXT: vmovd %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001625; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001626; X64-AVX2-NEXT: vzeroupper
1627; X64-AVX2-NEXT: retq
1628;
1629; X64-AVX512-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001630; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001631; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001632; X64-AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001633; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001634; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1635; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
1636; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
1637; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
1638; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001639; X64-AVX512-NEXT: vmovd %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001640; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001641; X64-AVX512-NEXT: vzeroupper
1642; X64-AVX512-NEXT: retq
1643 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1644 %2 = icmp slt <32 x i16> %a0, %1
1645 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
1646 %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1647 %5 = icmp slt <32 x i16> %3, %4
1648 %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
1649 %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1650 %8 = icmp slt <32 x i16> %6, %7
1651 %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
1652 %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1653 %11 = icmp slt <32 x i16> %9, %10
1654 %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10
1655 %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1656 %14 = icmp slt <32 x i16> %12, %13
1657 %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13
1658 %16 = extractelement <32 x i16> %15, i32 0
1659 ret i16 %16
1660}
1661
1662define i8 @test_reduce_v64i8(<64 x i8> %a0) {
1663; X86-SSE2-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001664; X86-SSE2: ## %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001665; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
1666; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm4
1667; X86-SSE2-NEXT: pand %xmm4, %xmm0
1668; X86-SSE2-NEXT: pandn %xmm2, %xmm4
1669; X86-SSE2-NEXT: por %xmm0, %xmm4
1670; X86-SSE2-NEXT: movdqa %xmm3, %xmm0
1671; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm0
1672; X86-SSE2-NEXT: pand %xmm0, %xmm1
1673; X86-SSE2-NEXT: pandn %xmm3, %xmm0
1674; X86-SSE2-NEXT: por %xmm1, %xmm0
1675; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1676; X86-SSE2-NEXT: pcmpgtb %xmm4, %xmm1
1677; X86-SSE2-NEXT: pand %xmm1, %xmm4
1678; X86-SSE2-NEXT: pandn %xmm0, %xmm1
1679; X86-SSE2-NEXT: por %xmm4, %xmm1
1680; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
1681; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1682; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
1683; X86-SSE2-NEXT: pand %xmm2, %xmm1
1684; X86-SSE2-NEXT: pandn %xmm0, %xmm2
1685; X86-SSE2-NEXT: por %xmm1, %xmm2
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001686; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
1687; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1688; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
1689; X86-SSE2-NEXT: pand %xmm1, %xmm2
1690; X86-SSE2-NEXT: pandn %xmm0, %xmm1
1691; X86-SSE2-NEXT: por %xmm2, %xmm1
1692; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
1693; X86-SSE2-NEXT: psrld $16, %xmm0
1694; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1695; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
1696; X86-SSE2-NEXT: pand %xmm2, %xmm1
1697; X86-SSE2-NEXT: pandn %xmm0, %xmm2
1698; X86-SSE2-NEXT: por %xmm1, %xmm2
1699; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
1700; X86-SSE2-NEXT: psrlw $8, %xmm0
1701; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1702; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
1703; X86-SSE2-NEXT: pand %xmm1, %xmm2
1704; X86-SSE2-NEXT: pandn %xmm0, %xmm1
1705; X86-SSE2-NEXT: por %xmm2, %xmm1
1706; X86-SSE2-NEXT: movd %xmm1, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001707; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001708; X86-SSE2-NEXT: retl
1709;
1710; X86-SSE42-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001711; X86-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001712; X86-SSE42-NEXT: pminsb %xmm3, %xmm1
1713; X86-SSE42-NEXT: pminsb %xmm2, %xmm0
1714; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001715; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1716; X86-SSE42-NEXT: pxor %xmm1, %xmm0
1717; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
1718; X86-SSE42-NEXT: psrlw $8, %xmm2
1719; X86-SSE42-NEXT: pminub %xmm0, %xmm2
1720; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
1721; X86-SSE42-NEXT: pxor %xmm1, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001722; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001723; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001724; X86-SSE42-NEXT: retl
1725;
1726; X86-AVX1-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001727; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001728; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1729; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1730; X86-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
1731; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1732; X86-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001733; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1734; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1735; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
1736; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
1737; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1738; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001739; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001740; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001741; X86-AVX1-NEXT: vzeroupper
1742; X86-AVX1-NEXT: retl
1743;
1744; X86-AVX2-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001745; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001746; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
1747; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001748; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1749; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1750; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
1751; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
1752; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
1753; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1754; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001755; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001756; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001757; X86-AVX2-NEXT: vzeroupper
1758; X86-AVX2-NEXT: retl
1759;
1760; X64-SSE2-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001761; X64-SSE2: ## %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001762; X64-SSE2-NEXT: movdqa %xmm2, %xmm4
1763; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm4
1764; X64-SSE2-NEXT: pand %xmm4, %xmm0
1765; X64-SSE2-NEXT: pandn %xmm2, %xmm4
1766; X64-SSE2-NEXT: por %xmm0, %xmm4
1767; X64-SSE2-NEXT: movdqa %xmm3, %xmm0
1768; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm0
1769; X64-SSE2-NEXT: pand %xmm0, %xmm1
1770; X64-SSE2-NEXT: pandn %xmm3, %xmm0
1771; X64-SSE2-NEXT: por %xmm1, %xmm0
1772; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1773; X64-SSE2-NEXT: pcmpgtb %xmm4, %xmm1
1774; X64-SSE2-NEXT: pand %xmm1, %xmm4
1775; X64-SSE2-NEXT: pandn %xmm0, %xmm1
1776; X64-SSE2-NEXT: por %xmm4, %xmm1
1777; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
1778; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
1779; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
1780; X64-SSE2-NEXT: pand %xmm2, %xmm1
1781; X64-SSE2-NEXT: pandn %xmm0, %xmm2
1782; X64-SSE2-NEXT: por %xmm1, %xmm2
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001783; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
1784; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1785; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
1786; X64-SSE2-NEXT: pand %xmm1, %xmm2
1787; X64-SSE2-NEXT: pandn %xmm0, %xmm1
1788; X64-SSE2-NEXT: por %xmm2, %xmm1
1789; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
1790; X64-SSE2-NEXT: psrld $16, %xmm0
1791; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
1792; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
1793; X64-SSE2-NEXT: pand %xmm2, %xmm1
1794; X64-SSE2-NEXT: pandn %xmm0, %xmm2
1795; X64-SSE2-NEXT: por %xmm1, %xmm2
1796; X64-SSE2-NEXT: movdqa %xmm2, %xmm0
1797; X64-SSE2-NEXT: psrlw $8, %xmm0
1798; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1799; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
1800; X64-SSE2-NEXT: pand %xmm1, %xmm2
1801; X64-SSE2-NEXT: pandn %xmm0, %xmm1
1802; X64-SSE2-NEXT: por %xmm2, %xmm1
1803; X64-SSE2-NEXT: movd %xmm1, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001804; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001805; X64-SSE2-NEXT: retq
1806;
1807; X64-SSE42-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001808; X64-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001809; X64-SSE42-NEXT: pminsb %xmm3, %xmm1
1810; X64-SSE42-NEXT: pminsb %xmm2, %xmm0
1811; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001812; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1813; X64-SSE42-NEXT: pxor %xmm1, %xmm0
1814; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
1815; X64-SSE42-NEXT: psrlw $8, %xmm2
1816; X64-SSE42-NEXT: pminub %xmm0, %xmm2
1817; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
1818; X64-SSE42-NEXT: pxor %xmm1, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001819; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001820; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001821; X64-SSE42-NEXT: retq
1822;
1823; X64-AVX1-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001824; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001825; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1826; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1827; X64-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
1828; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1829; X64-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001830; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1831; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1832; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
1833; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
1834; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1835; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001836; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001837; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001838; X64-AVX1-NEXT: vzeroupper
1839; X64-AVX1-NEXT: retq
1840;
1841; X64-AVX2-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001842; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001843; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
1844; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001845; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1846; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1847; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
1848; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
1849; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
1850; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1851; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001852; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001853; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001854; X64-AVX2-NEXT: vzeroupper
1855; X64-AVX2-NEXT: retq
1856;
1857; X64-AVX512-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001858; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001859; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001860; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001861; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001862; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1863; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1864; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
1865; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
1866; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
1867; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
1868; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001869; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001870; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001871; X64-AVX512-NEXT: vzeroupper
1872; X64-AVX512-NEXT: retq
1873 %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1874 %2 = icmp slt <64 x i8> %a0, %1
1875 %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
1876 %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1877 %5 = icmp slt <64 x i8> %3, %4
1878 %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
1879 %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1880 %8 = icmp slt <64 x i8> %6, %7
1881 %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
1882 %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1883 %11 = icmp slt <64 x i8> %9, %10
1884 %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
1885 %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1886 %14 = icmp slt <64 x i8> %12, %13
1887 %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13
1888 %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1889 %17 = icmp slt <64 x i8> %15, %16
1890 %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16
1891 %19 = extractelement <64 x i8> %18, i32 0
1892 ret i8 %19
1893}