blob: fa5828a45700fde5107a4dbddc70cc19d8b5b90c [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE2
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE42
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX1
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE42
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX512

;
; 128-bit Vectors
;
15
; test_reduce_v2i64 - signed-minimum reduction of a <2 x i64> vector.
;
; The IR performs one shuffle/compare/select step: lane 1 is moved down to
; lane 0, compared against the input with `icmp slt`, and the smaller value is
; kept. The scalar result is read from lane 0.
;
; The per-target assertion comments inside the body are autogenerated (see the
; NOTE at the top of the file); regenerate them with the script instead of
; editing them by hand.
define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v2i64:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
; X86-SSE2-NEXT: pxor %xmm1, %xmm2
; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm5, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT: por %xmm2, %xmm3
; X86-SSE2-NEXT: pand %xmm3, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm3
; X86-SSE2-NEXT: por %xmm0, %xmm3
; X86-SSE2-NEXT: movd %xmm3, %eax
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
; X86-SSE2-NEXT: movd %xmm0, %edx
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v2i64:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; X86-SSE42-NEXT: movdqa %xmm2, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT: movd %xmm2, %eax
; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v2i64:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v2i64:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
; X64-SSE2-NEXT: pxor %xmm1, %xmm2
; X64-SSE2-NEXT: movdqa %xmm2, %xmm4
; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm5, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT: por %xmm2, %xmm3
; X64-SSE2-NEXT: pand %xmm3, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm3
; X64-SSE2-NEXT: por %xmm0, %xmm3
; X64-SSE2-NEXT: movq %xmm3, %rax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v2i64:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; X64-SSE42-NEXT: movdqa %xmm2, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT: movq %xmm2, %rax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v2i64:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v2i64:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v2i64:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: retq
  ; Single step: lane 0 = smin(a0[0], a0[1]); lane 1 of the result is unused.
  %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %2 = icmp slt <2 x i64> %a0, %1
  %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1
  %4 = extractelement <2 x i64> %3, i32 0
  ret i64 %4
}
119
; test_reduce_v4i32 - signed-minimum reduction of a <4 x i32> vector.
;
; Two shuffle/compare/select steps halve the number of live lanes each time
; (4 -> 2 -> 1) using `icmp slt` + `select`; the scalar result is read from
; lane 0.
;
; The per-target assertion comments inside the body are autogenerated (see the
; NOTE at the top of the file); regenerate them with the script instead of
; editing them by hand.
define i32 @test_reduce_v4i32(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i32:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm2
; X86-SSE2-NEXT: por %xmm0, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm2
; X86-SSE2-NEXT: pandn %xmm0, %xmm1
; X86-SSE2-NEXT: por %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v4i32:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE42-NEXT: pminsd %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v4i32:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v4i32:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm2
; X64-SSE2-NEXT: por %xmm0, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; X64-SSE2-NEXT: pand %xmm1, %xmm2
; X64-SSE2-NEXT: pandn %xmm0, %xmm1
; X64-SSE2-NEXT: por %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v4i32:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE42-NEXT: pminsd %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v4i32:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: retq
  ; Step 1: lanes 0-1 = smin(a0[0..1], a0[2..3]).
  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp slt <4 x i32> %a0, %1
  %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1
  ; Step 2: lane 0 = smin(lane 0, lane 1) of the step-1 result.
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <4 x i32> %3, %4
  %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4
  %7 = extractelement <4 x i32> %6, i32 0
  ret i32 %7
}
199
; test_reduce_v8i16 - signed-minimum reduction of an <8 x i16> vector.
;
; Three shuffle/compare/select steps halve the number of live lanes each time
; (8 -> 4 -> 2 -> 1) using `icmp slt` + `select`; the scalar result is read
; from lane 0.
;
; The per-target assertion comments inside the body are autogenerated (see the
; NOTE at the top of the file); regenerate them with the script instead of
; editing them by hand.
define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i16:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: ## kill: def %ax killed %ax killed %eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def %ax killed %ax killed %eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v8i16:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: ## kill: def %ax killed %ax killed %eax
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v8i16:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: ## kill: def %ax killed %ax killed %eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def %ax killed %ax killed %eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v8i16:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: ## kill: def %ax killed %ax killed %eax
; X64-AVX-NEXT: retq
  ; Step 1: lanes 0-3 = smin(a0[0..3], a0[4..7]).
  %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp slt <8 x i16> %a0, %1
  %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1
  ; Step 2: lanes 0-1 = smin(lanes 0..1, lanes 2..3) of the step-1 result.
  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <8 x i16> %3, %4
  %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4
  ; Step 3: lane 0 = smin(lane 0, lane 1) of the step-2 result.
  %7 = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp slt <8 x i16> %6, %7
  %9 = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7
  %10 = extractelement <8 x i16> %9, i32 0
  ret i16 %10
}
278
; test_reduce_v16i8 - signed-minimum reduction of a <16 x i8> vector.
;
; Four shuffle/compare/select steps halve the number of live lanes each time
; (16 -> 8 -> 4 -> 2 -> 1) using `icmp slt` + `select`; the scalar result is
; read from lane 0.
;
; The per-target assertion comments inside the body are autogenerated (see the
; NOTE at the top of the file); regenerate them with the script instead of
; editing them by hand.
define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i8:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm2
; X86-SSE2-NEXT: por %xmm0, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm2
; X86-SSE2-NEXT: pandn %xmm0, %xmm1
; X86-SSE2-NEXT: por %xmm2, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: psrld $16, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm1
; X86-SSE2-NEXT: pandn %xmm0, %xmm2
; X86-SSE2-NEXT: por %xmm1, %xmm2
; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
; X86-SSE2-NEXT: psrlw $8, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm2
; X86-SSE2-NEXT: pandn %xmm0, %xmm1
; X86-SSE2-NEXT: por %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
; X86-SSE42-NEXT: psrlw $8, %xmm2
; X86-SSE42-NEXT: pminub %xmm0, %xmm2
; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v16i8:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm2
; X64-SSE2-NEXT: por %xmm0, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT: pand %xmm1, %xmm2
; X64-SSE2-NEXT: pandn %xmm0, %xmm1
; X64-SSE2-NEXT: por %xmm2, %xmm1
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
; X64-SSE2-NEXT: psrld $16, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm1
; X64-SSE2-NEXT: pandn %xmm0, %xmm2
; X64-SSE2-NEXT: por %xmm1, %xmm2
; X64-SSE2-NEXT: movdqa %xmm2, %xmm0
; X64-SSE2-NEXT: psrlw $8, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT: pand %xmm1, %xmm2
; X64-SSE2-NEXT: pandn %xmm0, %xmm1
; X64-SSE2-NEXT: por %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: psrlw $8, %xmm2
; X64-SSE42-NEXT: pminub %xmm0, %xmm2
; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v16i8:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX-NEXT: retq
  ; Step 1: lanes 0-7 = smin(a0[0..7], a0[8..15]).
  %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp slt <16 x i8> %a0, %1
  %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
  ; Step 2: lanes 0-3 = smin(lanes 0..3, lanes 4..7) of the step-1 result.
  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <16 x i8> %3, %4
  %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4
  ; Step 3: lanes 0-1 = smin(lanes 0..1, lanes 2..3) of the step-2 result.
  %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp slt <16 x i8> %6, %7
  %9 = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7
  ; Step 4: lane 0 = smin(lane 0, lane 1) of the step-3 result.
  %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp slt <16 x i8> %9, %10
  %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10
  %13 = extractelement <16 x i8> %12, i32 0
  ret i8 %13
}
408
;
; 256-bit Vectors
;
412
; test_reduce_v4i64 - signed-minimum reduction of a <4 x i64> vector.
;
; Two shuffle/compare/select steps halve the number of live lanes each time
; (4 -> 2 -> 1) using `icmp slt` + `select`; the scalar result is read from
; lane 0.
;
; The per-target assertion comments inside the body are autogenerated (see the
; NOTE at the top of the file); regenerate them with the script instead of
; editing them by hand.
define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i64:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
; X86-SSE2-NEXT: movdqa %xmm1, %xmm4
; X86-SSE2-NEXT: pxor %xmm2, %xmm4
; X86-SSE2-NEXT: movdqa %xmm4, %xmm5
; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm6, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X86-SSE2-NEXT: por %xmm3, %xmm4
; X86-SSE2-NEXT: pand %xmm4, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm4
; X86-SSE2-NEXT: por %xmm0, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; X86-SSE2-NEXT: movdqa %xmm4, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pxor %xmm0, %xmm2
; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm5, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X86-SSE2-NEXT: por %xmm1, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm4
; X86-SSE2-NEXT: pandn %xmm0, %xmm2
; X86-SSE2-NEXT: por %xmm4, %xmm2
; X86-SSE2-NEXT: movd %xmm2, %eax
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X86-SSE2-NEXT: movd %xmm0, %edx
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v4i64:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; X86-SSE42-NEXT: movdqa %xmm2, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT: movd %xmm2, %eax
; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v4i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v4i64:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
; X64-SSE2-NEXT: movdqa %xmm1, %xmm4
; X64-SSE2-NEXT: pxor %xmm2, %xmm4
; X64-SSE2-NEXT: movdqa %xmm4, %xmm5
; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm6, %xmm3
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X64-SSE2-NEXT: por %xmm3, %xmm4
; X64-SSE2-NEXT: pand %xmm4, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm4
; X64-SSE2-NEXT: por %xmm0, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pxor %xmm0, %xmm2
; X64-SSE2-NEXT: movdqa %xmm2, %xmm3
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm5, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X64-SSE2-NEXT: por %xmm1, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm4
; X64-SSE2-NEXT: pandn %xmm0, %xmm2
; X64-SSE2-NEXT: por %xmm4, %xmm2
; X64-SSE2-NEXT: movq %xmm2, %rax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v4i64:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; X64-SSE42-NEXT: movdqa %xmm2, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT: movq %xmm2, %rax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v4i64:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v4i64:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v4i64:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsq %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT: vpminsq %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  ; Step 1: lanes 0-1 = smin(a0[0..1], a0[2..3]).
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp slt <4 x i64> %a0, %1
  %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1
  ; Step 2: lane 0 = smin(lane 0, lane 1) of the step-1 result.
  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <4 x i64> %3, %4
  %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4
  %7 = extractelement <4 x i64> %6, i32 0
  ret i64 %7
}
593
; test_reduce_v8i32 - signed-minimum reduction of an <8 x i32> vector to one i32.
; The IR halves the number of live lanes three times (8 -> 4 -> 2 -> 1). Each step
; shuffles the upper half of the live lanes down to the low lanes (the remaining
; mask entries are undef), compares the two vectors with icmp slt, and selects the
; smaller value per lane. Lane 0 of the last select is the function result.
; The check lines in this function are autogenerated (see the NOTE on the first
; line of the file); regenerate them with the script rather than editing by hand.
594define i32 @test_reduce_v8i32(<8 x i32> %a0) {
595; X86-SSE2-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000596; X86-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000597; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
598; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
599; X86-SSE2-NEXT: pand %xmm2, %xmm0
600; X86-SSE2-NEXT: pandn %xmm1, %xmm2
601; X86-SSE2-NEXT: por %xmm0, %xmm2
602; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
603; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
604; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
605; X86-SSE2-NEXT: pand %xmm1, %xmm2
606; X86-SSE2-NEXT: pandn %xmm0, %xmm1
607; X86-SSE2-NEXT: por %xmm2, %xmm1
608; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
609; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
610; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
611; X86-SSE2-NEXT: pand %xmm2, %xmm1
612; X86-SSE2-NEXT: pandn %xmm0, %xmm2
613; X86-SSE2-NEXT: por %xmm1, %xmm2
614; X86-SSE2-NEXT: movd %xmm2, %eax
615; X86-SSE2-NEXT: retl
616;
617; X86-SSE42-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000618; X86-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000619; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
620; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
621; X86-SSE42-NEXT: pminsd %xmm0, %xmm1
622; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
623; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
624; X86-SSE42-NEXT: movd %xmm0, %eax
625; X86-SSE42-NEXT: retl
626;
627; X86-AVX1-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000628; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000629; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
630; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
631; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
632; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
633; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
634; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
635; X86-AVX1-NEXT: vmovd %xmm0, %eax
636; X86-AVX1-NEXT: vzeroupper
637; X86-AVX1-NEXT: retl
638;
639; X86-AVX2-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000640; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000641; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
642; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
643; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
644; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
645; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
646; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
647; X86-AVX2-NEXT: vmovd %xmm0, %eax
648; X86-AVX2-NEXT: vzeroupper
649; X86-AVX2-NEXT: retl
650;
651; X64-SSE2-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000652; X64-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000653; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
654; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
655; X64-SSE2-NEXT: pand %xmm2, %xmm0
656; X64-SSE2-NEXT: pandn %xmm1, %xmm2
657; X64-SSE2-NEXT: por %xmm0, %xmm2
658; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
659; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
660; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
661; X64-SSE2-NEXT: pand %xmm1, %xmm2
662; X64-SSE2-NEXT: pandn %xmm0, %xmm1
663; X64-SSE2-NEXT: por %xmm2, %xmm1
664; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
665; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
666; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
667; X64-SSE2-NEXT: pand %xmm2, %xmm1
668; X64-SSE2-NEXT: pandn %xmm0, %xmm2
669; X64-SSE2-NEXT: por %xmm1, %xmm2
670; X64-SSE2-NEXT: movd %xmm2, %eax
671; X64-SSE2-NEXT: retq
672;
673; X64-SSE42-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000674; X64-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000675; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
676; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
677; X64-SSE42-NEXT: pminsd %xmm0, %xmm1
678; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
679; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
680; X64-SSE42-NEXT: movd %xmm0, %eax
681; X64-SSE42-NEXT: retq
682;
683; X64-AVX1-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000684; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000685; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
686; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
687; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
688; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
689; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
690; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
691; X64-AVX1-NEXT: vmovd %xmm0, %eax
692; X64-AVX1-NEXT: vzeroupper
693; X64-AVX1-NEXT: retq
694;
695; X64-AVX2-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000696; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000697; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
698; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
699; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
700; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
701; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
702; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
703; X64-AVX2-NEXT: vmovd %xmm0, %eax
704; X64-AVX2-NEXT: vzeroupper
705; X64-AVX2-NEXT: retq
706;
707; X64-AVX512-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000708; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000709; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
710; X64-AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
711; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
712; X64-AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
713; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
714; X64-AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
715; X64-AVX512-NEXT: vmovd %xmm0, %eax
716; X64-AVX512-NEXT: vzeroupper
717; X64-AVX512-NEXT: retq
; Step 1: bring lanes 4-7 down to lanes 0-3 and keep the signed minimum per lane.
718 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
719 %2 = icmp slt <8 x i32> %a0, %1
720 %3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1
; Step 2: bring lanes 2-3 down to lanes 0-1.
721 %4 = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
722 %5 = icmp slt <8 x i32> %3, %4
723 %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4
; Step 3: bring lane 1 down to lane 0.
724 %7 = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
725 %8 = icmp slt <8 x i32> %6, %7
726 %9 = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7
; Lane 0 now holds the signed minimum of all eight input elements.
727 %10 = extractelement <8 x i32> %9, i32 0
728 ret i32 %10
729}
730
; test_reduce_v16i16 - signed-minimum reduction of a <16 x i16> vector to one i16.
; The IR halves the number of live lanes four times (16 -> 8 -> 4 -> 2 -> 1). Each
; step shuffles the upper half of the live lanes down to the low lanes (the
; remaining mask entries are undef), compares with icmp slt, and selects the
; smaller value per lane. Lane 0 of the last select is the function result.
; The check lines in this function are autogenerated (see the NOTE on the first
; line of the file); regenerate them with the script rather than editing by hand.
731define i16 @test_reduce_v16i16(<16 x i16> %a0) {
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000732; X86-SSE2-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000733; X86-SSE2: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000734; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
735; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
736; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
737; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
738; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
739; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
740; X86-SSE2-NEXT: psrld $16, %xmm1
741; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
742; X86-SSE2-NEXT: movd %xmm1, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000743; X86-SSE2-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000744; X86-SSE2-NEXT: retl
745;
746; X86-SSE42-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000747; X86-SSE42: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000748; X86-SSE42-NEXT: pminsw %xmm1, %xmm0
749; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
750; X86-SSE42-NEXT: pxor %xmm1, %xmm0
751; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
752; X86-SSE42-NEXT: pxor %xmm1, %xmm0
753; X86-SSE42-NEXT: movd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000754; X86-SSE42-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000755; X86-SSE42-NEXT: retl
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000756;
757; X86-AVX1-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000758; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000759; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
760; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000761; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
762; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
763; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
764; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000765; X86-AVX1-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000766; X86-AVX1-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000767; X86-AVX1-NEXT: vzeroupper
768; X86-AVX1-NEXT: retl
769;
770; X86-AVX2-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000771; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000772; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000773; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
774; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
775; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
776; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
777; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000778; X86-AVX2-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000779; X86-AVX2-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000780; X86-AVX2-NEXT: vzeroupper
781; X86-AVX2-NEXT: retl
782;
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000783; X64-SSE2-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000784; X64-SSE2: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000785; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
786; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
787; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
788; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
789; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
790; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
791; X64-SSE2-NEXT: psrld $16, %xmm1
792; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
793; X64-SSE2-NEXT: movd %xmm1, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000794; X64-SSE2-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000795; X64-SSE2-NEXT: retq
796;
797; X64-SSE42-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000798; X64-SSE42: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000799; X64-SSE42-NEXT: pminsw %xmm1, %xmm0
800; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
801; X64-SSE42-NEXT: pxor %xmm1, %xmm0
802; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
803; X64-SSE42-NEXT: pxor %xmm1, %xmm0
804; X64-SSE42-NEXT: movd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000805; X64-SSE42-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000806; X64-SSE42-NEXT: retq
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000807;
808; X64-AVX1-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000809; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000810; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
811; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000812; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
813; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
814; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
815; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000816; X64-AVX1-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000817; X64-AVX1-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000818; X64-AVX1-NEXT: vzeroupper
819; X64-AVX1-NEXT: retq
820;
821; X64-AVX2-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000822; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000823; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000824; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
825; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
826; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
827; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
828; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000829; X64-AVX2-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000830; X64-AVX2-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000831; X64-AVX2-NEXT: vzeroupper
832; X64-AVX2-NEXT: retq
833;
834; X64-AVX512-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000835; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000836; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000837; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
838; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
839; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
840; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
841; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000842; X64-AVX512-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000843; X64-AVX512-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000844; X64-AVX512-NEXT: vzeroupper
845; X64-AVX512-NEXT: retq
; Step 1: bring lanes 8-15 down to lanes 0-7 and keep the signed minimum per lane.
846 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
847 %2 = icmp slt <16 x i16> %a0, %1
848 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
; Step 2: bring lanes 4-7 down to lanes 0-3.
849 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
850 %5 = icmp slt <16 x i16> %3, %4
851 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
; Step 3: bring lanes 2-3 down to lanes 0-1.
852 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
853 %8 = icmp slt <16 x i16> %6, %7
854 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7
; Step 4: bring lane 1 down to lane 0.
855 %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
856 %11 = icmp slt <16 x i16> %9, %10
857 %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10
; Lane 0 now holds the signed minimum of all sixteen input elements.
858 %13 = extractelement <16 x i16> %12, i32 0
859 ret i16 %13
860}
861
862define i8 @test_reduce_v32i8(<32 x i8> %a0) {
863; X86-SSE2-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000864; X86-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000865; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
866; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
867; X86-SSE2-NEXT: pand %xmm2, %xmm0
868; X86-SSE2-NEXT: pandn %xmm1, %xmm2
869; X86-SSE2-NEXT: por %xmm0, %xmm2
870; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
871; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
872; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
873; X86-SSE2-NEXT: pand %xmm1, %xmm2
874; X86-SSE2-NEXT: pandn %xmm0, %xmm1
875; X86-SSE2-NEXT: por %xmm2, %xmm1
876; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
877; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
878; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
879; X86-SSE2-NEXT: pand %xmm2, %xmm1
880; X86-SSE2-NEXT: pandn %xmm0, %xmm2
881; X86-SSE2-NEXT: por %xmm1, %xmm2
882; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
883; X86-SSE2-NEXT: psrld $16, %xmm0
884; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
885; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
886; X86-SSE2-NEXT: pand %xmm1, %xmm2
887; X86-SSE2-NEXT: pandn %xmm0, %xmm1
888; X86-SSE2-NEXT: por %xmm2, %xmm1
889; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
890; X86-SSE2-NEXT: psrlw $8, %xmm0
891; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
892; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
893; X86-SSE2-NEXT: pand %xmm2, %xmm1
894; X86-SSE2-NEXT: pandn %xmm0, %xmm2
895; X86-SSE2-NEXT: por %xmm1, %xmm2
896; X86-SSE2-NEXT: movd %xmm2, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000897; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000898; X86-SSE2-NEXT: retl
899;
900; X86-SSE42-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000901; X86-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000902; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +0000903; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
904; X86-SSE42-NEXT: pxor %xmm1, %xmm0
905; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
906; X86-SSE42-NEXT: psrlw $8, %xmm2
907; X86-SSE42-NEXT: pminub %xmm0, %xmm2
908; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
909; X86-SSE42-NEXT: pxor %xmm1, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000910; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000911; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000912; X86-SSE42-NEXT: retl
913;
914; X86-AVX1-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000915; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000916; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
917; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +0000918; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
919; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
920; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
921; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
922; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
923; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000924; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000925; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000926; X86-AVX1-NEXT: vzeroupper
927; X86-AVX1-NEXT: retl
928;
929; X86-AVX2-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000930; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000931; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +0000932; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
933; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
934; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
935; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
936; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
937; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
938; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000939; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000940; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000941; X86-AVX2-NEXT: vzeroupper
942; X86-AVX2-NEXT: retl
943;
944; X64-SSE2-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000945; X64-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000946; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
947; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
948; X64-SSE2-NEXT: pand %xmm2, %xmm0
949; X64-SSE2-NEXT: pandn %xmm1, %xmm2
950; X64-SSE2-NEXT: por %xmm0, %xmm2
951; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
952; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
953; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
954; X64-SSE2-NEXT: pand %xmm1, %xmm2
955; X64-SSE2-NEXT: pandn %xmm0, %xmm1
956; X64-SSE2-NEXT: por %xmm2, %xmm1
957; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
958; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
959; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
960; X64-SSE2-NEXT: pand %xmm2, %xmm1
961; X64-SSE2-NEXT: pandn %xmm0, %xmm2
962; X64-SSE2-NEXT: por %xmm1, %xmm2
963; X64-SSE2-NEXT: movdqa %xmm2, %xmm0
964; X64-SSE2-NEXT: psrld $16, %xmm0
965; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
966; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
967; X64-SSE2-NEXT: pand %xmm1, %xmm2
968; X64-SSE2-NEXT: pandn %xmm0, %xmm1
969; X64-SSE2-NEXT: por %xmm2, %xmm1
970; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
971; X64-SSE2-NEXT: psrlw $8, %xmm0
972; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
973; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
974; X64-SSE2-NEXT: pand %xmm2, %xmm1
975; X64-SSE2-NEXT: pandn %xmm0, %xmm2
976; X64-SSE2-NEXT: por %xmm1, %xmm2
977; X64-SSE2-NEXT: movd %xmm2, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000978; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000979; X64-SSE2-NEXT: retq
980;
981; X64-SSE42-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000982; X64-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000983; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +0000984; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
985; X64-SSE42-NEXT: pxor %xmm1, %xmm0
986; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
987; X64-SSE42-NEXT: psrlw $8, %xmm2
988; X64-SSE42-NEXT: pminub %xmm0, %xmm2
989; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
990; X64-SSE42-NEXT: pxor %xmm1, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000991; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000992; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000993; X64-SSE42-NEXT: retq
994;
995; X64-AVX1-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000996; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000997; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
998; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +0000999; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1000; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1001; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
1002; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
1003; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1004; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001005; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001006; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001007; X64-AVX1-NEXT: vzeroupper
1008; X64-AVX1-NEXT: retq
1009;
1010; X64-AVX2-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001011; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001012; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001013; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1014; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1015; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
1016; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
1017; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
1018; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1019; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001020; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001021; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001022; X64-AVX2-NEXT: vzeroupper
1023; X64-AVX2-NEXT: retq
1024;
1025; X64-AVX512-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001026; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001027; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001028; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1029; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1030; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
1031; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
1032; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
1033; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
1034; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001035; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001036; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001037; X64-AVX512-NEXT: vzeroupper
1038; X64-AVX512-NEXT: retq
1039 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1040 %2 = icmp slt <32 x i8> %a0, %1
1041 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
1042 %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1043 %5 = icmp slt <32 x i8> %3, %4
1044 %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
1045 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1046 %8 = icmp slt <32 x i8> %6, %7
1047 %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
1048 %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1049 %11 = icmp slt <32 x i8> %9, %10
1050 %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
1051 %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1052 %14 = icmp slt <32 x i8> %12, %13
1053 %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13
1054 %16 = extractelement <32 x i8> %15, i32 0
1055 ret i8 %16
1056}
1057
1058;
1059; 512-bit Vectors
1060;
1061
1062define i64 @test_reduce_v8i64(<8 x i64> %a0) {
1063; X86-SSE2-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001064; X86-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001065; X86-SSE2-NEXT: subl $28, %esp
1066; X86-SSE2-NEXT: .cfi_def_cfa_offset 32
1067; X86-SSE2-NEXT: movdqa %xmm2, %xmm6
1068; X86-SSE2-NEXT: movdqa %xmm6, (%esp) ## 16-byte Spill
1069; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1070; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
1071; X86-SSE2-NEXT: movdqa %xmm2, %xmm5
1072; X86-SSE2-NEXT: pxor %xmm4, %xmm5
1073; X86-SSE2-NEXT: pxor %xmm4, %xmm6
1074; X86-SSE2-NEXT: movdqa %xmm6, %xmm7
1075; X86-SSE2-NEXT: pcmpgtd %xmm5, %xmm7
1076; X86-SSE2-NEXT: pcmpeqd %xmm5, %xmm6
1077; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
1078; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1079; X86-SSE2-NEXT: pand %xmm5, %xmm6
1080; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1081; X86-SSE2-NEXT: por %xmm6, %xmm5
1082; X86-SSE2-NEXT: movdqa %xmm1, %xmm6
1083; X86-SSE2-NEXT: pxor %xmm4, %xmm6
1084; X86-SSE2-NEXT: movdqa %xmm3, %xmm7
1085; X86-SSE2-NEXT: pxor %xmm4, %xmm7
1086; X86-SSE2-NEXT: movdqa %xmm7, %xmm0
1087; X86-SSE2-NEXT: pcmpgtd %xmm6, %xmm0
1088; X86-SSE2-NEXT: pcmpeqd %xmm6, %xmm7
1089; X86-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
1090; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[0,0,2,2]
1091; X86-SSE2-NEXT: pand %xmm6, %xmm7
1092; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1093; X86-SSE2-NEXT: por %xmm7, %xmm6
1094; X86-SSE2-NEXT: pand %xmm6, %xmm1
1095; X86-SSE2-NEXT: pandn %xmm3, %xmm6
1096; X86-SSE2-NEXT: por %xmm1, %xmm6
1097; X86-SSE2-NEXT: pand %xmm5, %xmm2
1098; X86-SSE2-NEXT: pandn (%esp), %xmm5 ## 16-byte Folded Reload
1099; X86-SSE2-NEXT: por %xmm2, %xmm5
1100; X86-SSE2-NEXT: movdqa %xmm5, %xmm0
1101; X86-SSE2-NEXT: pxor %xmm4, %xmm0
1102; X86-SSE2-NEXT: movdqa %xmm6, %xmm1
1103; X86-SSE2-NEXT: pxor %xmm4, %xmm1
1104; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
1105; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
1106; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm1
1107; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
1108; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1109; X86-SSE2-NEXT: pand %xmm0, %xmm1
1110; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
1111; X86-SSE2-NEXT: por %xmm1, %xmm0
1112; X86-SSE2-NEXT: pand %xmm0, %xmm5
1113; X86-SSE2-NEXT: pandn %xmm6, %xmm0
1114; X86-SSE2-NEXT: por %xmm5, %xmm0
1115; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1116; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1117; X86-SSE2-NEXT: pxor %xmm4, %xmm2
1118; X86-SSE2-NEXT: pxor %xmm1, %xmm4
1119; X86-SSE2-NEXT: movdqa %xmm4, %xmm3
1120; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm3
1121; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm4
1122; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
1123; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1124; X86-SSE2-NEXT: pand %xmm2, %xmm4
1125; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
1126; X86-SSE2-NEXT: por %xmm4, %xmm2
1127; X86-SSE2-NEXT: pand %xmm2, %xmm0
1128; X86-SSE2-NEXT: pandn %xmm1, %xmm2
1129; X86-SSE2-NEXT: por %xmm0, %xmm2
1130; X86-SSE2-NEXT: movd %xmm2, %eax
1131; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
1132; X86-SSE2-NEXT: movd %xmm0, %edx
1133; X86-SSE2-NEXT: addl $28, %esp
1134; X86-SSE2-NEXT: retl
1135;
1136; X86-SSE42-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001137; X86-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001138; X86-SSE42-NEXT: movdqa %xmm0, %xmm4
1139; X86-SSE42-NEXT: movdqa %xmm3, %xmm5
1140; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm5
1141; X86-SSE42-NEXT: movdqa %xmm2, %xmm0
1142; X86-SSE42-NEXT: pcmpgtq %xmm4, %xmm0
1143; X86-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
1144; X86-SSE42-NEXT: movdqa %xmm5, %xmm0
1145; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
1146; X86-SSE42-NEXT: movapd %xmm3, %xmm0
1147; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
1148; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
1149; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
1150; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
1151; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
1152; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
1153; X86-SSE42-NEXT: movd %xmm1, %eax
1154; X86-SSE42-NEXT: pextrd $1, %xmm1, %edx
1155; X86-SSE42-NEXT: retl
1156;
1157; X86-AVX1-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001158; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001159; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1160; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1161; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1162; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
1163; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1164; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1165; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1166; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
1167; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
1168; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
1169; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1170; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
1171; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
1172; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1173; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
1174; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
1175; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1176; X86-AVX1-NEXT: vmovd %xmm0, %eax
1177; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
1178; X86-AVX1-NEXT: vzeroupper
1179; X86-AVX1-NEXT: retl
1180;
1181; X86-AVX2-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001182; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001183; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1184; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1185; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
1186; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1187; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1188; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
1189; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1190; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1191; X86-AVX2-NEXT: vmovd %xmm0, %eax
1192; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
1193; X86-AVX2-NEXT: vzeroupper
1194; X86-AVX2-NEXT: retl
1195;
1196; X64-SSE2-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001197; X64-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001198; X64-SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,0,2147483648,0]
1199; X64-SSE2-NEXT: movdqa %xmm0, %xmm5
1200; X64-SSE2-NEXT: pxor %xmm9, %xmm5
1201; X64-SSE2-NEXT: movdqa %xmm2, %xmm6
1202; X64-SSE2-NEXT: pxor %xmm9, %xmm6
1203; X64-SSE2-NEXT: movdqa %xmm6, %xmm7
1204; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm7
1205; X64-SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
1206; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm6
1207; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1208; X64-SSE2-NEXT: pand %xmm8, %xmm6
1209; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1210; X64-SSE2-NEXT: por %xmm6, %xmm5
1211; X64-SSE2-NEXT: movdqa %xmm1, %xmm6
1212; X64-SSE2-NEXT: pxor %xmm9, %xmm6
1213; X64-SSE2-NEXT: movdqa %xmm3, %xmm7
1214; X64-SSE2-NEXT: pxor %xmm9, %xmm7
1215; X64-SSE2-NEXT: movdqa %xmm7, %xmm4
1216; X64-SSE2-NEXT: pcmpgtd %xmm6, %xmm4
1217; X64-SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm4[0,0,2,2]
1218; X64-SSE2-NEXT: pcmpeqd %xmm6, %xmm7
1219; X64-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
1220; X64-SSE2-NEXT: pand %xmm8, %xmm7
1221; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
1222; X64-SSE2-NEXT: por %xmm7, %xmm6
1223; X64-SSE2-NEXT: pand %xmm6, %xmm1
1224; X64-SSE2-NEXT: pandn %xmm3, %xmm6
1225; X64-SSE2-NEXT: por %xmm1, %xmm6
1226; X64-SSE2-NEXT: pand %xmm5, %xmm0
1227; X64-SSE2-NEXT: pandn %xmm2, %xmm5
1228; X64-SSE2-NEXT: por %xmm0, %xmm5
1229; X64-SSE2-NEXT: movdqa %xmm5, %xmm0
1230; X64-SSE2-NEXT: pxor %xmm9, %xmm0
1231; X64-SSE2-NEXT: movdqa %xmm6, %xmm1
1232; X64-SSE2-NEXT: pxor %xmm9, %xmm1
1233; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
1234; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
1235; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
1236; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1
1237; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1238; X64-SSE2-NEXT: pand %xmm3, %xmm0
1239; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
1240; X64-SSE2-NEXT: por %xmm0, %xmm1
1241; X64-SSE2-NEXT: pand %xmm1, %xmm5
1242; X64-SSE2-NEXT: pandn %xmm6, %xmm1
1243; X64-SSE2-NEXT: por %xmm5, %xmm1
1244; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
1245; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
1246; X64-SSE2-NEXT: pxor %xmm9, %xmm2
1247; X64-SSE2-NEXT: pxor %xmm0, %xmm9
1248; X64-SSE2-NEXT: movdqa %xmm9, %xmm3
1249; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3
1250; X64-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
1251; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm9
1252; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm9[1,1,3,3]
1253; X64-SSE2-NEXT: pand %xmm4, %xmm2
1254; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1255; X64-SSE2-NEXT: por %xmm2, %xmm3
1256; X64-SSE2-NEXT: pand %xmm3, %xmm1
1257; X64-SSE2-NEXT: pandn %xmm0, %xmm3
1258; X64-SSE2-NEXT: por %xmm1, %xmm3
1259; X64-SSE2-NEXT: movq %xmm3, %rax
1260; X64-SSE2-NEXT: retq
1261;
1262; X64-SSE42-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001263; X64-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001264; X64-SSE42-NEXT: movdqa %xmm0, %xmm4
1265; X64-SSE42-NEXT: movdqa %xmm3, %xmm5
1266; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm5
1267; X64-SSE42-NEXT: movdqa %xmm2, %xmm0
1268; X64-SSE42-NEXT: pcmpgtq %xmm4, %xmm0
1269; X64-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
1270; X64-SSE42-NEXT: movdqa %xmm5, %xmm0
1271; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
1272; X64-SSE42-NEXT: movapd %xmm3, %xmm0
1273; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
1274; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
1275; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
1276; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
1277; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
1278; X64-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
1279; X64-SSE42-NEXT: movq %xmm1, %rax
1280; X64-SSE42-NEXT: retq
1281;
1282; X64-AVX1-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001283; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001284; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1285; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1286; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1287; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
1288; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1289; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1290; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1291; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
1292; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
1293; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
1294; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1295; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
1296; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
1297; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1298; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
1299; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
1300; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1301; X64-AVX1-NEXT: vmovq %xmm0, %rax
1302; X64-AVX1-NEXT: vzeroupper
1303; X64-AVX1-NEXT: retq
1304;
1305; X64-AVX2-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001306; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001307; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1308; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1309; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
1310; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1311; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1312; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
1313; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1314; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1315; X64-AVX2-NEXT: vmovq %xmm0, %rax
1316; X64-AVX2-NEXT: vzeroupper
1317; X64-AVX2-NEXT: retq
1318;
1319; X64-AVX512-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001320; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001321; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1322; X64-AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
1323; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1324; X64-AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
1325; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1326; X64-AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
1327; X64-AVX512-NEXT: vmovq %xmm0, %rax
1328; X64-AVX512-NEXT: vzeroupper
1329; X64-AVX512-NEXT: retq
1330 %1 = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1331 %2 = icmp slt <8 x i64> %a0, %1
1332 %3 = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1
1333 %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1334 %5 = icmp slt <8 x i64> %3, %4
1335 %6 = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4
1336 %7 = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1337 %8 = icmp slt <8 x i64> %6, %7
1338 %9 = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7
1339 %10 = extractelement <8 x i64> %9, i32 0
1340 ret i64 %10
1341}
1342
; Horizontal signed-minimum reduction of a <16 x i32> vector to a scalar i32.
; Every step shuffles the upper half of the still-live lanes down to lane 0,
; takes the lane-wise signed minimum (icmp slt + select) and thereby halves
; the live width: 16 -> 8 -> 4 -> 2 -> 1. Lane 0 of the last vector is returned.
; The per-target assertions inside the function are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
define i32 @test_reduce_v16i32(<16 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i32:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: movdqa %xmm3, %xmm4
; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm4
; X86-SSE2-NEXT: movdqa %xmm2, %xmm5
; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm5
; X86-SSE2-NEXT: pand %xmm5, %xmm0
; X86-SSE2-NEXT: pandn %xmm2, %xmm5
; X86-SSE2-NEXT: por %xmm0, %xmm5
; X86-SSE2-NEXT: pand %xmm4, %xmm1
; X86-SSE2-NEXT: pandn %xmm3, %xmm4
; X86-SSE2-NEXT: por %xmm1, %xmm4
; X86-SSE2-NEXT: movdqa %xmm4, %xmm0
; X86-SSE2-NEXT: pcmpgtd %xmm5, %xmm0
; X86-SSE2-NEXT: pand %xmm0, %xmm5
; X86-SSE2-NEXT: pandn %xmm4, %xmm0
; X86-SSE2-NEXT: por %xmm5, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm2
; X86-SSE2-NEXT: por %xmm0, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm2
; X86-SSE2-NEXT: pandn %xmm0, %xmm1
; X86-SSE2-NEXT: por %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v16i32:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminsd %xmm3, %xmm1
; X86-SSE42-NEXT: pminsd %xmm2, %xmm0
; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE42-NEXT: pminsd %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v16i32:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpminsd %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v16i32:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v16i32:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: movdqa %xmm3, %xmm4
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm4
; X64-SSE2-NEXT: movdqa %xmm2, %xmm5
; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm5
; X64-SSE2-NEXT: pand %xmm5, %xmm0
; X64-SSE2-NEXT: pandn %xmm2, %xmm5
; X64-SSE2-NEXT: por %xmm0, %xmm5
; X64-SSE2-NEXT: pand %xmm4, %xmm1
; X64-SSE2-NEXT: pandn %xmm3, %xmm4
; X64-SSE2-NEXT: por %xmm1, %xmm4
; X64-SSE2-NEXT: movdqa %xmm4, %xmm0
; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm0
; X64-SSE2-NEXT: pand %xmm0, %xmm5
; X64-SSE2-NEXT: pandn %xmm4, %xmm0
; X64-SSE2-NEXT: por %xmm5, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm2
; X64-SSE2-NEXT: por %xmm0, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; X64-SSE2-NEXT: pand %xmm1, %xmm2
; X64-SSE2-NEXT: pandn %xmm0, %xmm1
; X64-SSE2-NEXT: por %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v16i32:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminsd %xmm3, %xmm1
; X64-SSE42-NEXT: pminsd %xmm2, %xmm0
; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE42-NEXT: pminsd %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v16i32:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpminsd %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v16i32:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v16i32:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  ; Step 1: fold lanes 8..15 onto lanes 0..7.
  %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp slt <16 x i32> %a0, %1
  %3 = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1
  ; Step 2: fold lanes 4..7 onto lanes 0..3.
  %4 = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <16 x i32> %3, %4
  %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4
  ; Step 3: fold lanes 2..3 onto lanes 0..1.
  %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp slt <16 x i32> %6, %7
  %9 = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7
  ; Step 4: fold lane 1 onto lane 0.
  %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp slt <16 x i32> %9, %10
  %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10
  ; Lane 0 now holds the minimum of all 16 input lanes.
  %13 = extractelement <16 x i32> %12, i32 0
  ret i32 %13
}
1516
1517define i16 @test_reduce_v32i16(<32 x i16> %a0) {
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001518; X86-SSE2-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001519; X86-SSE2: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001520; X86-SSE2-NEXT: pminsw %xmm3, %xmm1
1521; X86-SSE2-NEXT: pminsw %xmm2, %xmm0
1522; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
1523; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1524; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
1525; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
1526; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
1527; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1528; X86-SSE2-NEXT: psrld $16, %xmm1
1529; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
1530; X86-SSE2-NEXT: movd %xmm1, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001531; X86-SSE2-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001532; X86-SSE2-NEXT: retl
1533;
1534; X86-SSE42-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001535; X86-SSE42: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001536; X86-SSE42-NEXT: pminsw %xmm3, %xmm1
1537; X86-SSE42-NEXT: pminsw %xmm2, %xmm0
1538; X86-SSE42-NEXT: pminsw %xmm1, %xmm0
1539; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
1540; X86-SSE42-NEXT: pxor %xmm1, %xmm0
1541; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
1542; X86-SSE42-NEXT: pxor %xmm1, %xmm0
1543; X86-SSE42-NEXT: movd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001544; X86-SSE42-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001545; X86-SSE42-NEXT: retl
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001546;
1547; X86-AVX1-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001548; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001549; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1550; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1551; X86-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
1552; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1553; X86-AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001554; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
1555; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1556; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1557; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001558; X86-AVX1-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001559; X86-AVX1-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001560; X86-AVX1-NEXT: vzeroupper
1561; X86-AVX1-NEXT: retl
1562;
1563; X86-AVX2-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001564; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001565; X86-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
1566; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001567; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1568; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
1569; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
1570; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1571; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001572; X86-AVX2-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001573; X86-AVX2-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001574; X86-AVX2-NEXT: vzeroupper
1575; X86-AVX2-NEXT: retl
1576;
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001577; X64-SSE2-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001578; X64-SSE2: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001579; X64-SSE2-NEXT: pminsw %xmm3, %xmm1
1580; X64-SSE2-NEXT: pminsw %xmm2, %xmm0
1581; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
1582; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1583; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
1584; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
1585; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
1586; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1587; X64-SSE2-NEXT: psrld $16, %xmm1
1588; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
1589; X64-SSE2-NEXT: movd %xmm1, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001590; X64-SSE2-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001591; X64-SSE2-NEXT: retq
1592;
1593; X64-SSE42-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001594; X64-SSE42: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001595; X64-SSE42-NEXT: pminsw %xmm3, %xmm1
1596; X64-SSE42-NEXT: pminsw %xmm2, %xmm0
1597; X64-SSE42-NEXT: pminsw %xmm1, %xmm0
1598; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
1599; X64-SSE42-NEXT: pxor %xmm1, %xmm0
1600; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
1601; X64-SSE42-NEXT: pxor %xmm1, %xmm0
1602; X64-SSE42-NEXT: movd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001603; X64-SSE42-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001604; X64-SSE42-NEXT: retq
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001605;
1606; X64-AVX1-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001607; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001608; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1609; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1610; X64-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
1611; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1612; X64-AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001613; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
1614; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1615; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1616; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001617; X64-AVX1-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001618; X64-AVX1-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001619; X64-AVX1-NEXT: vzeroupper
1620; X64-AVX1-NEXT: retq
1621;
1622; X64-AVX2-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001623; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001624; X64-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
1625; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001626; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1627; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
1628; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
1629; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1630; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001631; X64-AVX2-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001632; X64-AVX2-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001633; X64-AVX2-NEXT: vzeroupper
1634; X64-AVX2-NEXT: retq
1635;
1636; X64-AVX512-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001637; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001638; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001639; X64-AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001640; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001641; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1642; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
1643; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
1644; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
1645; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001646; X64-AVX512-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001647; X64-AVX512-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001648; X64-AVX512-NEXT: vzeroupper
1649; X64-AVX512-NEXT: retq
1650 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1651 %2 = icmp slt <32 x i16> %a0, %1
1652 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
1653 %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1654 %5 = icmp slt <32 x i16> %3, %4
1655 %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
1656 %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1657 %8 = icmp slt <32 x i16> %6, %7
1658 %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
1659 %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1660 %11 = icmp slt <32 x i16> %9, %10
1661 %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10
1662 %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1663 %14 = icmp slt <32 x i16> %12, %13
1664 %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13
1665 %16 = extractelement <32 x i16> %15, i32 0
1666 ret i16 %16
1667}
1668
1669define i8 @test_reduce_v64i8(<64 x i8> %a0) {
1670; X86-SSE2-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001671; X86-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001672; X86-SSE2-NEXT: movdqa %xmm3, %xmm4
1673; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm4
1674; X86-SSE2-NEXT: movdqa %xmm2, %xmm5
1675; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm5
1676; X86-SSE2-NEXT: pand %xmm5, %xmm0
1677; X86-SSE2-NEXT: pandn %xmm2, %xmm5
1678; X86-SSE2-NEXT: por %xmm0, %xmm5
1679; X86-SSE2-NEXT: pand %xmm4, %xmm1
1680; X86-SSE2-NEXT: pandn %xmm3, %xmm4
1681; X86-SSE2-NEXT: por %xmm1, %xmm4
1682; X86-SSE2-NEXT: movdqa %xmm4, %xmm0
1683; X86-SSE2-NEXT: pcmpgtb %xmm5, %xmm0
1684; X86-SSE2-NEXT: pand %xmm0, %xmm5
1685; X86-SSE2-NEXT: pandn %xmm4, %xmm0
1686; X86-SSE2-NEXT: por %xmm5, %xmm0
1687; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1688; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
1689; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
1690; X86-SSE2-NEXT: pand %xmm2, %xmm0
1691; X86-SSE2-NEXT: pandn %xmm1, %xmm2
1692; X86-SSE2-NEXT: por %xmm0, %xmm2
1693; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
1694; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1695; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
1696; X86-SSE2-NEXT: pand %xmm1, %xmm2
1697; X86-SSE2-NEXT: pandn %xmm0, %xmm1
1698; X86-SSE2-NEXT: por %xmm2, %xmm1
1699; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
1700; X86-SSE2-NEXT: psrld $16, %xmm0
1701; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1702; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
1703; X86-SSE2-NEXT: pand %xmm2, %xmm1
1704; X86-SSE2-NEXT: pandn %xmm0, %xmm2
1705; X86-SSE2-NEXT: por %xmm1, %xmm2
1706; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
1707; X86-SSE2-NEXT: psrlw $8, %xmm0
1708; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1709; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
1710; X86-SSE2-NEXT: pand %xmm1, %xmm2
1711; X86-SSE2-NEXT: pandn %xmm0, %xmm1
1712; X86-SSE2-NEXT: por %xmm2, %xmm1
1713; X86-SSE2-NEXT: movd %xmm1, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001714; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001715; X86-SSE2-NEXT: retl
1716;
1717; X86-SSE42-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001718; X86-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001719; X86-SSE42-NEXT: pminsb %xmm3, %xmm1
1720; X86-SSE42-NEXT: pminsb %xmm2, %xmm0
1721; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001722; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1723; X86-SSE42-NEXT: pxor %xmm1, %xmm0
1724; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
1725; X86-SSE42-NEXT: psrlw $8, %xmm2
1726; X86-SSE42-NEXT: pminub %xmm0, %xmm2
1727; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
1728; X86-SSE42-NEXT: pxor %xmm1, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001729; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001730; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001731; X86-SSE42-NEXT: retl
1732;
1733; X86-AVX1-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001734; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001735; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1736; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1737; X86-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
1738; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1739; X86-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001740; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1741; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1742; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
1743; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
1744; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1745; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001746; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001747; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001748; X86-AVX1-NEXT: vzeroupper
1749; X86-AVX1-NEXT: retl
1750;
1751; X86-AVX2-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001752; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001753; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
1754; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001755; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1756; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1757; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
1758; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
1759; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
1760; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1761; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001762; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001763; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001764; X86-AVX2-NEXT: vzeroupper
1765; X86-AVX2-NEXT: retl
1766;
1767; X64-SSE2-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001768; X64-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001769; X64-SSE2-NEXT: movdqa %xmm3, %xmm4
1770; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm4
1771; X64-SSE2-NEXT: movdqa %xmm2, %xmm5
1772; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm5
1773; X64-SSE2-NEXT: pand %xmm5, %xmm0
1774; X64-SSE2-NEXT: pandn %xmm2, %xmm5
1775; X64-SSE2-NEXT: por %xmm0, %xmm5
1776; X64-SSE2-NEXT: pand %xmm4, %xmm1
1777; X64-SSE2-NEXT: pandn %xmm3, %xmm4
1778; X64-SSE2-NEXT: por %xmm1, %xmm4
1779; X64-SSE2-NEXT: movdqa %xmm4, %xmm0
1780; X64-SSE2-NEXT: pcmpgtb %xmm5, %xmm0
1781; X64-SSE2-NEXT: pand %xmm0, %xmm5
1782; X64-SSE2-NEXT: pandn %xmm4, %xmm0
1783; X64-SSE2-NEXT: por %xmm5, %xmm0
1784; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1785; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
1786; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
1787; X64-SSE2-NEXT: pand %xmm2, %xmm0
1788; X64-SSE2-NEXT: pandn %xmm1, %xmm2
1789; X64-SSE2-NEXT: por %xmm0, %xmm2
1790; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
1791; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1792; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
1793; X64-SSE2-NEXT: pand %xmm1, %xmm2
1794; X64-SSE2-NEXT: pandn %xmm0, %xmm1
1795; X64-SSE2-NEXT: por %xmm2, %xmm1
1796; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
1797; X64-SSE2-NEXT: psrld $16, %xmm0
1798; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
1799; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
1800; X64-SSE2-NEXT: pand %xmm2, %xmm1
1801; X64-SSE2-NEXT: pandn %xmm0, %xmm2
1802; X64-SSE2-NEXT: por %xmm1, %xmm2
1803; X64-SSE2-NEXT: movdqa %xmm2, %xmm0
1804; X64-SSE2-NEXT: psrlw $8, %xmm0
1805; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1806; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
1807; X64-SSE2-NEXT: pand %xmm1, %xmm2
1808; X64-SSE2-NEXT: pandn %xmm0, %xmm1
1809; X64-SSE2-NEXT: por %xmm2, %xmm1
1810; X64-SSE2-NEXT: movd %xmm1, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001811; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001812; X64-SSE2-NEXT: retq
1813;
1814; X64-SSE42-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001815; X64-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001816; X64-SSE42-NEXT: pminsb %xmm3, %xmm1
1817; X64-SSE42-NEXT: pminsb %xmm2, %xmm0
1818; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001819; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1820; X64-SSE42-NEXT: pxor %xmm1, %xmm0
1821; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
1822; X64-SSE42-NEXT: psrlw $8, %xmm2
1823; X64-SSE42-NEXT: pminub %xmm0, %xmm2
1824; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
1825; X64-SSE42-NEXT: pxor %xmm1, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001826; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001827; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001828; X64-SSE42-NEXT: retq
1829;
1830; X64-AVX1-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001831; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001832; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1833; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1834; X64-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
1835; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1836; X64-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001837; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1838; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1839; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
1840; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
1841; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1842; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001843; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001844; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001845; X64-AVX1-NEXT: vzeroupper
1846; X64-AVX1-NEXT: retq
1847;
1848; X64-AVX2-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001849; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001850; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
1851; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001852; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1853; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1854; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
1855; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
1856; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
1857; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1858; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001859; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001860; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001861; X64-AVX2-NEXT: vzeroupper
1862; X64-AVX2-NEXT: retq
1863;
1864; X64-AVX512-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001865; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001866; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001867; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001868; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001869; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1870; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
1871; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
1872; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
1873; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
1874; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
1875; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001876; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001877; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001878; X64-AVX512-NEXT: vzeroupper
1879; X64-AVX512-NEXT: retq
1880 %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1881 %2 = icmp slt <64 x i8> %a0, %1
1882 %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
1883 %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1884 %5 = icmp slt <64 x i8> %3, %4
1885 %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
1886 %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1887 %8 = icmp slt <64 x i8> %6, %7
1888 %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
1889 %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1890 %11 = icmp slt <64 x i8> %9, %10
1891 %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
1892 %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1893 %14 = icmp slt <64 x i8> %12, %13
1894 %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13
1895 %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1896 %17 = icmp slt <64 x i8> %15, %16
1897 %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16
1898 %19 = extractelement <64 x i8> %18, i32 0
1899 ret i8 %19
1900}