blob: fa92158ae92d0ce10241c039fafd79fcad53a931 [file] [log] [blame]
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE2
3; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE42
4; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX1
5; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX2
6; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE2
7; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE42
8; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
9; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
10; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX512
11
12;
13; 128-bit Vectors
14;
15
; Signed-max reduction of a <2 x i64> vector, written out as an explicit
; shufflevector + icmp sgt + select step with the result read from lane 0.
; The check lines inside this function are autogenerated (see the NOTE at the
; top of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
16define i64 @test_reduce_v2i64(<2 x i64> %a0) {
17; X86-SSE2-LABEL: test_reduce_v2i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000018; X86-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +000019; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
20; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
21; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
22; X86-SSE2-NEXT: pxor %xmm2, %xmm3
23; X86-SSE2-NEXT: pxor %xmm1, %xmm2
24; X86-SSE2-NEXT: movdqa %xmm3, %xmm4
25; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm4
26; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
27; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm2
28; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
29; X86-SSE2-NEXT: pand %xmm5, %xmm2
30; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
31; X86-SSE2-NEXT: por %xmm2, %xmm3
32; X86-SSE2-NEXT: pand %xmm3, %xmm0
33; X86-SSE2-NEXT: pandn %xmm1, %xmm3
34; X86-SSE2-NEXT: por %xmm0, %xmm3
35; X86-SSE2-NEXT: movd %xmm3, %eax
36; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
37; X86-SSE2-NEXT: movd %xmm0, %edx
38; X86-SSE2-NEXT: retl
39;
40; X86-SSE42-LABEL: test_reduce_v2i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000041; X86-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +000042; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
43; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
44; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
45; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
46; X86-SSE42-NEXT: movd %xmm2, %eax
47; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx
48; X86-SSE42-NEXT: retl
49;
50; X86-AVX-LABEL: test_reduce_v2i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000051; X86-AVX: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +000052; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
53; X86-AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
54; X86-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
55; X86-AVX-NEXT: vmovd %xmm0, %eax
56; X86-AVX-NEXT: vpextrd $1, %xmm0, %edx
57; X86-AVX-NEXT: retl
58;
59; X64-SSE2-LABEL: test_reduce_v2i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000060; X64-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +000061; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
62; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
63; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
64; X64-SSE2-NEXT: pxor %xmm2, %xmm3
65; X64-SSE2-NEXT: pxor %xmm1, %xmm2
66; X64-SSE2-NEXT: movdqa %xmm3, %xmm4
67; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm4
68; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
69; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm2
70; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
71; X64-SSE2-NEXT: pand %xmm5, %xmm2
72; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
73; X64-SSE2-NEXT: por %xmm2, %xmm3
74; X64-SSE2-NEXT: pand %xmm3, %xmm0
75; X64-SSE2-NEXT: pandn %xmm1, %xmm3
76; X64-SSE2-NEXT: por %xmm0, %xmm3
77; X64-SSE2-NEXT: movq %xmm3, %rax
78; X64-SSE2-NEXT: retq
79;
80; X64-SSE42-LABEL: test_reduce_v2i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000081; X64-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +000082; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
83; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
84; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
85; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
86; X64-SSE42-NEXT: movq %xmm2, %rax
87; X64-SSE42-NEXT: retq
88;
89; X64-AVX1-LABEL: test_reduce_v2i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000090; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +000091; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
92; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
93; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
94; X64-AVX1-NEXT: vmovq %xmm0, %rax
95; X64-AVX1-NEXT: retq
96;
97; X64-AVX2-LABEL: test_reduce_v2i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000098; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +000099; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
100; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
101; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
102; X64-AVX2-NEXT: vmovq %xmm0, %rax
103; X64-AVX2-NEXT: retq
104;
105; X64-AVX512-LABEL: test_reduce_v2i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000106; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000107; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
108; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
109; X64-AVX512-NEXT: vmovq %xmm0, %rax
110; X64-AVX512-NEXT: retq
; Move element 1 into lane 0; lane 1 of the shuffle result is undef.
111 %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; Lane-wise signed compare, then keep the larger value in each lane.
112 %2 = icmp sgt <2 x i64> %a0, %1
113 %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1
; Only lane 0 carries the reduction result.
114 %4 = extractelement <2 x i64> %3, i32 0
115 ret i64 %4
116}
117
; Signed-max reduction of a <4 x i32> vector, written out as two explicit
; shufflevector + icmp sgt + select folding steps with the result read from
; lane 0. The check lines inside this function are autogenerated (see the NOTE
; at the top of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
118define i32 @test_reduce_v4i32(<4 x i32> %a0) {
119; X86-SSE2-LABEL: test_reduce_v4i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000120; X86-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000121; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
122; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
123; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
124; X86-SSE2-NEXT: pand %xmm2, %xmm0
125; X86-SSE2-NEXT: pandn %xmm1, %xmm2
126; X86-SSE2-NEXT: por %xmm0, %xmm2
127; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
128; X86-SSE2-NEXT: movdqa %xmm2, %xmm1
129; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
130; X86-SSE2-NEXT: pand %xmm1, %xmm2
131; X86-SSE2-NEXT: pandn %xmm0, %xmm1
132; X86-SSE2-NEXT: por %xmm2, %xmm1
133; X86-SSE2-NEXT: movd %xmm1, %eax
134; X86-SSE2-NEXT: retl
135;
136; X86-SSE42-LABEL: test_reduce_v4i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000137; X86-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000138; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
139; X86-SSE42-NEXT: pmaxsd %xmm0, %xmm1
140; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
141; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0
142; X86-SSE42-NEXT: movd %xmm0, %eax
143; X86-SSE42-NEXT: retl
144;
145; X86-AVX-LABEL: test_reduce_v4i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000146; X86-AVX: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000147; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
148; X86-AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
149; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
150; X86-AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
151; X86-AVX-NEXT: vmovd %xmm0, %eax
152; X86-AVX-NEXT: retl
153;
154; X64-SSE2-LABEL: test_reduce_v4i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000155; X64-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000156; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
157; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
158; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
159; X64-SSE2-NEXT: pand %xmm2, %xmm0
160; X64-SSE2-NEXT: pandn %xmm1, %xmm2
161; X64-SSE2-NEXT: por %xmm0, %xmm2
162; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
163; X64-SSE2-NEXT: movdqa %xmm2, %xmm1
164; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
165; X64-SSE2-NEXT: pand %xmm1, %xmm2
166; X64-SSE2-NEXT: pandn %xmm0, %xmm1
167; X64-SSE2-NEXT: por %xmm2, %xmm1
168; X64-SSE2-NEXT: movd %xmm1, %eax
169; X64-SSE2-NEXT: retq
170;
171; X64-SSE42-LABEL: test_reduce_v4i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000172; X64-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000173; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
174; X64-SSE42-NEXT: pmaxsd %xmm0, %xmm1
175; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
176; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0
177; X64-SSE42-NEXT: movd %xmm0, %eax
178; X64-SSE42-NEXT: retq
179;
180; X64-AVX-LABEL: test_reduce_v4i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000181; X64-AVX: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000182; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
183; X64-AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
184; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
185; X64-AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
186; X64-AVX-NEXT: vmovd %xmm0, %eax
187; X64-AVX-NEXT: retq
; Step 1: fold the upper half (elements 2,3) onto lanes 0,1.
188 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
189 %2 = icmp sgt <4 x i32> %a0, %1
190 %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1
; Step 2: fold element 1 of the partial result onto lane 0.
191 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
192 %5 = icmp sgt <4 x i32> %3, %4
193 %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4
; Lane 0 now holds the signed maximum of all four input elements.
194 %7 = extractelement <4 x i32> %6, i32 0
195 ret i32 %7
196}
197
; Signed-max reduction of an <8 x i16> vector, written out as three explicit
; shufflevector + icmp sgt + select folding steps with the result read from
; lane 0. The check lines inside this function are autogenerated (see the NOTE
; at the top of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
198define i16 @test_reduce_v8i16(<8 x i16> %a0) {
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000199; X86-SSE2-LABEL: test_reduce_v8i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000200; X86-SSE2: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000201; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
202; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
203; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
204; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
205; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
206; X86-SSE2-NEXT: psrld $16, %xmm1
207; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
208; X86-SSE2-NEXT: movd %xmm1, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000209; X86-SSE2-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000210; X86-SSE2-NEXT: retl
211;
212; X86-SSE42-LABEL: test_reduce_v8i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000213; X86-SSE42: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000214; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
215; X86-SSE42-NEXT: pxor %xmm1, %xmm0
216; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
217; X86-SSE42-NEXT: pxor %xmm1, %xmm0
218; X86-SSE42-NEXT: movd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000219; X86-SSE42-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000220; X86-SSE42-NEXT: retl
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000221;
222; X86-AVX-LABEL: test_reduce_v8i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000223; X86-AVX: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000224; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
225; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
226; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
227; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000228; X86-AVX-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000229; X86-AVX-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000230; X86-AVX-NEXT: retl
231;
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000232; X64-SSE2-LABEL: test_reduce_v8i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000233; X64-SSE2: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000234; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
235; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
236; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
237; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
238; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
239; X64-SSE2-NEXT: psrld $16, %xmm1
240; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
241; X64-SSE2-NEXT: movd %xmm1, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000242; X64-SSE2-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000243; X64-SSE2-NEXT: retq
244;
245; X64-SSE42-LABEL: test_reduce_v8i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000246; X64-SSE42: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000247; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
248; X64-SSE42-NEXT: pxor %xmm1, %xmm0
249; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
250; X64-SSE42-NEXT: pxor %xmm1, %xmm0
251; X64-SSE42-NEXT: movd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000252; X64-SSE42-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000253; X64-SSE42-NEXT: retq
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000254;
255; X64-AVX-LABEL: test_reduce_v8i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000256; X64-AVX: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000257; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
258; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
259; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
260; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000261; X64-AVX-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000262; X64-AVX-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000263; X64-AVX-NEXT: retq
; Step 1: fold the upper half (elements 4..7) onto lanes 0..3.
264 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
265 %2 = icmp sgt <8 x i16> %a0, %1
266 %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1
; Step 2: fold elements 2,3 of the partial result onto lanes 0,1.
267 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
268 %5 = icmp sgt <8 x i16> %3, %4
269 %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4
; Step 3: fold element 1 of the partial result onto lane 0.
270 %7 = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
271 %8 = icmp sgt <8 x i16> %6, %7
272 %9 = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7
; Lane 0 now holds the signed maximum of all eight input elements.
273 %10 = extractelement <8 x i16> %9, i32 0
274 ret i16 %10
275}
276
; Signed-max reduction of a <16 x i8> vector, written out as four explicit
; shufflevector + icmp sgt + select folding steps with the result read from
; lane 0. The check lines inside this function are autogenerated (see the NOTE
; at the top of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
277define i8 @test_reduce_v16i8(<16 x i8> %a0) {
278; X86-SSE2-LABEL: test_reduce_v16i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000279; X86-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000280; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
281; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
282; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
283; X86-SSE2-NEXT: pand %xmm2, %xmm0
284; X86-SSE2-NEXT: pandn %xmm1, %xmm2
285; X86-SSE2-NEXT: por %xmm0, %xmm2
286; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
287; X86-SSE2-NEXT: movdqa %xmm2, %xmm1
288; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
289; X86-SSE2-NEXT: pand %xmm1, %xmm2
290; X86-SSE2-NEXT: pandn %xmm0, %xmm1
291; X86-SSE2-NEXT: por %xmm2, %xmm1
292; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
293; X86-SSE2-NEXT: psrld $16, %xmm0
294; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
295; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
296; X86-SSE2-NEXT: pand %xmm2, %xmm1
297; X86-SSE2-NEXT: pandn %xmm0, %xmm2
298; X86-SSE2-NEXT: por %xmm1, %xmm2
299; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
300; X86-SSE2-NEXT: psrlw $8, %xmm0
301; X86-SSE2-NEXT: movdqa %xmm2, %xmm1
302; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
303; X86-SSE2-NEXT: pand %xmm1, %xmm2
304; X86-SSE2-NEXT: pandn %xmm0, %xmm1
305; X86-SSE2-NEXT: por %xmm2, %xmm1
306; X86-SSE2-NEXT: movd %xmm1, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000307; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000308; X86-SSE2-NEXT: retl
309;
310; X86-SSE42-LABEL: test_reduce_v16i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000311; X86-SSE42: ## %bb.0:
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +0000312; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
313; X86-SSE42-NEXT: pxor %xmm1, %xmm0
314; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
315; X86-SSE42-NEXT: psrlw $8, %xmm2
316; X86-SSE42-NEXT: pminub %xmm0, %xmm2
317; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
318; X86-SSE42-NEXT: pxor %xmm1, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000319; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000320; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000321; X86-SSE42-NEXT: retl
322;
323; X86-AVX-LABEL: test_reduce_v16i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000324; X86-AVX: ## %bb.0:
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +0000325; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
326; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
327; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
328; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
329; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
330; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000331; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000332; X86-AVX-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000333; X86-AVX-NEXT: retl
334;
335; X64-SSE2-LABEL: test_reduce_v16i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000336; X64-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000337; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
338; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
339; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
340; X64-SSE2-NEXT: pand %xmm2, %xmm0
341; X64-SSE2-NEXT: pandn %xmm1, %xmm2
342; X64-SSE2-NEXT: por %xmm0, %xmm2
343; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
344; X64-SSE2-NEXT: movdqa %xmm2, %xmm1
345; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
346; X64-SSE2-NEXT: pand %xmm1, %xmm2
347; X64-SSE2-NEXT: pandn %xmm0, %xmm1
348; X64-SSE2-NEXT: por %xmm2, %xmm1
349; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
350; X64-SSE2-NEXT: psrld $16, %xmm0
351; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
352; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
353; X64-SSE2-NEXT: pand %xmm2, %xmm1
354; X64-SSE2-NEXT: pandn %xmm0, %xmm2
355; X64-SSE2-NEXT: por %xmm1, %xmm2
356; X64-SSE2-NEXT: movdqa %xmm2, %xmm0
357; X64-SSE2-NEXT: psrlw $8, %xmm0
358; X64-SSE2-NEXT: movdqa %xmm2, %xmm1
359; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
360; X64-SSE2-NEXT: pand %xmm1, %xmm2
361; X64-SSE2-NEXT: pandn %xmm0, %xmm1
362; X64-SSE2-NEXT: por %xmm2, %xmm1
363; X64-SSE2-NEXT: movd %xmm1, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000364; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000365; X64-SSE2-NEXT: retq
366;
367; X64-SSE42-LABEL: test_reduce_v16i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000368; X64-SSE42: ## %bb.0:
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +0000369; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
370; X64-SSE42-NEXT: pxor %xmm1, %xmm0
371; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
372; X64-SSE42-NEXT: psrlw $8, %xmm2
373; X64-SSE42-NEXT: pminub %xmm0, %xmm2
374; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
375; X64-SSE42-NEXT: pxor %xmm1, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000376; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000377; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000378; X64-SSE42-NEXT: retq
379;
380; X64-AVX-LABEL: test_reduce_v16i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000381; X64-AVX: ## %bb.0:
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +0000382; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
383; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
384; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
385; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
386; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
387; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000388; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000389; X64-AVX-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000390; X64-AVX-NEXT: retq
; Step 1: fold the upper half (elements 8..15) onto lanes 0..7.
391 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
392 %2 = icmp sgt <16 x i8> %a0, %1
393 %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
; Step 2: fold elements 4..7 of the partial result onto lanes 0..3.
394 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
395 %5 = icmp sgt <16 x i8> %3, %4
396 %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4
; Step 3: fold elements 2,3 of the partial result onto lanes 0,1.
397 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
398 %8 = icmp sgt <16 x i8> %6, %7
399 %9 = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7
; Step 4: fold element 1 of the partial result onto lane 0.
400 %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
401 %11 = icmp sgt <16 x i8> %9, %10
402 %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10
; Lane 0 now holds the signed maximum of all sixteen input elements.
403 %13 = extractelement <16 x i8> %12, i32 0
404 ret i8 %13
405}
406
407;
408; 256-bit Vectors
409;
410
; Signed-max reduction of a <4 x i64> vector, written out as two explicit
; shufflevector + icmp sgt + select folding steps with the result read from
; lane 0. The check lines inside this function are autogenerated (see the NOTE
; at the top of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
411define i64 @test_reduce_v4i64(<4 x i64> %a0) {
412; X86-SSE2-LABEL: test_reduce_v4i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000413; X86-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000414; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
415; X86-SSE2-NEXT: movdqa %xmm1, %xmm3
416; X86-SSE2-NEXT: pxor %xmm2, %xmm3
417; X86-SSE2-NEXT: movdqa %xmm0, %xmm4
418; X86-SSE2-NEXT: pxor %xmm2, %xmm4
419; X86-SSE2-NEXT: movdqa %xmm4, %xmm5
420; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
421; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
422; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm4
423; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
424; X86-SSE2-NEXT: pand %xmm6, %xmm3
425; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
426; X86-SSE2-NEXT: por %xmm3, %xmm4
427; X86-SSE2-NEXT: pand %xmm4, %xmm0
428; X86-SSE2-NEXT: pandn %xmm1, %xmm4
429; X86-SSE2-NEXT: por %xmm0, %xmm4
430; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
431; X86-SSE2-NEXT: movdqa %xmm4, %xmm1
432; X86-SSE2-NEXT: pxor %xmm2, %xmm1
433; X86-SSE2-NEXT: pxor %xmm0, %xmm2
434; X86-SSE2-NEXT: movdqa %xmm1, %xmm3
435; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm3
436; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
437; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm2
438; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
439; X86-SSE2-NEXT: pand %xmm5, %xmm1
440; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
441; X86-SSE2-NEXT: por %xmm1, %xmm2
442; X86-SSE2-NEXT: pand %xmm2, %xmm4
443; X86-SSE2-NEXT: pandn %xmm0, %xmm2
444; X86-SSE2-NEXT: por %xmm4, %xmm2
445; X86-SSE2-NEXT: movd %xmm2, %eax
446; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
447; X86-SSE2-NEXT: movd %xmm0, %edx
448; X86-SSE2-NEXT: retl
449;
450; X86-SSE42-LABEL: test_reduce_v4i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000451; X86-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000452; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
453; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
454; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
455; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
456; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
457; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
458; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
459; X86-SSE42-NEXT: movd %xmm2, %eax
460; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx
461; X86-SSE42-NEXT: retl
462;
463; X86-AVX1-LABEL: test_reduce_v4i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000464; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000465; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
466; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
467; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
468; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
469; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
470; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
471; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
472; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
473; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
474; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
475; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
476; X86-AVX1-NEXT: vmovd %xmm0, %eax
477; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
478; X86-AVX1-NEXT: vzeroupper
479; X86-AVX1-NEXT: retl
480;
481; X86-AVX2-LABEL: test_reduce_v4i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000482; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000483; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
484; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
485; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
486; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
487; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
488; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
489; X86-AVX2-NEXT: vmovd %xmm0, %eax
490; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
491; X86-AVX2-NEXT: vzeroupper
492; X86-AVX2-NEXT: retl
493;
494; X64-SSE2-LABEL: test_reduce_v4i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000495; X64-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000496; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
497; X64-SSE2-NEXT: movdqa %xmm1, %xmm3
498; X64-SSE2-NEXT: pxor %xmm2, %xmm3
499; X64-SSE2-NEXT: movdqa %xmm0, %xmm4
500; X64-SSE2-NEXT: pxor %xmm2, %xmm4
501; X64-SSE2-NEXT: movdqa %xmm4, %xmm5
502; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
503; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
504; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm4
505; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
506; X64-SSE2-NEXT: pand %xmm6, %xmm3
507; X64-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
508; X64-SSE2-NEXT: por %xmm3, %xmm4
509; X64-SSE2-NEXT: pand %xmm4, %xmm0
510; X64-SSE2-NEXT: pandn %xmm1, %xmm4
511; X64-SSE2-NEXT: por %xmm0, %xmm4
512; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
513; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
514; X64-SSE2-NEXT: pxor %xmm2, %xmm1
515; X64-SSE2-NEXT: pxor %xmm0, %xmm2
516; X64-SSE2-NEXT: movdqa %xmm1, %xmm3
517; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3
518; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
519; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm2
520; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
521; X64-SSE2-NEXT: pand %xmm5, %xmm1
522; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
523; X64-SSE2-NEXT: por %xmm1, %xmm2
524; X64-SSE2-NEXT: pand %xmm2, %xmm4
525; X64-SSE2-NEXT: pandn %xmm0, %xmm2
526; X64-SSE2-NEXT: por %xmm4, %xmm2
527; X64-SSE2-NEXT: movq %xmm2, %rax
528; X64-SSE2-NEXT: retq
529;
530; X64-SSE42-LABEL: test_reduce_v4i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000531; X64-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000532; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
533; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
534; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
535; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
536; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
537; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
538; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
539; X64-SSE42-NEXT: movq %xmm2, %rax
540; X64-SSE42-NEXT: retq
541;
542; X64-AVX1-LABEL: test_reduce_v4i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000543; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000544; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
545; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
546; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
547; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
548; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
549; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
550; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
551; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
552; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
553; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
554; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
555; X64-AVX1-NEXT: vmovq %xmm0, %rax
556; X64-AVX1-NEXT: vzeroupper
557; X64-AVX1-NEXT: retq
558;
559; X64-AVX2-LABEL: test_reduce_v4i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000560; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000561; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
562; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
563; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
564; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
565; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
566; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
567; X64-AVX2-NEXT: vmovq %xmm0, %rax
568; X64-AVX2-NEXT: vzeroupper
569; X64-AVX2-NEXT: retq
570;
571; X64-AVX512-LABEL: test_reduce_v4i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000572; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000573; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
574; X64-AVX512-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
575; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
576; X64-AVX512-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
577; X64-AVX512-NEXT: vmovq %xmm0, %rax
578; X64-AVX512-NEXT: vzeroupper
579; X64-AVX512-NEXT: retq
; Step 1: fold the upper half (elements 2,3) onto lanes 0,1.
580 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
581 %2 = icmp sgt <4 x i64> %a0, %1
582 %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1
; Step 2: fold element 1 of the partial result onto lane 0.
583 %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
584 %5 = icmp sgt <4 x i64> %3, %4
585 %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4
; Lane 0 now holds the signed maximum of all four input elements.
586 %7 = extractelement <4 x i64> %6, i32 0
587 ret i64 %7
588}
589
; test_reduce_v8i32 - horizontal signed-max reduction of an <8 x i32> vector
; down to a single i32 (the value returned is lane 0 of the final select).
;
; The IR at the bottom of the function is a three-round shuffle/compare/select
; tree. The assertion blocks that follow the define line pin the expected llc
; output for each RUN configuration in the file header; they are autogenerated,
; so regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
590define i32 @test_reduce_v8i32(<8 x i32> %a0) {
591; X86-SSE2-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000592; X86-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000593; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
594; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
595; X86-SSE2-NEXT: pand %xmm2, %xmm0
596; X86-SSE2-NEXT: pandn %xmm1, %xmm2
597; X86-SSE2-NEXT: por %xmm0, %xmm2
598; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
599; X86-SSE2-NEXT: movdqa %xmm2, %xmm1
600; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
601; X86-SSE2-NEXT: pand %xmm1, %xmm2
602; X86-SSE2-NEXT: pandn %xmm0, %xmm1
603; X86-SSE2-NEXT: por %xmm2, %xmm1
604; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
605; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
606; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
607; X86-SSE2-NEXT: pand %xmm2, %xmm1
608; X86-SSE2-NEXT: pandn %xmm0, %xmm2
609; X86-SSE2-NEXT: por %xmm1, %xmm2
610; X86-SSE2-NEXT: movd %xmm2, %eax
611; X86-SSE2-NEXT: retl
612;
613; X86-SSE42-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000614; X86-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000615; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0
616; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
617; X86-SSE42-NEXT: pmaxsd %xmm0, %xmm1
618; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
619; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0
620; X86-SSE42-NEXT: movd %xmm0, %eax
621; X86-SSE42-NEXT: retl
622;
623; X86-AVX1-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000624; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000625; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
626; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
627; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
628; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
629; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
630; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
631; X86-AVX1-NEXT: vmovd %xmm0, %eax
632; X86-AVX1-NEXT: vzeroupper
633; X86-AVX1-NEXT: retl
634;
635; X86-AVX2-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000636; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000637; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
638; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
639; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
640; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
641; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
642; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
643; X86-AVX2-NEXT: vmovd %xmm0, %eax
644; X86-AVX2-NEXT: vzeroupper
645; X86-AVX2-NEXT: retl
646;
647; X64-SSE2-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000648; X64-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000649; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
650; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
651; X64-SSE2-NEXT: pand %xmm2, %xmm0
652; X64-SSE2-NEXT: pandn %xmm1, %xmm2
653; X64-SSE2-NEXT: por %xmm0, %xmm2
654; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
655; X64-SSE2-NEXT: movdqa %xmm2, %xmm1
656; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
657; X64-SSE2-NEXT: pand %xmm1, %xmm2
658; X64-SSE2-NEXT: pandn %xmm0, %xmm1
659; X64-SSE2-NEXT: por %xmm2, %xmm1
660; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
661; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
662; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
663; X64-SSE2-NEXT: pand %xmm2, %xmm1
664; X64-SSE2-NEXT: pandn %xmm0, %xmm2
665; X64-SSE2-NEXT: por %xmm1, %xmm2
666; X64-SSE2-NEXT: movd %xmm2, %eax
667; X64-SSE2-NEXT: retq
668;
669; X64-SSE42-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000670; X64-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000671; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0
672; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
673; X64-SSE42-NEXT: pmaxsd %xmm0, %xmm1
674; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
675; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0
676; X64-SSE42-NEXT: movd %xmm0, %eax
677; X64-SSE42-NEXT: retq
678;
679; X64-AVX1-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000680; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000681; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
682; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
683; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
684; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
685; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
686; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
687; X64-AVX1-NEXT: vmovd %xmm0, %eax
688; X64-AVX1-NEXT: vzeroupper
689; X64-AVX1-NEXT: retq
690;
691; X64-AVX2-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000692; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000693; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
694; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
695; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
696; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
697; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
698; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
699; X64-AVX2-NEXT: vmovd %xmm0, %eax
700; X64-AVX2-NEXT: vzeroupper
701; X64-AVX2-NEXT: retq
702;
703; X64-AVX512-LABEL: test_reduce_v8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000704; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000705; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
706; X64-AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
707; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
708; X64-AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
709; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
710; X64-AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
711; X64-AVX512-NEXT: vmovd %xmm0, %eax
712; X64-AVX512-NEXT: vzeroupper
713; X64-AVX512-NEXT: retq
; Reduction tree (input IR). Each round shuffles a group of higher elements
; down to the low lanes, then icmp sgt + select keeps the larger value in
; each lane. The rounds bring down elements 4-7, then 2-3, then 1, so after
; the third round lane 0 holds the signed maximum of all eight inputs. The
; remaining shuffle-mask entries are undef because those lanes are never read.
714 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
715 %2 = icmp sgt <8 x i32> %a0, %1
716 %3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1
717 %4 = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
718 %5 = icmp sgt <8 x i32> %3, %4
719 %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4
720 %7 = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
721 %8 = icmp sgt <8 x i32> %6, %7
722 %9 = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7
723 %10 = extractelement <8 x i32> %9, i32 0
724 ret i32 %10
725}
726
; test_reduce_v16i16 - horizontal signed-max reduction of a <16 x i16> vector
; down to a single i16 (the value returned is lane 0 of the final select).
;
; The IR at the bottom of the function is a four-round shuffle/compare/select
; tree. The assertion blocks that follow the define line are autogenerated;
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
;
; In the SSE4.2 and AVX assertion blocks, everything after the first pmaxsw
; is expected to be a single phminposuw bracketed by two xors with a splat of
; 32767 (0x7FFF). Xor with 0x7FFF reverses signed 16-bit ordering into
; unsigned ordering, so an unsigned horizontal minimum of the flipped values
; is the signed maximum of the originals once flipped back.
727define i16 @test_reduce_v16i16(<16 x i16> %a0) {
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000728; X86-SSE2-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000729; X86-SSE2: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000730; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
731; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
732; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
733; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
734; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
735; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
736; X86-SSE2-NEXT: psrld $16, %xmm1
737; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
738; X86-SSE2-NEXT: movd %xmm1, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000739; X86-SSE2-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000740; X86-SSE2-NEXT: retl
741;
742; X86-SSE42-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000743; X86-SSE42: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000744; X86-SSE42-NEXT: pmaxsw %xmm1, %xmm0
745; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
746; X86-SSE42-NEXT: pxor %xmm1, %xmm0
747; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
748; X86-SSE42-NEXT: pxor %xmm1, %xmm0
749; X86-SSE42-NEXT: movd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000750; X86-SSE42-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000751; X86-SSE42-NEXT: retl
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000752;
753; X86-AVX1-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000754; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000755; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
756; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000757; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
758; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
759; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
760; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000761; X86-AVX1-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000762; X86-AVX1-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000763; X86-AVX1-NEXT: vzeroupper
764; X86-AVX1-NEXT: retl
765;
766; X86-AVX2-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000767; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000768; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000769; X86-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
770; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
771; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
772; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
773; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000774; X86-AVX2-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000775; X86-AVX2-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000776; X86-AVX2-NEXT: vzeroupper
777; X86-AVX2-NEXT: retl
778;
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000779; X64-SSE2-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000780; X64-SSE2: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000781; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
782; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
783; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
784; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
785; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
786; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
787; X64-SSE2-NEXT: psrld $16, %xmm1
788; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
789; X64-SSE2-NEXT: movd %xmm1, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000790; X64-SSE2-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000791; X64-SSE2-NEXT: retq
792;
793; X64-SSE42-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000794; X64-SSE42: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000795; X64-SSE42-NEXT: pmaxsw %xmm1, %xmm0
796; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
797; X64-SSE42-NEXT: pxor %xmm1, %xmm0
798; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
799; X64-SSE42-NEXT: pxor %xmm1, %xmm0
800; X64-SSE42-NEXT: movd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000801; X64-SSE42-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000802; X64-SSE42-NEXT: retq
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000803;
804; X64-AVX1-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000805; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000806; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
807; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000808; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
809; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
810; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
811; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000812; X64-AVX1-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000813; X64-AVX1-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000814; X64-AVX1-NEXT: vzeroupper
815; X64-AVX1-NEXT: retq
816;
817; X64-AVX2-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000818; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000819; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000820; X64-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
821; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
822; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
823; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
824; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000825; X64-AVX2-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000826; X64-AVX2-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000827; X64-AVX2-NEXT: vzeroupper
828; X64-AVX2-NEXT: retq
829;
830; X64-AVX512-LABEL: test_reduce_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000831; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000832; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +0000833; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
834; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
835; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
836; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
837; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000838; X64-AVX512-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000839; X64-AVX512-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000840; X64-AVX512-NEXT: vzeroupper
841; X64-AVX512-NEXT: retq
; Reduction tree (input IR). Each round shuffles a group of higher elements
; down to the low lanes, then icmp sgt + select keeps the larger value in
; each lane. The rounds bring down elements 8-15, then 4-7, then 2-3, then 1,
; so after the fourth round lane 0 holds the signed maximum of all sixteen
; inputs. The remaining shuffle-mask entries are undef because those lanes
; are never read.
842 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
843 %2 = icmp sgt <16 x i16> %a0, %1
844 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
845 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
846 %5 = icmp sgt <16 x i16> %3, %4
847 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
848 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
849 %8 = icmp sgt <16 x i16> %6, %7
850 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7
851 %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
852 %11 = icmp sgt <16 x i16> %9, %10
853 %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10
854 %13 = extractelement <16 x i16> %12, i32 0
855 ret i16 %13
856}
857
858define i8 @test_reduce_v32i8(<32 x i8> %a0) {
859; X86-SSE2-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000860; X86-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000861; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
862; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
863; X86-SSE2-NEXT: pand %xmm2, %xmm0
864; X86-SSE2-NEXT: pandn %xmm1, %xmm2
865; X86-SSE2-NEXT: por %xmm0, %xmm2
866; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
867; X86-SSE2-NEXT: movdqa %xmm2, %xmm1
868; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
869; X86-SSE2-NEXT: pand %xmm1, %xmm2
870; X86-SSE2-NEXT: pandn %xmm0, %xmm1
871; X86-SSE2-NEXT: por %xmm2, %xmm1
872; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
873; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
874; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
875; X86-SSE2-NEXT: pand %xmm2, %xmm1
876; X86-SSE2-NEXT: pandn %xmm0, %xmm2
877; X86-SSE2-NEXT: por %xmm1, %xmm2
878; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
879; X86-SSE2-NEXT: psrld $16, %xmm0
880; X86-SSE2-NEXT: movdqa %xmm2, %xmm1
881; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
882; X86-SSE2-NEXT: pand %xmm1, %xmm2
883; X86-SSE2-NEXT: pandn %xmm0, %xmm1
884; X86-SSE2-NEXT: por %xmm2, %xmm1
885; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
886; X86-SSE2-NEXT: psrlw $8, %xmm0
887; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
888; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
889; X86-SSE2-NEXT: pand %xmm2, %xmm1
890; X86-SSE2-NEXT: pandn %xmm0, %xmm2
891; X86-SSE2-NEXT: por %xmm1, %xmm2
892; X86-SSE2-NEXT: movd %xmm2, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000893; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000894; X86-SSE2-NEXT: retl
895;
896; X86-SSE42-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000897; X86-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000898; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +0000899; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
900; X86-SSE42-NEXT: pxor %xmm1, %xmm0
901; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
902; X86-SSE42-NEXT: psrlw $8, %xmm2
903; X86-SSE42-NEXT: pminub %xmm0, %xmm2
904; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
905; X86-SSE42-NEXT: pxor %xmm1, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000906; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000907; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000908; X86-SSE42-NEXT: retl
909;
910; X86-AVX1-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000911; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000912; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
913; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +0000914; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
915; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
916; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
917; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
918; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
919; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000920; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000921; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000922; X86-AVX1-NEXT: vzeroupper
923; X86-AVX1-NEXT: retl
924;
925; X86-AVX2-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000926; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000927; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +0000928; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
929; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
930; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
931; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
932; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
933; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
934; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000935; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000936; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000937; X86-AVX2-NEXT: vzeroupper
938; X86-AVX2-NEXT: retl
939;
940; X64-SSE2-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000941; X64-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000942; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
943; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
944; X64-SSE2-NEXT: pand %xmm2, %xmm0
945; X64-SSE2-NEXT: pandn %xmm1, %xmm2
946; X64-SSE2-NEXT: por %xmm0, %xmm2
947; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
948; X64-SSE2-NEXT: movdqa %xmm2, %xmm1
949; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
950; X64-SSE2-NEXT: pand %xmm1, %xmm2
951; X64-SSE2-NEXT: pandn %xmm0, %xmm1
952; X64-SSE2-NEXT: por %xmm2, %xmm1
953; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
954; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
955; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
956; X64-SSE2-NEXT: pand %xmm2, %xmm1
957; X64-SSE2-NEXT: pandn %xmm0, %xmm2
958; X64-SSE2-NEXT: por %xmm1, %xmm2
959; X64-SSE2-NEXT: movdqa %xmm2, %xmm0
960; X64-SSE2-NEXT: psrld $16, %xmm0
961; X64-SSE2-NEXT: movdqa %xmm2, %xmm1
962; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
963; X64-SSE2-NEXT: pand %xmm1, %xmm2
964; X64-SSE2-NEXT: pandn %xmm0, %xmm1
965; X64-SSE2-NEXT: por %xmm2, %xmm1
966; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
967; X64-SSE2-NEXT: psrlw $8, %xmm0
968; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
969; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
970; X64-SSE2-NEXT: pand %xmm2, %xmm1
971; X64-SSE2-NEXT: pandn %xmm0, %xmm2
972; X64-SSE2-NEXT: por %xmm1, %xmm2
973; X64-SSE2-NEXT: movd %xmm2, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000974; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000975; X64-SSE2-NEXT: retq
976;
977; X64-SSE42-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000978; X64-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000979; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +0000980; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
981; X64-SSE42-NEXT: pxor %xmm1, %xmm0
982; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
983; X64-SSE42-NEXT: psrlw $8, %xmm2
984; X64-SSE42-NEXT: pminub %xmm0, %xmm2
985; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
986; X64-SSE42-NEXT: pxor %xmm1, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000987; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000988; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000989; X64-SSE42-NEXT: retq
990;
991; X64-AVX1-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000992; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +0000993; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
994; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +0000995; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
996; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
997; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
998; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
999; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1000; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001001; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001002; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001003; X64-AVX1-NEXT: vzeroupper
1004; X64-AVX1-NEXT: retq
1005;
1006; X64-AVX2-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001007; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001008; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001009; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
1010; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
1011; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
1012; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
1013; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
1014; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1015; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001016; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001017; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001018; X64-AVX2-NEXT: vzeroupper
1019; X64-AVX2-NEXT: retq
1020;
1021; X64-AVX512-LABEL: test_reduce_v32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001022; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001023; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001024; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
1025; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
1026; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
1027; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
1028; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
1029; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
1030; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001031; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001032; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001033; X64-AVX512-NEXT: vzeroupper
1034; X64-AVX512-NEXT: retq
1035 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1036 %2 = icmp sgt <32 x i8> %a0, %1
1037 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
1038 %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1039 %5 = icmp sgt <32 x i8> %3, %4
1040 %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
1041 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1042 %8 = icmp sgt <32 x i8> %6, %7
1043 %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
1044 %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1045 %11 = icmp sgt <32 x i8> %9, %10
1046 %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
1047 %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1048 %14 = icmp sgt <32 x i8> %12, %13
1049 %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13
1050 %16 = extractelement <32 x i8> %15, i32 0
1051 ret i8 %16
1052}
1053
1054;
1055; 512-bit Vectors
1056;
1057
1058define i64 @test_reduce_v8i64(<8 x i64> %a0) {
1059; X86-SSE2-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001060; X86-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001061; X86-SSE2-NEXT: subl $28, %esp
1062; X86-SSE2-NEXT: .cfi_def_cfa_offset 32
1063; X86-SSE2-NEXT: movdqa %xmm3, %xmm5
1064; X86-SSE2-NEXT: movdqa %xmm5, (%esp) ## 16-byte Spill
1065; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
1066; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
1067; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1068; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
1069; X86-SSE2-NEXT: pxor %xmm4, %xmm5
1070; X86-SSE2-NEXT: movdqa %xmm2, %xmm6
1071; X86-SSE2-NEXT: pxor %xmm4, %xmm6
1072; X86-SSE2-NEXT: movdqa %xmm6, %xmm7
1073; X86-SSE2-NEXT: pcmpgtd %xmm5, %xmm7
1074; X86-SSE2-NEXT: pcmpeqd %xmm5, %xmm6
1075; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
1076; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1077; X86-SSE2-NEXT: pand %xmm5, %xmm6
1078; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1079; X86-SSE2-NEXT: por %xmm6, %xmm5
1080; X86-SSE2-NEXT: movdqa %xmm3, %xmm6
1081; X86-SSE2-NEXT: pxor %xmm4, %xmm6
1082; X86-SSE2-NEXT: movdqa %xmm1, %xmm7
1083; X86-SSE2-NEXT: pxor %xmm4, %xmm7
1084; X86-SSE2-NEXT: movdqa %xmm7, %xmm0
1085; X86-SSE2-NEXT: pcmpgtd %xmm6, %xmm0
1086; X86-SSE2-NEXT: pcmpeqd %xmm6, %xmm7
1087; X86-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
1088; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[0,0,2,2]
1089; X86-SSE2-NEXT: pand %xmm6, %xmm7
1090; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1091; X86-SSE2-NEXT: por %xmm7, %xmm6
1092; X86-SSE2-NEXT: pand %xmm6, %xmm1
1093; X86-SSE2-NEXT: pandn %xmm3, %xmm6
1094; X86-SSE2-NEXT: por %xmm1, %xmm6
1095; X86-SSE2-NEXT: pand %xmm5, %xmm2
1096; X86-SSE2-NEXT: pandn (%esp), %xmm5 ## 16-byte Folded Reload
1097; X86-SSE2-NEXT: por %xmm2, %xmm5
1098; X86-SSE2-NEXT: movdqa %xmm5, %xmm0
1099; X86-SSE2-NEXT: pxor %xmm4, %xmm0
1100; X86-SSE2-NEXT: movdqa %xmm6, %xmm1
1101; X86-SSE2-NEXT: pxor %xmm4, %xmm1
1102; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
1103; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
1104; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm1
1105; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
1106; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1107; X86-SSE2-NEXT: pand %xmm0, %xmm1
1108; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
1109; X86-SSE2-NEXT: por %xmm1, %xmm0
1110; X86-SSE2-NEXT: pand %xmm0, %xmm6
1111; X86-SSE2-NEXT: pandn %xmm5, %xmm0
1112; X86-SSE2-NEXT: por %xmm6, %xmm0
1113; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1114; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1115; X86-SSE2-NEXT: pxor %xmm4, %xmm2
1116; X86-SSE2-NEXT: pxor %xmm1, %xmm4
1117; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
1118; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm3
1119; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm4
1120; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
1121; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1122; X86-SSE2-NEXT: pand %xmm2, %xmm4
1123; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
1124; X86-SSE2-NEXT: por %xmm4, %xmm2
1125; X86-SSE2-NEXT: pand %xmm2, %xmm0
1126; X86-SSE2-NEXT: pandn %xmm1, %xmm2
1127; X86-SSE2-NEXT: por %xmm0, %xmm2
1128; X86-SSE2-NEXT: movd %xmm2, %eax
1129; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
1130; X86-SSE2-NEXT: movd %xmm0, %edx
1131; X86-SSE2-NEXT: addl $28, %esp
1132; X86-SSE2-NEXT: retl
1133;
1134; X86-SSE42-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001135; X86-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001136; X86-SSE42-NEXT: movdqa %xmm0, %xmm4
1137; X86-SSE42-NEXT: movdqa %xmm4, %xmm5
1138; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm5
1139; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
1140; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
1141; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
1142; X86-SSE42-NEXT: movdqa %xmm5, %xmm0
1143; X86-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
1144; X86-SSE42-NEXT: movapd %xmm2, %xmm0
1145; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
1146; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
1147; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
1148; X86-SSE42-NEXT: movdqa %xmm3, %xmm0
1149; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
1150; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
1151; X86-SSE42-NEXT: movd %xmm1, %eax
1152; X86-SSE42-NEXT: pextrd $1, %xmm1, %edx
1153; X86-SSE42-NEXT: retl
1154;
1155; X86-AVX1-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001156; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001157; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1158; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1159; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1160; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
1161; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1162; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1163; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1164; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
1165; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
1166; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
1167; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1168; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
1169; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
1170; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1171; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
1172; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
1173; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1174; X86-AVX1-NEXT: vmovd %xmm0, %eax
1175; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
1176; X86-AVX1-NEXT: vzeroupper
1177; X86-AVX1-NEXT: retl
1178;
1179; X86-AVX2-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001180; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001181; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
1182; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1183; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
1184; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
1185; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1186; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
1187; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
1188; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1189; X86-AVX2-NEXT: vmovd %xmm0, %eax
1190; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
1191; X86-AVX2-NEXT: vzeroupper
1192; X86-AVX2-NEXT: retl
1193;
1194; X64-SSE2-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001195; X64-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001196; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
1197; X64-SSE2-NEXT: movdqa %xmm3, %xmm5
1198; X64-SSE2-NEXT: pxor %xmm4, %xmm5
1199; X64-SSE2-NEXT: movdqa %xmm1, %xmm6
1200; X64-SSE2-NEXT: pxor %xmm4, %xmm6
1201; X64-SSE2-NEXT: movdqa %xmm6, %xmm7
1202; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm7
1203; X64-SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
1204; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm6
1205; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1206; X64-SSE2-NEXT: pand %xmm8, %xmm6
1207; X64-SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[1,1,3,3]
1208; X64-SSE2-NEXT: por %xmm6, %xmm8
1209; X64-SSE2-NEXT: movdqa %xmm2, %xmm6
1210; X64-SSE2-NEXT: pxor %xmm4, %xmm6
1211; X64-SSE2-NEXT: movdqa %xmm0, %xmm7
1212; X64-SSE2-NEXT: pxor %xmm4, %xmm7
1213; X64-SSE2-NEXT: movdqa %xmm7, %xmm5
1214; X64-SSE2-NEXT: pcmpgtd %xmm6, %xmm5
1215; X64-SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm5[0,0,2,2]
1216; X64-SSE2-NEXT: pcmpeqd %xmm6, %xmm7
1217; X64-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
1218; X64-SSE2-NEXT: pand %xmm9, %xmm7
1219; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
1220; X64-SSE2-NEXT: por %xmm7, %xmm6
1221; X64-SSE2-NEXT: pand %xmm6, %xmm0
1222; X64-SSE2-NEXT: pandn %xmm2, %xmm6
1223; X64-SSE2-NEXT: por %xmm0, %xmm6
1224; X64-SSE2-NEXT: pand %xmm8, %xmm1
1225; X64-SSE2-NEXT: pandn %xmm3, %xmm8
1226; X64-SSE2-NEXT: por %xmm1, %xmm8
1227; X64-SSE2-NEXT: movdqa %xmm8, %xmm0
1228; X64-SSE2-NEXT: pxor %xmm4, %xmm0
1229; X64-SSE2-NEXT: movdqa %xmm6, %xmm1
1230; X64-SSE2-NEXT: pxor %xmm4, %xmm1
1231; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
1232; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
1233; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
1234; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1
1235; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1236; X64-SSE2-NEXT: pand %xmm3, %xmm0
1237; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
1238; X64-SSE2-NEXT: por %xmm0, %xmm1
1239; X64-SSE2-NEXT: pand %xmm1, %xmm6
1240; X64-SSE2-NEXT: pandn %xmm8, %xmm1
1241; X64-SSE2-NEXT: por %xmm6, %xmm1
1242; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
1243; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
1244; X64-SSE2-NEXT: pxor %xmm4, %xmm2
1245; X64-SSE2-NEXT: pxor %xmm0, %xmm4
1246; X64-SSE2-NEXT: movdqa %xmm2, %xmm3
1247; X64-SSE2-NEXT: pcmpgtd %xmm4, %xmm3
1248; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
1249; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm4
1250; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1251; X64-SSE2-NEXT: pand %xmm5, %xmm2
1252; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1253; X64-SSE2-NEXT: por %xmm2, %xmm3
1254; X64-SSE2-NEXT: pand %xmm3, %xmm1
1255; X64-SSE2-NEXT: pandn %xmm0, %xmm3
1256; X64-SSE2-NEXT: por %xmm1, %xmm3
1257; X64-SSE2-NEXT: movq %xmm3, %rax
1258; X64-SSE2-NEXT: retq
1259;
1260; X64-SSE42-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001261; X64-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001262; X64-SSE42-NEXT: movdqa %xmm0, %xmm4
1263; X64-SSE42-NEXT: movdqa %xmm4, %xmm5
1264; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm5
1265; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
1266; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
1267; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
1268; X64-SSE42-NEXT: movdqa %xmm5, %xmm0
1269; X64-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
1270; X64-SSE42-NEXT: movapd %xmm2, %xmm0
1271; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
1272; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
1273; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
1274; X64-SSE42-NEXT: movdqa %xmm3, %xmm0
1275; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
1276; X64-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
1277; X64-SSE42-NEXT: movq %xmm1, %rax
1278; X64-SSE42-NEXT: retq
1279;
1280; X64-AVX1-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001281; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001282; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1283; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1284; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1285; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
1286; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1287; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1288; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1289; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
1290; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
1291; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
1292; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1293; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
1294; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
1295; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1296; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
1297; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
1298; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1299; X64-AVX1-NEXT: vmovq %xmm0, %rax
1300; X64-AVX1-NEXT: vzeroupper
1301; X64-AVX1-NEXT: retq
1302;
1303; X64-AVX2-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001304; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001305; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
1306; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1307; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
1308; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
1309; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1310; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
1311; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
1312; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1313; X64-AVX2-NEXT: vmovq %xmm0, %rax
1314; X64-AVX2-NEXT: vzeroupper
1315; X64-AVX2-NEXT: retq
1316;
1317; X64-AVX512-LABEL: test_reduce_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001318; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001319; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1320; X64-AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
1321; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1322; X64-AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
1323; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1324; X64-AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
1325; X64-AVX512-NEXT: vmovq %xmm0, %rax
1326; X64-AVX512-NEXT: vzeroupper
1327; X64-AVX512-NEXT: retq
1328 %1 = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1329 %2 = icmp sgt <8 x i64> %a0, %1
1330 %3 = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1
1331 %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1332 %5 = icmp sgt <8 x i64> %3, %4
1333 %6 = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4
1334 %7 = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1335 %8 = icmp sgt <8 x i64> %6, %7
1336 %9 = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7
1337 %10 = extractelement <8 x i64> %9, i32 0
1338 ret i64 %10
1339}
1340
; test_reduce_v16i32 - horizontal signed-max reduction of a <16 x i32> vector.
; The IR folds the vector in four steps (8, 4, 2, then 1 lane): each step
; shuffles the upper lanes down, compares with icmp sgt and selects the larger
; value per lane; element 0 of the final vector is returned.
; Lowering expected by the assertions below: the SSE2 runs emulate the max
; with pcmpgtd followed by pand/pandn/por, while the SSE4.2, AVX, AVX2 and
; AVX512 runs use pmaxsd/vpmaxsd directly.
; The assertions are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
1341define i32 @test_reduce_v16i32(<16 x i32> %a0) {
1342; X86-SSE2-LABEL: test_reduce_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001343; X86-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001344; X86-SSE2-NEXT: movdqa %xmm0, %xmm4
1345; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm4
1346; X86-SSE2-NEXT: movdqa %xmm1, %xmm5
1347; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
1348; X86-SSE2-NEXT: pand %xmm5, %xmm1
1349; X86-SSE2-NEXT: pandn %xmm3, %xmm5
1350; X86-SSE2-NEXT: por %xmm1, %xmm5
1351; X86-SSE2-NEXT: pand %xmm4, %xmm0
1352; X86-SSE2-NEXT: pandn %xmm2, %xmm4
1353; X86-SSE2-NEXT: por %xmm0, %xmm4
1354; X86-SSE2-NEXT: movdqa %xmm4, %xmm0
1355; X86-SSE2-NEXT: pcmpgtd %xmm5, %xmm0
1356; X86-SSE2-NEXT: pand %xmm0, %xmm4
1357; X86-SSE2-NEXT: pandn %xmm5, %xmm0
1358; X86-SSE2-NEXT: por %xmm4, %xmm0
1359; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1360; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1361; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
1362; X86-SSE2-NEXT: pand %xmm2, %xmm0
1363; X86-SSE2-NEXT: pandn %xmm1, %xmm2
1364; X86-SSE2-NEXT: por %xmm0, %xmm2
1365; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
1366; X86-SSE2-NEXT: movdqa %xmm2, %xmm1
1367; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
1368; X86-SSE2-NEXT: pand %xmm1, %xmm2
1369; X86-SSE2-NEXT: pandn %xmm0, %xmm1
1370; X86-SSE2-NEXT: por %xmm2, %xmm1
1371; X86-SSE2-NEXT: movd %xmm1, %eax
1372; X86-SSE2-NEXT: retl
1373;
1374; X86-SSE42-LABEL: test_reduce_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001375; X86-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001376; X86-SSE42-NEXT: pmaxsd %xmm3, %xmm1
1377; X86-SSE42-NEXT: pmaxsd %xmm2, %xmm0
1378; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0
1379; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1380; X86-SSE42-NEXT: pmaxsd %xmm0, %xmm1
1381; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
1382; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0
1383; X86-SSE42-NEXT: movd %xmm0, %eax
1384; X86-SSE42-NEXT: retl
1385;
1386; X86-AVX1-LABEL: test_reduce_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001387; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001388; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1389; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1390; X86-AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2
1391; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
1392; X86-AVX1-NEXT: vpmaxsd %xmm2, %xmm0, %xmm0
1393; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1394; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
1395; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1396; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
1397; X86-AVX1-NEXT: vmovd %xmm0, %eax
1398; X86-AVX1-NEXT: vzeroupper
1399; X86-AVX1-NEXT: retl
1400;
1401; X86-AVX2-LABEL: test_reduce_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001402; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001403; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1404; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1405; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1406; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1407; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1408; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1409; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1410; X86-AVX2-NEXT: vmovd %xmm0, %eax
1411; X86-AVX2-NEXT: vzeroupper
1412; X86-AVX2-NEXT: retl
1413;
1414; X64-SSE2-LABEL: test_reduce_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001415; X64-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001416; X64-SSE2-NEXT: movdqa %xmm0, %xmm4
1417; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm4
1418; X64-SSE2-NEXT: movdqa %xmm1, %xmm5
1419; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
1420; X64-SSE2-NEXT: pand %xmm5, %xmm1
1421; X64-SSE2-NEXT: pandn %xmm3, %xmm5
1422; X64-SSE2-NEXT: por %xmm1, %xmm5
1423; X64-SSE2-NEXT: pand %xmm4, %xmm0
1424; X64-SSE2-NEXT: pandn %xmm2, %xmm4
1425; X64-SSE2-NEXT: por %xmm0, %xmm4
1426; X64-SSE2-NEXT: movdqa %xmm4, %xmm0
1427; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm0
1428; X64-SSE2-NEXT: pand %xmm0, %xmm4
1429; X64-SSE2-NEXT: pandn %xmm5, %xmm0
1430; X64-SSE2-NEXT: por %xmm4, %xmm0
1431; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1432; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
1433; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
1434; X64-SSE2-NEXT: pand %xmm2, %xmm0
1435; X64-SSE2-NEXT: pandn %xmm1, %xmm2
1436; X64-SSE2-NEXT: por %xmm0, %xmm2
1437; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
1438; X64-SSE2-NEXT: movdqa %xmm2, %xmm1
1439; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
1440; X64-SSE2-NEXT: pand %xmm1, %xmm2
1441; X64-SSE2-NEXT: pandn %xmm0, %xmm1
1442; X64-SSE2-NEXT: por %xmm2, %xmm1
1443; X64-SSE2-NEXT: movd %xmm1, %eax
1444; X64-SSE2-NEXT: retq
1445;
1446; X64-SSE42-LABEL: test_reduce_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001447; X64-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001448; X64-SSE42-NEXT: pmaxsd %xmm3, %xmm1
1449; X64-SSE42-NEXT: pmaxsd %xmm2, %xmm0
1450; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0
1451; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1452; X64-SSE42-NEXT: pmaxsd %xmm0, %xmm1
1453; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
1454; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0
1455; X64-SSE42-NEXT: movd %xmm0, %eax
1456; X64-SSE42-NEXT: retq
1457;
1458; X64-AVX1-LABEL: test_reduce_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001459; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001460; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1461; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1462; X64-AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2
1463; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
1464; X64-AVX1-NEXT: vpmaxsd %xmm2, %xmm0, %xmm0
1465; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1466; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
1467; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1468; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
1469; X64-AVX1-NEXT: vmovd %xmm0, %eax
1470; X64-AVX1-NEXT: vzeroupper
1471; X64-AVX1-NEXT: retq
1472;
1473; X64-AVX2-LABEL: test_reduce_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001474; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001475; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1476; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1477; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1478; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1479; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1480; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1481; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1482; X64-AVX2-NEXT: vmovd %xmm0, %eax
1483; X64-AVX2-NEXT: vzeroupper
1484; X64-AVX2-NEXT: retq
1485;
1486; X64-AVX512-LABEL: test_reduce_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001487; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001488; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1489; X64-AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
1490; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1491; X64-AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
1492; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1493; X64-AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
1494; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1495; X64-AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
1496; X64-AVX512-NEXT: vmovd %xmm0, %eax
1497; X64-AVX512-NEXT: vzeroupper
1498; X64-AVX512-NEXT: retq
; Step 1 - compare lanes 8-15 against lanes 0-7 and keep the larger value.
1499 %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1500 %2 = icmp sgt <16 x i32> %a0, %1
1501 %3 = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1
; Step 2 - compare lanes 4-7 against lanes 0-3.
1502 %4 = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1503 %5 = icmp sgt <16 x i32> %3, %4
1504 %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4
; Step 3 - compare lanes 2-3 against lanes 0-1.
1505 %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1506 %8 = icmp sgt <16 x i32> %6, %7
1507 %9 = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7
; Step 4 - compare lane 1 against lane 0.
1508 %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1509 %11 = icmp sgt <16 x i32> %9, %10
1510 %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10
; Lane 0 now holds the signed maximum of all 16 input elements.
1511 %13 = extractelement <16 x i32> %12, i32 0
1512 ret i32 %13
1513}
1514
1515define i16 @test_reduce_v32i16(<32 x i16> %a0) {
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001516; X86-SSE2-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001517; X86-SSE2: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001518; X86-SSE2-NEXT: pmaxsw %xmm3, %xmm1
1519; X86-SSE2-NEXT: pmaxsw %xmm2, %xmm0
1520; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
1521; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1522; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
1523; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
1524; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
1525; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1526; X86-SSE2-NEXT: psrld $16, %xmm1
1527; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
1528; X86-SSE2-NEXT: movd %xmm1, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001529; X86-SSE2-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001530; X86-SSE2-NEXT: retl
1531;
1532; X86-SSE42-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001533; X86-SSE42: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001534; X86-SSE42-NEXT: pmaxsw %xmm3, %xmm1
1535; X86-SSE42-NEXT: pmaxsw %xmm2, %xmm0
1536; X86-SSE42-NEXT: pmaxsw %xmm1, %xmm0
1537; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
1538; X86-SSE42-NEXT: pxor %xmm1, %xmm0
1539; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
1540; X86-SSE42-NEXT: pxor %xmm1, %xmm0
1541; X86-SSE42-NEXT: movd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001542; X86-SSE42-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001543; X86-SSE42-NEXT: retl
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001544;
1545; X86-AVX1-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001546; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001547; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1548; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1549; X86-AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
1550; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
1551; X86-AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001552; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
1553; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1554; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1555; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001556; X86-AVX1-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001557; X86-AVX1-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001558; X86-AVX1-NEXT: vzeroupper
1559; X86-AVX1-NEXT: retl
1560;
1561; X86-AVX2-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001562; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001563; X86-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
1564; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001565; X86-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
1566; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
1567; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
1568; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1569; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001570; X86-AVX2-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001571; X86-AVX2-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001572; X86-AVX2-NEXT: vzeroupper
1573; X86-AVX2-NEXT: retl
1574;
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001575; X64-SSE2-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001576; X64-SSE2: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001577; X64-SSE2-NEXT: pmaxsw %xmm3, %xmm1
1578; X64-SSE2-NEXT: pmaxsw %xmm2, %xmm0
1579; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
1580; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1581; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
1582; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
1583; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
1584; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1585; X64-SSE2-NEXT: psrld $16, %xmm1
1586; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
1587; X64-SSE2-NEXT: movd %xmm1, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001588; X64-SSE2-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001589; X64-SSE2-NEXT: retq
1590;
1591; X64-SSE42-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001592; X64-SSE42: ## %bb.0:
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001593; X64-SSE42-NEXT: pmaxsw %xmm3, %xmm1
1594; X64-SSE42-NEXT: pmaxsw %xmm2, %xmm0
1595; X64-SSE42-NEXT: pmaxsw %xmm1, %xmm0
1596; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
1597; X64-SSE42-NEXT: pxor %xmm1, %xmm0
1598; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
1599; X64-SSE42-NEXT: pxor %xmm1, %xmm0
1600; X64-SSE42-NEXT: movd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001601; X64-SSE42-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001602; X64-SSE42-NEXT: retq
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001603;
1604; X64-AVX1-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001605; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001606; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1607; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1608; X64-AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
1609; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
1610; X64-AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001611; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
1612; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1613; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1614; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001615; X64-AVX1-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001616; X64-AVX1-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001617; X64-AVX1-NEXT: vzeroupper
1618; X64-AVX1-NEXT: retq
1619;
1620; X64-AVX2-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001621; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001622; X64-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
1623; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001624; X64-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
1625; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
1626; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
1627; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1628; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001629; X64-AVX2-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001630; X64-AVX2-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001631; X64-AVX2-NEXT: vzeroupper
1632; X64-AVX2-NEXT: retq
1633;
1634; X64-AVX512-LABEL: test_reduce_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001635; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001636; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001637; X64-AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001638; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrim90accbc2017-11-23 13:50:27 +00001639; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
1640; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
1641; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
1642; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
1643; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001644; X64-AVX512-NEXT: vmovd %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001645; X64-AVX512-NEXT: ## kill: def %ax killed %ax killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001646; X64-AVX512-NEXT: vzeroupper
1647; X64-AVX512-NEXT: retq
1648 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1649 %2 = icmp sgt <32 x i16> %a0, %1
1650 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
1651 %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1652 %5 = icmp sgt <32 x i16> %3, %4
1653 %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
1654 %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1655 %8 = icmp sgt <32 x i16> %6, %7
1656 %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
1657 %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1658 %11 = icmp sgt <32 x i16> %9, %10
1659 %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10
1660 %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1661 %14 = icmp sgt <32 x i16> %12, %13
1662 %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13
1663 %16 = extractelement <32 x i16> %15, i32 0
1664 ret i16 %16
1665}
1666
1667define i8 @test_reduce_v64i8(<64 x i8> %a0) {
1668; X86-SSE2-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001669; X86-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001670; X86-SSE2-NEXT: movdqa %xmm0, %xmm4
1671; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm4
1672; X86-SSE2-NEXT: movdqa %xmm1, %xmm5
1673; X86-SSE2-NEXT: pcmpgtb %xmm3, %xmm5
1674; X86-SSE2-NEXT: pand %xmm5, %xmm1
1675; X86-SSE2-NEXT: pandn %xmm3, %xmm5
1676; X86-SSE2-NEXT: por %xmm1, %xmm5
1677; X86-SSE2-NEXT: pand %xmm4, %xmm0
1678; X86-SSE2-NEXT: pandn %xmm2, %xmm4
1679; X86-SSE2-NEXT: por %xmm0, %xmm4
1680; X86-SSE2-NEXT: movdqa %xmm4, %xmm0
1681; X86-SSE2-NEXT: pcmpgtb %xmm5, %xmm0
1682; X86-SSE2-NEXT: pand %xmm0, %xmm4
1683; X86-SSE2-NEXT: pandn %xmm5, %xmm0
1684; X86-SSE2-NEXT: por %xmm4, %xmm0
1685; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1686; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1687; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
1688; X86-SSE2-NEXT: pand %xmm2, %xmm0
1689; X86-SSE2-NEXT: pandn %xmm1, %xmm2
1690; X86-SSE2-NEXT: por %xmm0, %xmm2
1691; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
1692; X86-SSE2-NEXT: movdqa %xmm2, %xmm1
1693; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
1694; X86-SSE2-NEXT: pand %xmm1, %xmm2
1695; X86-SSE2-NEXT: pandn %xmm0, %xmm1
1696; X86-SSE2-NEXT: por %xmm2, %xmm1
1697; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
1698; X86-SSE2-NEXT: psrld $16, %xmm0
1699; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
1700; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
1701; X86-SSE2-NEXT: pand %xmm2, %xmm1
1702; X86-SSE2-NEXT: pandn %xmm0, %xmm2
1703; X86-SSE2-NEXT: por %xmm1, %xmm2
1704; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
1705; X86-SSE2-NEXT: psrlw $8, %xmm0
1706; X86-SSE2-NEXT: movdqa %xmm2, %xmm1
1707; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
1708; X86-SSE2-NEXT: pand %xmm1, %xmm2
1709; X86-SSE2-NEXT: pandn %xmm0, %xmm1
1710; X86-SSE2-NEXT: por %xmm2, %xmm1
1711; X86-SSE2-NEXT: movd %xmm1, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001712; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001713; X86-SSE2-NEXT: retl
1714;
1715; X86-SSE42-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001716; X86-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001717; X86-SSE42-NEXT: pmaxsb %xmm3, %xmm1
1718; X86-SSE42-NEXT: pmaxsb %xmm2, %xmm0
1719; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001720; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
1721; X86-SSE42-NEXT: pxor %xmm1, %xmm0
1722; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
1723; X86-SSE42-NEXT: psrlw $8, %xmm2
1724; X86-SSE42-NEXT: pminub %xmm0, %xmm2
1725; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
1726; X86-SSE42-NEXT: pxor %xmm1, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001727; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001728; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001729; X86-SSE42-NEXT: retl
1730;
1731; X86-AVX1-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001732; X86-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001733; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1734; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1735; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
1736; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
1737; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001738; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
1739; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1740; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
1741; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
1742; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1743; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001744; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001745; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001746; X86-AVX1-NEXT: vzeroupper
1747; X86-AVX1-NEXT: retl
1748;
1749; X86-AVX2-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001750; X86-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001751; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
1752; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001753; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
1754; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
1755; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
1756; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
1757; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
1758; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1759; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001760; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001761; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001762; X86-AVX2-NEXT: vzeroupper
1763; X86-AVX2-NEXT: retl
1764;
1765; X64-SSE2-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001766; X64-SSE2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001767; X64-SSE2-NEXT: movdqa %xmm0, %xmm4
1768; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm4
1769; X64-SSE2-NEXT: movdqa %xmm1, %xmm5
1770; X64-SSE2-NEXT: pcmpgtb %xmm3, %xmm5
1771; X64-SSE2-NEXT: pand %xmm5, %xmm1
1772; X64-SSE2-NEXT: pandn %xmm3, %xmm5
1773; X64-SSE2-NEXT: por %xmm1, %xmm5
1774; X64-SSE2-NEXT: pand %xmm4, %xmm0
1775; X64-SSE2-NEXT: pandn %xmm2, %xmm4
1776; X64-SSE2-NEXT: por %xmm0, %xmm4
1777; X64-SSE2-NEXT: movdqa %xmm4, %xmm0
1778; X64-SSE2-NEXT: pcmpgtb %xmm5, %xmm0
1779; X64-SSE2-NEXT: pand %xmm0, %xmm4
1780; X64-SSE2-NEXT: pandn %xmm5, %xmm0
1781; X64-SSE2-NEXT: por %xmm4, %xmm0
1782; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1783; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
1784; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
1785; X64-SSE2-NEXT: pand %xmm2, %xmm0
1786; X64-SSE2-NEXT: pandn %xmm1, %xmm2
1787; X64-SSE2-NEXT: por %xmm0, %xmm2
1788; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
1789; X64-SSE2-NEXT: movdqa %xmm2, %xmm1
1790; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
1791; X64-SSE2-NEXT: pand %xmm1, %xmm2
1792; X64-SSE2-NEXT: pandn %xmm0, %xmm1
1793; X64-SSE2-NEXT: por %xmm2, %xmm1
1794; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
1795; X64-SSE2-NEXT: psrld $16, %xmm0
1796; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
1797; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
1798; X64-SSE2-NEXT: pand %xmm2, %xmm1
1799; X64-SSE2-NEXT: pandn %xmm0, %xmm2
1800; X64-SSE2-NEXT: por %xmm1, %xmm2
1801; X64-SSE2-NEXT: movdqa %xmm2, %xmm0
1802; X64-SSE2-NEXT: psrlw $8, %xmm0
1803; X64-SSE2-NEXT: movdqa %xmm2, %xmm1
1804; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
1805; X64-SSE2-NEXT: pand %xmm1, %xmm2
1806; X64-SSE2-NEXT: pandn %xmm0, %xmm1
1807; X64-SSE2-NEXT: por %xmm2, %xmm1
1808; X64-SSE2-NEXT: movd %xmm1, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001809; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001810; X64-SSE2-NEXT: retq
1811;
1812; X64-SSE42-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001813; X64-SSE42: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001814; X64-SSE42-NEXT: pmaxsb %xmm3, %xmm1
1815; X64-SSE42-NEXT: pmaxsb %xmm2, %xmm0
1816; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001817; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
1818; X64-SSE42-NEXT: pxor %xmm1, %xmm0
1819; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
1820; X64-SSE42-NEXT: psrlw $8, %xmm2
1821; X64-SSE42-NEXT: pminub %xmm0, %xmm2
1822; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
1823; X64-SSE42-NEXT: pxor %xmm1, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001824; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001825; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001826; X64-SSE42-NEXT: retq
1827;
1828; X64-AVX1-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001829; X64-AVX1: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001830; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1831; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1832; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
1833; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
1834; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001835; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
1836; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1837; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
1838; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
1839; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1840; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001841; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001842; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001843; X64-AVX1-NEXT: vzeroupper
1844; X64-AVX1-NEXT: retq
1845;
1846; X64-AVX2-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001847; X64-AVX2: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001848; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
1849; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001850; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
1851; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
1852; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
1853; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
1854; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
1855; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1856; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001857; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001858; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001859; X64-AVX2-NEXT: vzeroupper
1860; X64-AVX2-NEXT: retq
1861;
1862; X64-AVX512-LABEL: test_reduce_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001863; X64-AVX512: ## %bb.0:
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001864; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001865; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001866; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimf6d4ab62017-12-19 12:02:40 +00001867; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
1868; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
1869; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
1870; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
1871; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
1872; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
1873; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001874; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +00001875; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
Simon Pilgrim879c5b12017-11-05 19:48:24 +00001876; X64-AVX512-NEXT: vzeroupper
1877; X64-AVX512-NEXT: retq
1878 %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1879 %2 = icmp sgt <64 x i8> %a0, %1
1880 %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
1881 %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1882 %5 = icmp sgt <64 x i8> %3, %4
1883 %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
1884 %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1885 %8 = icmp sgt <64 x i8> %6, %7
1886 %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
1887 %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1888 %11 = icmp sgt <64 x i8> %9, %10
1889 %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
1890 %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1891 %14 = icmp sgt <64 x i8> %12, %13
1892 %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13
1893 %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1894 %17 = icmp sgt <64 x i8> %15, %16
1895 %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16
1896 %19 = extractelement <64 x i8> %18, i32 0
1897 ret i8 %19
1898}