; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

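; Each test reduces a lane-wise compare to a single "all lanes true" style
; result: the compare mask is sign-extended, repeatedly shuffled and ANDed
; against itself, and element 0 of the final vector holds the combined mask.

; v2f64: one shuffle+and folds both i64 mask lanes together.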
define i64 @test_v2f64(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_v2f64:
; SSE: # BB#0:
; SSE-NEXT: cmpltpd %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movd %xmm0, %rax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
  %c = fcmp ogt <2 x double> %a0, %a1
  %s = sext <2 x i1> %c to <2 x i64>
  %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %2 = and <2 x i64> %s, %1
  %3 = extractelement <2 x i64> %2, i32 0
  ret i64 %3
}

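; v4f64: the two mask halves are ANDed together, then reduced like v2f64.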
define i64 @test_v4f64(<4 x double> %a0, <4 x double> %a1) {
; SSE-LABEL: test_v4f64:
; SSE: # BB#0:
; SSE-NEXT: cmpltpd %xmm1, %xmm3
; SSE-NEXT: cmpltpd %xmm0, %xmm2
; SSE-NEXT: andpd %xmm3, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: movd %xmm0, %rax
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v4f64:
; AVX1: # BB#0:
; AVX1-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vandpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vandpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v4f64:
; AVX2: # BB#0:
; AVX2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT: vandpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
  %c = fcmp ogt <4 x double> %a0, %a1
  %s = sext <4 x i1> %c to <4 x i64>
  %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = and <4 x i64> %s, %1
  %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %4 = and <4 x i64> %2, %3
  %5 = extractelement <4 x i64> %4, i64 0
  ret i64 %5
}

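; v4f64 with the mask sign-extended to the legal v4i32 type, so the two
; compare results can be packed into one xmm before reducing.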
define i64 @test_v4f64_legal(<4 x double> %a0, <4 x double> %a1) {
; SSE-LABEL: test_v4f64_legal:
; SSE: # BB#0:
; SSE-NEXT: cmpltpd %xmm1, %xmm3
; SSE-NEXT: cmpltpd %xmm0, %xmm2
; SSE-NEXT: packsswb %xmm3, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
; SSE-NEXT: cltq
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f64_legal:
; AVX: # BB#0:
; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: cltq
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %c = fcmp ogt <4 x double> %a0, %a1
  %s = sext <4 x i1> %c to <4 x i32>
  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = and <4 x i32> %s, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %4 = and <4 x i32> %2, %3
  %5 = extractelement <4 x i32> %4, i64 0
  %6 = sext i32 %5 to i64
  ret i64 %6
}

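; v4f32: two shuffle+and steps fold the four i32 mask lanes.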
define i32 @test_v4f32(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_v4f32:
; SSE: # BB#0:
; SSE-NEXT: cmpltps %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f32:
; AVX: # BB#0:
; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
  %c = fcmp ogt <4 x float> %a0, %a1
  %s = sext <4 x i1> %c to <4 x i32>
  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = and <4 x i32> %s, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %4 = and <4 x i32> %2, %3
  %5 = extractelement <4 x i32> %4, i32 0
  ret i32 %5
}

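; v8f32: the halves are ANDed first, then reduced like v4f32.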
define i32 @test_v8f32(<8 x float> %a0, <8 x float> %a1) {
; SSE-LABEL: test_v8f32:
; SSE: # BB#0:
; SSE-NEXT: cmpltps %xmm1, %xmm3
; SSE-NEXT: cmpltps %xmm0, %xmm2
; SSE-NEXT: andps %xmm3, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v8f32:
; AVX1: # BB#0:
; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8f32:
; AVX2: # BB#0:
; AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
  %c = fcmp ogt <8 x float> %a0, %a1
  %s = sext <8 x i1> %c to <8 x i32>
  %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <8 x i32> %s, %1
  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <8 x i32> %2, %3
  %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <8 x i32> %4, %5
  %7 = extractelement <8 x i32> %6, i32 0
  ret i32 %7
}

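; v8f32 with a v8i16 mask: the halves are packed, and the last lane pair is
; combined with a 16-bit shift instead of a shuffle.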
define i32 @test_v8f32_legal(<8 x float> %a0, <8 x float> %a1) {
; SSE-LABEL: test_v8f32_legal:
; SSE: # BB#0:
; SSE-NEXT: cmpltps %xmm1, %xmm3
; SSE-NEXT: cmpltps %xmm0, %xmm2
; SSE-NEXT: packsswb %xmm3, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: cwtl
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8f32_legal:
; AVX: # BB#0:
; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: cwtl
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %c = fcmp ogt <8 x float> %a0, %a1
  %s = sext <8 x i1> %c to <8 x i16>
  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <8 x i16> %s, %1
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <8 x i16> %2, %3
  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <8 x i16> %4, %5
  %7 = extractelement <8 x i16> %6, i32 0
  %8 = sext i16 %7 to i32
  ret i32 %8
}

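; v2i64: integer (sgt) version of the v2f64 reduction.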
define i64 @test_v2i64(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_v2i64:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtq %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %rax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i64:
; AVX: # BB#0:
; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
  %c = icmp sgt <2 x i64> %a0, %a1
  %s = sext <2 x i1> %c to <2 x i64>
  %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %2 = and <2 x i64> %s, %1
  %3 = extractelement <2 x i64> %2, i32 0
  ret i64 %3
}

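; v4i64: AVX1 lacks a 256-bit pcmpgtq, so it compares the halves as xmm
; values and reinserts them.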
define i64 @test_v4i64(<4 x i64> %a0, <4 x i64> %a1) {
; SSE-LABEL: test_v4i64:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtq %xmm3, %xmm1
; SSE-NEXT: pcmpgtq %xmm2, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %rax
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
  %c = icmp sgt <4 x i64> %a0, %a1
  %s = sext <4 x i1> %c to <4 x i64>
  %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = and <4 x i64> %s, %1
  %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %4 = and <4 x i64> %2, %3
  %5 = extractelement <4 x i64> %4, i64 0
  ret i64 %5
}

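; v4i64 with a v4i32 mask: the two pcmpgtq results are packed down before
; the shuffle+and steps.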
define i64 @test_v4i64_legal(<4 x i64> %a0, <4 x i64> %a1) {
; SSE-LABEL: test_v4i64_legal:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtq %xmm3, %xmm1
; SSE-NEXT: pcmpgtq %xmm2, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: cltq
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v4i64_legal:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: cltq
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v4i64_legal:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: cltq
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
  %c = icmp sgt <4 x i64> %a0, %a1
  %s = sext <4 x i1> %c to <4 x i32>
  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = and <4 x i32> %s, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %4 = and <4 x i32> %2, %3
  %5 = extractelement <4 x i32> %4, i64 0
  %6 = sext i32 %5 to i64
  ret i64 %6
}

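; v4i32: integer version of the v4f32 reduction.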
define i32 @test_v4i32(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_v4i32:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i32:
; AVX: # BB#0:
; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
  %c = icmp sgt <4 x i32> %a0, %a1
  %s = sext <4 x i1> %c to <4 x i32>
  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = and <4 x i32> %s, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %4 = and <4 x i32> %2, %3
  %5 = extractelement <4 x i32> %4, i32 0
  ret i32 %5
}

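; v8i32: 256-bit version of the v4i32 reduction.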
define i32 @test_v8i32(<8 x i32> %a0, <8 x i32> %a1) {
; SSE-LABEL: test_v8i32:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtd %xmm3, %xmm1
; SSE-NEXT: pcmpgtd %xmm2, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
  %c = icmp sgt <8 x i32> %a0, %a1
  %s = sext <8 x i1> %c to <8 x i32>
  %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <8 x i32> %s, %1
  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <8 x i32> %2, %3
  %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <8 x i32> %4, %5
  %7 = extractelement <8 x i32> %6, i32 0
  ret i32 %7
}

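; v8i32 with a v8i16 mask: packed first, then an extra 16-bit shift step
; finishes the reduction.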
define i32 @test_v8i32_legal(<8 x i32> %a0, <8 x i32> %a1) {
; SSE-LABEL: test_v8i32_legal:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtd %xmm3, %xmm1
; SSE-NEXT: pcmpgtd %xmm2, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
; SSE-NEXT: cwtl
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v8i32_legal:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: cwtl
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8i32_legal:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: cwtl
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
  %c = icmp sgt <8 x i32> %a0, %a1
  %s = sext <8 x i1> %c to <8 x i16>
  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <8 x i16> %s, %1
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <8 x i16> %2, %3
  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <8 x i16> %4, %5
  %7 = extractelement <8 x i16> %6, i32 0
  %8 = sext i16 %7 to i32
  ret i32 %8
}

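; v8i16: the final word pair is combined with psrld $16 rather than another
; shuffle.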
define i16 @test_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_v8i16:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtw %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8i16:
; AVX: # BB#0:
; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX-NEXT: retq
  %c = icmp sgt <8 x i16> %a0, %a1
  %s = sext <8 x i1> %c to <8 x i16>
  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <8 x i16> %s, %1
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <8 x i16> %2, %3
  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <8 x i16> %4, %5
  %7 = extractelement <8 x i16> %6, i32 0
  ret i16 %7
}

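; v16i16: 256-bit version of the v8i16 reduction.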
define i16 @test_v16i16(<16 x i16> %a0, <16 x i16> %a1) {
; SSE-LABEL: test_v16i16:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtw %xmm3, %xmm1
; SSE-NEXT: pcmpgtw %xmm2, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
  %c = icmp sgt <16 x i16> %a0, %a1
  %s = sext <16 x i1> %c to <16 x i16>
  %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <16 x i16> %s, %1
  %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <16 x i16> %2, %3
  %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <16 x i16> %4, %5
  %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = and <16 x i16> %6, %7
  %9 = extractelement <16 x i16> %8, i32 0
  ret i16 %9
}

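; v16i16 with a v16i8 mask: packing adds a byte-wide psrlw $8 step at the
; end.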
define i16 @test_v16i16_legal(<16 x i16> %a0, <16 x i16> %a1) {
; SSE-LABEL: test_v16i16_legal:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtw %xmm3, %xmm1
; SSE-NEXT: pcmpgtw %xmm2, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psrlw $8, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pextrb $0, %xmm0, %eax
; SSE-NEXT: movsbl %al, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v16i16_legal:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: movsbl %al, %eax
; AVX1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16i16_legal:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: movsbl %al, %eax
; AVX2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
  %c = icmp sgt <16 x i16> %a0, %a1
  %s = sext <16 x i1> %c to <16 x i8>
  %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <16 x i8> %s, %1
  %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <16 x i8> %2, %3
  %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <16 x i8> %4, %5
  %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = and <16 x i8> %6, %7
  %9 = extractelement <16 x i8> %8, i32 0
  %10 = sext i8 %9 to i16
  ret i16 %10
}

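; v16i8: psrld and psrlw shifts pair up the word and byte lanes for the last
; two steps.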
define i8 @test_v16i8(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_v16i8:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtb %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psrlw $8, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pextrb $0, %xmm0, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # BB#0:
; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX-NEXT: retq
  %c = icmp sgt <16 x i8> %a0, %a1
  %s = sext <16 x i1> %c to <16 x i8>
  %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <16 x i8> %s, %1
  %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <16 x i8> %2, %3
  %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <16 x i8> %4, %5
  %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = and <16 x i8> %6, %7
  %9 = extractelement <16 x i8> %8, i32 0
  ret i8 %9
}

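; v32i8: 256-bit version of the v16i8 reduction.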
define i8 @test_v32i8(<32 x i8> %a0, <32 x i8> %a1) {
; SSE-LABEL: test_v32i8:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtb %xmm3, %xmm1
; SSE-NEXT: pcmpgtb %xmm2, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psrlw $8, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pextrb $0, %xmm0, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v32i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
  %c = icmp sgt <32 x i8> %a0, %a1
  %s = sext <32 x i1> %c to <32 x i8>
  %1 = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <32 x i8> %s, %1
  %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <32 x i8> %2, %3
  %5 = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <32 x i8> %4, %5
  %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = and <32 x i8> %6, %7
  %9 = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %10 = and <32 x i8> %8, %9
  %11 = extractelement <32 x i8> %10, i32 0
  ret i8 %11
}