; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512

7define i64 @test_v2f64(<2 x double> %a0, <2 x double> %a1) {
8; SSE-LABEL: test_v2f64:
9; SSE: # BB#0:
10; SSE-NEXT: cmpltpd %xmm0, %xmm1
11; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
12; SSE-NEXT: pand %xmm1, %xmm0
13; SSE-NEXT: movd %xmm0, %rax
14; SSE-NEXT: retq
15;
16; AVX-LABEL: test_v2f64:
17; AVX: # BB#0:
18; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
19; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
20; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm0
21; AVX-NEXT: vmovq %xmm0, %rax
22; AVX-NEXT: retq
Simon Pilgrim60662cb2017-01-17 17:33:18 +000023;
24; AVX512-LABEL: test_v2f64:
25; AVX512: # BB#0:
26; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k1
27; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
28; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
29; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
30; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
31; AVX512-NEXT: vmovq %xmm0, %rax
32; AVX512-NEXT: retq
Simon Pilgrim8b2996f2017-01-17 15:02:01 +000033 %c = fcmp ogt <2 x double> %a0, %a1
34 %s = sext <2 x i1> %c to <2 x i64>
35 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
36 %2 = and <2 x i64> %s, %1
37 %3 = extractelement <2 x i64> %2, i32 0
38 ret i64 %3
39}
40
; All-of reduction of a <4 x double> ogt compare via log2(4) shuffle+AND steps.
define i64 @test_v4f64(<4 x double> %a0, <4 x double> %a1) {
; SSE-LABEL: test_v4f64:
; SSE:       # BB#0:
; SSE-NEXT:    cmpltpd %xmm1, %xmm3
; SSE-NEXT:    cmpltpd %xmm0, %xmm2
; SSE-NEXT:    andpd %xmm3, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v4f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v4f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vandpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v4f64:
; AVX512:       # BB#0:
; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    retq
  %c = fcmp ogt <4 x double> %a0, %a1
  %s = sext <4 x i1> %c to <4 x i64>
  %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = and <4 x i64> %s, %1
  %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %4 = and <4 x i64> %2, %3
  %5 = extractelement <4 x i64> %4, i64 0
  ret i64 %5
}

; Same <4 x double> all-of reduction, but sext'd to the legal <4 x i32> type
; first (exercises the pack-to-narrower-lane lowering).
define i64 @test_v4f64_legal(<4 x double> %a0, <4 x double> %a1) {
; SSE-LABEL: test_v4f64_legal:
; SSE:       # BB#0:
; SSE-NEXT:    cmpltpd %xmm1, %xmm3
; SSE-NEXT:    cmpltpd %xmm0, %xmm2
; SSE-NEXT:    packsswb %xmm3, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    cltq
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64_legal:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    cltq
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f64_legal:
; AVX512:       # BB#0:
; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    cltq
; AVX512-NEXT:    retq
  %c = fcmp ogt <4 x double> %a0, %a1
  %s = sext <4 x i1> %c to <4 x i32>
  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = and <4 x i32> %s, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %4 = and <4 x i32> %2, %3
  %5 = extractelement <4 x i32> %4, i64 0
  %6 = sext i32 %5 to i64
  ret i64 %6
}

; All-of reduction of a <4 x float> ogt compare: two shuffle+AND steps,
; return lane 0 as i32.
define i32 @test_v4f32(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_v4f32:
; SSE:       # BB#0:
; SSE-NEXT:    cmpltps %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vandpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f32:
; AVX512:       # BB#0:
; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    retq
  %c = fcmp ogt <4 x float> %a0, %a1
  %s = sext <4 x i1> %c to <4 x i32>
  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = and <4 x i32> %s, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %4 = and <4 x i32> %2, %3
  %5 = extractelement <4 x i32> %4, i32 0
  ret i32 %5
}

; All-of reduction of an <8 x float> ogt compare via three shuffle+AND steps.
define i32 @test_v8f32(<8 x float> %a0, <8 x float> %a1) {
; SSE-LABEL: test_v8f32:
; SSE:       # BB#0:
; SSE-NEXT:    cmpltps %xmm1, %xmm3
; SSE-NEXT:    cmpltps %xmm0, %xmm2
; SSE-NEXT:    andps %xmm3, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v8f32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v8f32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v8f32:
; AVX512:       # BB#0:
; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    retq
  %c = fcmp ogt <8 x float> %a0, %a1
  %s = sext <8 x i1> %c to <8 x i32>
  %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <8 x i32> %s, %1
  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <8 x i32> %2, %3
  %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <8 x i32> %4, %5
  %7 = extractelement <8 x i32> %6, i32 0
  ret i32 %7
}

; Same <8 x float> all-of reduction, but sext'd to the legal <8 x i16> type
; first (exercises the pack-to-narrower-lane lowering).
define i32 @test_v8f32_legal(<8 x float> %a0, <8 x float> %a1) {
; SSE-LABEL: test_v8f32_legal:
; SSE:       # BB#0:
; SSE-NEXT:    cmpltps %xmm1, %xmm3
; SSE-NEXT:    cmpltps %xmm0, %xmm2
; SSE-NEXT:    packsswb %xmm3, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    psrld $16, %xmm0
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    cwtl
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f32_legal:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    cwtl
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f32_legal:
; AVX512:       # BB#0:
; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %k0
; AVX512-NEXT:    vpmovm2w %k0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    cwtl
; AVX512-NEXT:    retq
  %c = fcmp ogt <8 x float> %a0, %a1
  %s = sext <8 x i1> %c to <8 x i16>
  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <8 x i16> %s, %1
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <8 x i16> %2, %3
  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <8 x i16> %4, %5
  %7 = extractelement <8 x i16> %6, i32 0
  %8 = sext i16 %7 to i32
  ret i32 %8
}

; All-of reduction of a <2 x i64> sgt compare.
define i64 @test_v2i64(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_v2i64:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2i64:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm0, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    retq
  %c = icmp sgt <2 x i64> %a0, %a1
  %s = sext <2 x i1> %c to <2 x i64>
  %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %2 = and <2 x i64> %s, %1
  %3 = extractelement <2 x i64> %2, i32 0
  ret i64 %3
}

; All-of reduction of a <4 x i64> sgt compare (AVX1 has no 256-bit pcmpgtq,
; so the compare is split per 128-bit half).
define i64 @test_v4i64(<4 x i64> %a0, <4 x i64> %a1) {
; SSE-LABEL: test_v4i64:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtq %xmm3, %xmm1
; SSE-NEXT:    pcmpgtq %xmm2, %xmm0
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v4i64:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    retq
  %c = icmp sgt <4 x i64> %a0, %a1
  %s = sext <4 x i1> %c to <4 x i64>
  %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = and <4 x i64> %s, %1
  %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %4 = and <4 x i64> %2, %3
  %5 = extractelement <4 x i64> %4, i64 0
  ret i64 %5
}

; Same <4 x i64> all-of reduction, but sext'd to the legal <4 x i32> type
; first (exercises the pack-to-narrower-lane lowering).
define i64 @test_v4i64_legal(<4 x i64> %a0, <4 x i64> %a1) {
; SSE-LABEL: test_v4i64_legal:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtq %xmm3, %xmm1
; SSE-NEXT:    pcmpgtq %xmm2, %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    cltq
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v4i64_legal:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    cltq
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v4i64_legal:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    cltq
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v4i64_legal:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    cltq
; AVX512-NEXT:    retq
  %c = icmp sgt <4 x i64> %a0, %a1
  %s = sext <4 x i1> %c to <4 x i32>
  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = and <4 x i32> %s, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %4 = and <4 x i32> %2, %3
  %5 = extractelement <4 x i32> %4, i64 0
  %6 = sext i32 %5 to i64
  ret i64 %6
}

; All-of reduction of a <4 x i32> sgt compare.
define i32 @test_v4i32(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_v4i32:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4i32:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    retq
  %c = icmp sgt <4 x i32> %a0, %a1
  %s = sext <4 x i1> %c to <4 x i32>
  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = and <4 x i32> %s, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %4 = and <4 x i32> %2, %3
  %5 = extractelement <4 x i32> %4, i32 0
  ret i32 %5
}

; All-of reduction of an <8 x i32> sgt compare via three shuffle+AND steps.
define i32 @test_v8i32(<8 x i32> %a0, <8 x i32> %a1) {
; SSE-LABEL: test_v8i32:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v8i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v8i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v8i32:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    retq
  %c = icmp sgt <8 x i32> %a0, %a1
  %s = sext <8 x i1> %c to <8 x i32>
  %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <8 x i32> %s, %1
  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <8 x i32> %2, %3
  %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <8 x i32> %4, %5
  %7 = extractelement <8 x i32> %6, i32 0
  ret i32 %7
}

; Same <8 x i32> all-of reduction, but sext'd to the legal <8 x i16> type
; first (exercises the pack-to-narrower-lane lowering).
define i32 @test_v8i32_legal(<8 x i32> %a0, <8 x i32> %a1) {
; SSE-LABEL: test_v8i32_legal:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrld $16, %xmm1
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    cwtl
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v8i32_legal:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    cwtl
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v8i32_legal:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    cwtl
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v8i32_legal:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
; AVX512-NEXT:    vpmovm2w %k0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    cwtl
; AVX512-NEXT:    retq
  %c = icmp sgt <8 x i32> %a0, %a1
  %s = sext <8 x i1> %c to <8 x i16>
  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <8 x i16> %s, %1
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <8 x i16> %2, %3
  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <8 x i16> %4, %5
  %7 = extractelement <8 x i16> %6, i32 0
  %8 = sext i16 %7 to i32
  ret i32 %8
}

; All-of reduction of an <8 x i16> sgt compare via three shuffle+AND steps.
define i16 @test_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_v8i16:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrld $16, %xmm1
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i16:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8i16:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
; AVX512-NEXT:    vpmovm2w %k0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512-NEXT:    retq
  %c = icmp sgt <8 x i16> %a0, %a1
  %s = sext <8 x i1> %c to <8 x i16>
  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <8 x i16> %s, %1
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <8 x i16> %2, %3
  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <8 x i16> %4, %5
  %7 = extractelement <8 x i16> %6, i32 0
  ret i16 %7
}

708define i16 @test_v16i16(<16 x i16> %a0, <16 x i16> %a1) {
709; SSE-LABEL: test_v16i16:
710; SSE: # BB#0:
711; SSE-NEXT: pcmpgtw %xmm3, %xmm1
712; SSE-NEXT: pcmpgtw %xmm2, %xmm0
713; SSE-NEXT: pand %xmm1, %xmm0
714; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
715; SSE-NEXT: pand %xmm0, %xmm1
716; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
717; SSE-NEXT: pand %xmm1, %xmm0
718; SSE-NEXT: movdqa %xmm0, %xmm1
719; SSE-NEXT: psrld $16, %xmm1
720; SSE-NEXT: pand %xmm0, %xmm1
721; SSE-NEXT: movd %xmm1, %eax
722; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
723; SSE-NEXT: retq
724;
725; AVX1-LABEL: test_v16i16:
726; AVX1: # BB#0:
727; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
728; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
729; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
730; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
731; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
732; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
733; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
734; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
735; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
736; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
737; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
738; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
739; AVX1-NEXT: vmovd %xmm0, %eax
740; AVX1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
741; AVX1-NEXT: vzeroupper
742; AVX1-NEXT: retq
743;
744; AVX2-LABEL: test_v16i16:
745; AVX2: # BB#0:
746; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
747; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
748; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
749; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
750; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
751; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
752; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
753; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
754; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
755; AVX2-NEXT: vmovd %xmm0, %eax
756; AVX2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
757; AVX2-NEXT: vzeroupper
758; AVX2-NEXT: retq
Simon Pilgrim60662cb2017-01-17 17:33:18 +0000759;
760; AVX512-LABEL: test_v16i16:
761; AVX512: # BB#0:
762; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
763; AVX512-NEXT: vpmovm2w %k0, %ymm0
764; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
765; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
766; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
767; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
768; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
769; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
770; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
771; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
772; AVX512-NEXT: vmovd %xmm0, %eax
773; AVX512-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
774; AVX512-NEXT: retq
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000775 %c = icmp sgt <16 x i16> %a0, %a1
776 %s = sext <16 x i1> %c to <16 x i16>
777 %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
778 %2 = and <16 x i16> %s, %1
779 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
780 %4 = and <16 x i16> %2, %3
781 %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
782 %6 = and <16 x i16> %4, %5
783 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
784 %8 = and <16 x i16> %6, %7
785 %9 = extractelement <16 x i16> %8, i32 0
786 ret i16 %9
787}
; All-of reduction of a <16 x i16> signed-greater-than compare, with the mask
; pre-truncated to <16 x i8> ("legal" type) so the target can use packsswb.
define i16 @test_v16i16_legal(<16 x i16> %a0, <16 x i16> %a1) {
; SSE-LABEL: test_v16i16_legal:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtw %xmm3, %xmm1
; SSE-NEXT: pcmpgtw %xmm2, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psrlw $8, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pextrb $0, %xmm0, %eax
; SSE-NEXT: movsbl %al, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v16i16_legal:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: movsbl %al, %eax
; AVX1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16i16_legal:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: movsbl %al, %eax
; AVX2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v16i16_legal:
; AVX512: # BB#0:
; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
; AVX512-NEXT: vpmovm2b %k0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: movsbl %al, %eax
; AVX512-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512-NEXT: retq
  %c = icmp sgt <16 x i16> %a0, %a1
  %s = sext <16 x i1> %c to <16 x i8>
  %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <16 x i8> %s, %1
  %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <16 x i8> %2, %3
  %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <16 x i8> %4, %5
  %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = and <16 x i8> %6, %7
  %9 = extractelement <16 x i8> %8, i32 0
  %10 = sext i8 %9 to i16
  ret i16 %10
}
; All-of reduction of a <16 x i8> signed-greater-than compare via pairwise
; shuffle+and halving down to element 0.
define i8 @test_v16i8(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_v16i8:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtb %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psrlw $8, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pextrb $0, %xmm0, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # BB#0:
; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16i8:
; AVX512: # BB#0:
; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
; AVX512-NEXT: vpmovm2b %k0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512-NEXT: retq
  %c = icmp sgt <16 x i8> %a0, %a1
  %s = sext <16 x i1> %c to <16 x i8>
  %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <16 x i8> %s, %1
  %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <16 x i8> %2, %3
  %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <16 x i8> %4, %5
  %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = and <16 x i8> %6, %7
  %9 = extractelement <16 x i8> %8, i32 0
  ret i8 %9
}
; All-of reduction of a <32 x i8> signed-greater-than compare: the 256-bit
; mask is halved once, then reduced within the low 128-bit lane.
define i8 @test_v32i8(<32 x i8> %a0, <32 x i8> %a1) {
; SSE-LABEL: test_v32i8:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtb %xmm3, %xmm1
; SSE-NEXT: pcmpgtb %xmm2, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psrlw $8, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pextrb $0, %xmm0, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v32i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v32i8:
; AVX512: # BB#0:
; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
; AVX512-NEXT: vpmovm2b %k0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512-NEXT: retq
  %c = icmp sgt <32 x i8> %a0, %a1
  %s = sext <32 x i1> %c to <32 x i8>
  %1 = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <32 x i8> %s, %1
  %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <32 x i8> %2, %3
  %5 = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <32 x i8> %4, %5
  %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = and <32 x i8> %6, %7
  %9 = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %10 = and <32 x i8> %8, %9
  %11 = extractelement <32 x i8> %10, i32 0
  ret i8 %11
}