; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW

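; AND two v8i64 signed-greater-than compares and bitcast the <8 x i1> result to i8.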
define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
; SSE-LABEL: v8i64:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtq %xmm7, %xmm3
; SSE-NEXT: pcmpgtq %xmm6, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT: pslld $31, %xmm2
; SSE-NEXT: psrad $31, %xmm2
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE-NEXT: pshufb %xmm3, %xmm2
; SSE-NEXT: pcmpgtq %xmm5, %xmm1
; SSE-NEXT: pcmpgtq %xmm4, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: pslld $31, %xmm0
; SSE-NEXT: psrad $31, %xmm0
; SSE-NEXT: pshufb %xmm3, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT: psllw $15, %xmm0
; SSE-NEXT: psraw $15, %xmm0
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: shufps {{.*#+}} xmm9 = xmm9[0,2],xmm11[0,2]
; SSE-NEXT: pslld $31, %xmm9
; SSE-NEXT: psrad $31, %xmm9
; SSE-NEXT: pshufb %xmm3, %xmm9
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,2],xmm10[0,2]
; SSE-NEXT: pslld $31, %xmm8
; SSE-NEXT: psrad $31, %xmm8
; SSE-NEXT: pshufb %xmm3, %xmm8
; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm9[0]
; SSE-NEXT: psllw $15, %xmm8
; SSE-NEXT: psraw $15, %xmm8
; SSE-NEXT: pand %xmm0, %xmm8
; SSE-NEXT: pshufb {{.*#+}} xmm8 = xmm8[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE-NEXT: pmovmskb %xmm8, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX1-LABEL: v8i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT: vpcmpgtq %xmm8, %xmm9, %xmm8
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm9
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm8, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm9[0]
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm2
; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm3
; AVX1-NEXT: vpacksswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpshufb %xmm8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
; AVX1-NEXT: vpsraw $15, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v8i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX2-NEXT: vpcmpgtq %ymm7, %ymm5, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
; AVX2-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vpsllw $15, %xmm1, %xmm1
; AVX2-NEXT: vpsraw $15, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v8i64:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
; AVX512F-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v8i64:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = icmp sgt <8 x i64> %a, %b
  %x1 = icmp sgt <8 x i64> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

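; AND two v8f64 ordered-greater-than compares and bitcast the <8 x i1> result to i8.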
define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
; SSE-LABEL: v8f64:
; SSE: # BB#0:
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: cmpltpd %xmm3, %xmm7
; SSE-NEXT: cmpltpd %xmm2, %xmm6
; SSE-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
; SSE-NEXT: pslld $31, %xmm6
; SSE-NEXT: psrad $31, %xmm6
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE-NEXT: pshufb %xmm2, %xmm6
; SSE-NEXT: cmpltpd %xmm1, %xmm5
; SSE-NEXT: cmpltpd %xmm0, %xmm4
; SSE-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,2],xmm5[0,2]
; SSE-NEXT: pslld $31, %xmm4
; SSE-NEXT: psrad $31, %xmm4
; SSE-NEXT: pshufb %xmm2, %xmm4
; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
; SSE-NEXT: psllw $15, %xmm4
; SSE-NEXT: psraw $15, %xmm4
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: shufps {{.*#+}} xmm9 = xmm9[0,2],xmm11[0,2]
; SSE-NEXT: pslld $31, %xmm9
; SSE-NEXT: psrad $31, %xmm9
; SSE-NEXT: pshufb %xmm2, %xmm9
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,2],xmm10[0,2]
; SSE-NEXT: pslld $31, %xmm8
; SSE-NEXT: psrad $31, %xmm8
; SSE-NEXT: pshufb %xmm2, %xmm8
; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm9[0]
; SSE-NEXT: psllw $15, %xmm8
; SSE-NEXT: psraw $15, %xmm8
; SSE-NEXT: pand %xmm4, %xmm8
; SSE-NEXT: pshufb {{.*#+}} xmm8 = xmm8[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE-NEXT: pmovmskb %xmm8, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX12-LABEL: v8f64:
; AVX12: # BB#0:
; AVX12-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX12-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX12-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX12-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX12-NEXT: vcmpltpd %ymm5, %ymm7, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vcmpltpd %ymm4, %ymm6, %ymm2
; AVX12-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX12-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
; AVX12-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX12-NEXT: vpsllw $15, %xmm1, %xmm1
; AVX12-NEXT: vpsraw $15, %xmm1, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX12-NEXT: vzeroupper
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v8f64:
; AVX512F: # BB#0:
; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; AVX512F-NEXT: vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v8f64:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; AVX512BW-NEXT: vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = fcmp ogt <8 x double> %a, %b
  %x1 = fcmp ogt <8 x double> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

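; AND two v32i16 signed-greater-than compares and bitcast the <32 x i1> result to i32.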
define i32 @v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i16> %d) {
; SSE-LABEL: v32i16:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtw %xmm5, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE-NEXT: pshufb %xmm5, %xmm1
; SSE-NEXT: pcmpgtw %xmm4, %xmm0
; SSE-NEXT: pshufb %xmm5, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: psllw $7, %xmm0
; SSE-NEXT: movdqa {{.*#+}} xmm12 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE-NEXT: pand %xmm12, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pxor %xmm4, %xmm4
; SSE-NEXT: pcmpgtb %xmm0, %xmm4
; SSE-NEXT: pcmpgtw %xmm7, %xmm3
; SSE-NEXT: pshufb %xmm5, %xmm3
; SSE-NEXT: pcmpgtw %xmm6, %xmm2
; SSE-NEXT: pshufb %xmm5, %xmm2
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: psllw $7, %xmm2
; SSE-NEXT: pand %xmm12, %xmm2
; SSE-NEXT: pxor %xmm0, %xmm0
; SSE-NEXT: pcmpgtb %xmm2, %xmm0
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pshufb %xmm5, %xmm11
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: pshufb %xmm5, %xmm10
; SSE-NEXT: punpcklqdq {{.*#+}} xmm10 = xmm10[0],xmm11[0]
; SSE-NEXT: psllw $7, %xmm10
; SSE-NEXT: pand %xmm12, %xmm10
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pcmpgtb %xmm10, %xmm2
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pshufb %xmm5, %xmm9
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: pshufb %xmm5, %xmm8
; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm9[0]
; SSE-NEXT: psllw $7, %xmm8
; SSE-NEXT: pand %xmm12, %xmm8
; SSE-NEXT: pcmpgtb %xmm8, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pmovmskb %xmm2, %ecx
; SSE-NEXT: pmovmskb %xmm1, %eax
; SSE-NEXT: shll $16, %eax
; SSE-NEXT: orl %ecx, %eax
; SSE-NEXT: retq
;
; AVX1-LABEL: v32i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT: vpcmpgtw %xmm8, %xmm9, %xmm8
; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm7, %xmm5, %xmm2
; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm8, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm6, %xmm4, %xmm3
; AVX1-NEXT: vpacksswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: vpmovmskb %xmm1, %eax
; AVX1-NEXT: shll $16, %eax
; AVX1-NEXT: orl %ecx, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v32i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v32i16:
; AVX512F: # BB#0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: .Lcfi0:
; AVX512F-NEXT: .cfi_def_cfa_offset 16
; AVX512F-NEXT: .Lcfi1:
; AVX512F-NEXT: .cfi_offset %rbp, -16
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: .Lcfi2:
; AVX512F-NEXT: .cfi_def_cfa_register %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm1
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm0
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm1
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm2
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, (%rsp)
; AVX512F-NEXT: movl (%rsp), %eax
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v32i16:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtw %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = icmp sgt <32 x i16> %a, %b
  %x1 = icmp sgt <32 x i16> %c, %d
  %y = and <32 x i1> %x0, %x1
  %res = bitcast <32 x i1> %y to i32
  ret i32 %res
}

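; AND two v16i32 signed-greater-than compares and bitcast the <16 x i1> result to i16.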
define i16 @v16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i32> %d) {
; SSE-LABEL: v16i32:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtd %xmm7, %xmm3
; SSE-NEXT: movdqa {{.*#+}} xmm7 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE-NEXT: pshufb %xmm7, %xmm3
; SSE-NEXT: pcmpgtd %xmm6, %xmm2
; SSE-NEXT: pshufb %xmm7, %xmm2
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: psllw $15, %xmm2
; SSE-NEXT: psraw $15, %xmm2
; SSE-NEXT: movdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE-NEXT: pshufb %xmm3, %xmm2
; SSE-NEXT: pcmpgtd %xmm5, %xmm1
; SSE-NEXT: pshufb %xmm7, %xmm1
; SSE-NEXT: pcmpgtd %xmm4, %xmm0
; SSE-NEXT: pshufb %xmm7, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: psllw $15, %xmm0
; SSE-NEXT: psraw $15, %xmm0
; SSE-NEXT: pshufb %xmm3, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT: psllw $7, %xmm0
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pxor %xmm4, %xmm4
; SSE-NEXT: pcmpgtb %xmm0, %xmm4
; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pshufb %xmm7, %xmm11
; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pshufb %xmm7, %xmm9
; SSE-NEXT: punpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0]
; SSE-NEXT: psllw $15, %xmm9
; SSE-NEXT: psraw $15, %xmm9
; SSE-NEXT: pshufb %xmm3, %xmm9
; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: pshufb %xmm7, %xmm10
; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: pshufb %xmm7, %xmm8
; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm10[0]
; SSE-NEXT: psllw $15, %xmm8
; SSE-NEXT: psraw $15, %xmm8
; SSE-NEXT: pshufb %xmm3, %xmm8
; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm9[0]
; SSE-NEXT: psllw $7, %xmm8
; SSE-NEXT: pand %xmm2, %xmm8
; SSE-NEXT: pcmpgtb %xmm8, %xmm1
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: pmovmskb %xmm1, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX1-LABEL: v16i32:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT: vpcmpgtd %xmm8, %xmm9, %xmm8
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm9
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm8, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm9[0]
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm9 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT: vpand %xmm9, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm7, %xmm5, %xmm3
; AVX1-NEXT: vpacksswb %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm5
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm5, %xmm3
; AVX1-NEXT: vpcmpgtd %xmm6, %xmm4, %xmm4
; AVX1-NEXT: vpacksswb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpshufb %xmm8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm9, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v16i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX2-NEXT: vpcmpgtd %ymm7, %ymm5, %ymm5
; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm7
; AVX2-NEXT: vpacksswb %xmm7, %xmm5, %xmm5
; AVX2-NEXT: vpshufb %xmm3, %xmm5, %xmm5
; AVX2-NEXT: vpcmpgtd %ymm6, %ymm4, %ymm4
; AVX2-NEXT: vextracti128 $1, %ymm4, %xmm6
; AVX2-NEXT: vpacksswb %xmm6, %xmm4, %xmm4
; AVX2-NEXT: vpshufb %xmm3, %xmm4, %xmm3
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
; AVX2-NEXT: vpsllw $7, %xmm3, %xmm3
; AVX2-NEXT: vpand %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v16i32:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
; AVX512F-NEXT: vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16i32:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = icmp sgt <16 x i32> %a, %b
  %x1 = icmp sgt <16 x i32> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}

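; AND two v16f32 ordered-greater-than compares and bitcast the <16 x i1> result to i16.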
define i16 @v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) {
; SSE-LABEL: v16f32:
; SSE: # BB#0:
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: cmpltps %xmm3, %xmm7
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE-NEXT: pshufb %xmm3, %xmm7
; SSE-NEXT: cmpltps %xmm2, %xmm6
; SSE-NEXT: pshufb %xmm3, %xmm6
; SSE-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm7[0]
; SSE-NEXT: psllw $15, %xmm6
; SSE-NEXT: psraw $15, %xmm6
; SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE-NEXT: pshufb %xmm2, %xmm6
; SSE-NEXT: cmpltps %xmm1, %xmm5
; SSE-NEXT: pshufb %xmm3, %xmm5
; SSE-NEXT: cmpltps %xmm0, %xmm4
; SSE-NEXT: pshufb %xmm3, %xmm4
; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
; SSE-NEXT: psllw $15, %xmm4
; SSE-NEXT: psraw $15, %xmm4
; SSE-NEXT: pshufb %xmm2, %xmm4
; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
; SSE-NEXT: psllw $7, %xmm4
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE-NEXT: pand %xmm1, %xmm4
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: pxor %xmm5, %xmm5
; SSE-NEXT: pcmpgtb %xmm4, %xmm5
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pshufb %xmm3, %xmm11
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pshufb %xmm3, %xmm9
; SSE-NEXT: punpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0]
; SSE-NEXT: psllw $15, %xmm9
; SSE-NEXT: psraw $15, %xmm9
; SSE-NEXT: pshufb %xmm2, %xmm9
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: pshufb %xmm3, %xmm10
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: pshufb %xmm3, %xmm8
; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm10[0]
; SSE-NEXT: psllw $15, %xmm8
; SSE-NEXT: psraw $15, %xmm8
; SSE-NEXT: pshufb %xmm2, %xmm8
; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm9[0]
; SSE-NEXT: psllw $7, %xmm8
; SSE-NEXT: pand %xmm1, %xmm8
; SSE-NEXT: pcmpgtb %xmm8, %xmm0
; SSE-NEXT: pand %xmm5, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX12-LABEL: v16f32:
; AVX12: # BB#0:
; AVX12-NEXT: vcmpltps %ymm1, %ymm3, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX12-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX12-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX12-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX12-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX12-NEXT: vcmpltps %ymm5, %ymm7, %ymm5
; AVX12-NEXT: vextractf128 $1, %ymm5, %xmm7
; AVX12-NEXT: vpacksswb %xmm7, %xmm5, %xmm5
; AVX12-NEXT: vpshufb %xmm3, %xmm5, %xmm5
; AVX12-NEXT: vcmpltps %ymm4, %ymm6, %ymm4
; AVX12-NEXT: vextractf128 $1, %ymm4, %xmm6
; AVX12-NEXT: vpacksswb %xmm6, %xmm4, %xmm4
; AVX12-NEXT: vpshufb %xmm3, %xmm4, %xmm3
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
; AVX12-NEXT: vpsllw $7, %xmm3, %xmm3
; AVX12-NEXT: vpand %xmm1, %xmm3, %xmm1
; AVX12-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX12-NEXT: vzeroupper
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v16f32:
; AVX512F: # BB#0:
; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1
; AVX512F-NEXT: vcmpltps %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16f32:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vcmpltps %zmm0, %zmm1, %k1
; AVX512BW-NEXT: vcmpltps %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = fcmp ogt <16 x float> %a, %b
  %x1 = fcmp ogt <16 x float> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}

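; AND two v64i8 signed-greater-than compares and bitcast the <64 x i1> result to i64.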
940define i64 @v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) {
941; SSE-LABEL: v64i8:
942; SSE: # BB#0:
943; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
944; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
945; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
946; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
947; SSE-NEXT: pcmpgtb %xmm6, %xmm2
948; SSE-NEXT: pcmpgtb %xmm7, %xmm3
949; SSE-NEXT: pcmpgtb %xmm4, %xmm0
950; SSE-NEXT: pcmpgtb %xmm5, %xmm1
951; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm8
952; SSE-NEXT: pand %xmm2, %xmm8
953; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm9
954; SSE-NEXT: pand %xmm3, %xmm9
955; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm10
956; SSE-NEXT: pand %xmm0, %xmm10
957; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm11
958; SSE-NEXT: pand %xmm1, %xmm11
959; SSE-NEXT: pextrb $15, %xmm11, %eax
960; SSE-NEXT: andb $1, %al
961; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
962; SSE-NEXT: pextrb $14, %xmm11, %eax
963; SSE-NEXT: andb $1, %al
964; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
965; SSE-NEXT: pextrb $13, %xmm11, %eax
966; SSE-NEXT: andb $1, %al
967; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
968; SSE-NEXT: pextrb $12, %xmm11, %eax
969; SSE-NEXT: andb $1, %al
970; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
971; SSE-NEXT: pextrb $11, %xmm11, %eax
972; SSE-NEXT: andb $1, %al
973; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
974; SSE-NEXT: pextrb $10, %xmm11, %eax
975; SSE-NEXT: andb $1, %al
976; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
977; SSE-NEXT: pextrb $9, %xmm11, %eax
978; SSE-NEXT: andb $1, %al
979; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
980; SSE-NEXT: pextrb $8, %xmm11, %eax
981; SSE-NEXT: andb $1, %al
982; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
983; SSE-NEXT: pextrb $7, %xmm11, %eax
984; SSE-NEXT: andb $1, %al
985; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
986; SSE-NEXT: pextrb $6, %xmm11, %eax
987; SSE-NEXT: andb $1, %al
988; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
989; SSE-NEXT: pextrb $5, %xmm11, %eax
990; SSE-NEXT: andb $1, %al
991; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
992; SSE-NEXT: pextrb $4, %xmm11, %eax
993; SSE-NEXT: andb $1, %al
994; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
995; SSE-NEXT: pextrb $3, %xmm11, %eax
996; SSE-NEXT: andb $1, %al
997; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
998; SSE-NEXT: pextrb $2, %xmm11, %eax
999; SSE-NEXT: andb $1, %al
1000; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1001; SSE-NEXT: pextrb $1, %xmm11, %eax
1002; SSE-NEXT: andb $1, %al
1003; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1004; SSE-NEXT: pextrb $0, %xmm11, %eax
1005; SSE-NEXT: andb $1, %al
1006; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1007; SSE-NEXT: pextrb $15, %xmm10, %eax
1008; SSE-NEXT: andb $1, %al
1009; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1010; SSE-NEXT: pextrb $14, %xmm10, %eax
1011; SSE-NEXT: andb $1, %al
1012; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1013; SSE-NEXT: pextrb $13, %xmm10, %eax
1014; SSE-NEXT: andb $1, %al
1015; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1016; SSE-NEXT: pextrb $12, %xmm10, %eax
1017; SSE-NEXT: andb $1, %al
1018; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1019; SSE-NEXT: pextrb $11, %xmm10, %eax
1020; SSE-NEXT: andb $1, %al
1021; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1022; SSE-NEXT: pextrb $10, %xmm10, %eax
1023; SSE-NEXT: andb $1, %al
1024; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1025; SSE-NEXT: pextrb $9, %xmm10, %eax
1026; SSE-NEXT: andb $1, %al
1027; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1028; SSE-NEXT: pextrb $8, %xmm10, %eax
1029; SSE-NEXT: andb $1, %al
1030; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1031; SSE-NEXT: pextrb $7, %xmm10, %eax
1032; SSE-NEXT: andb $1, %al
1033; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1034; SSE-NEXT: pextrb $6, %xmm10, %eax
1035; SSE-NEXT: andb $1, %al
1036; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1037; SSE-NEXT: pextrb $5, %xmm10, %eax
1038; SSE-NEXT: andb $1, %al
1039; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1040; SSE-NEXT: pextrb $4, %xmm10, %eax
1041; SSE-NEXT: andb $1, %al
1042; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1043; SSE-NEXT: pextrb $3, %xmm10, %eax
1044; SSE-NEXT: andb $1, %al
1045; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1046; SSE-NEXT: pextrb $2, %xmm10, %eax
1047; SSE-NEXT: andb $1, %al
1048; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1049; SSE-NEXT: pextrb $1, %xmm10, %eax
1050; SSE-NEXT: andb $1, %al
1051; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1052; SSE-NEXT: pextrb $0, %xmm10, %eax
1053; SSE-NEXT: andb $1, %al
1054; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1055; SSE-NEXT: pextrb $15, %xmm9, %eax
1056; SSE-NEXT: andb $1, %al
1057; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1058; SSE-NEXT: pextrb $14, %xmm9, %eax
1059; SSE-NEXT: andb $1, %al
1060; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1061; SSE-NEXT: pextrb $13, %xmm9, %eax
1062; SSE-NEXT: andb $1, %al
1063; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1064; SSE-NEXT: pextrb $12, %xmm9, %eax
1065; SSE-NEXT: andb $1, %al
1066; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1067; SSE-NEXT: pextrb $11, %xmm9, %eax
1068; SSE-NEXT: andb $1, %al
1069; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1070; SSE-NEXT: pextrb $10, %xmm9, %eax
1071; SSE-NEXT: andb $1, %al
1072; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1073; SSE-NEXT: pextrb $9, %xmm9, %eax
1074; SSE-NEXT: andb $1, %al
1075; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1076; SSE-NEXT: pextrb $8, %xmm9, %eax
1077; SSE-NEXT: andb $1, %al
1078; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1079; SSE-NEXT: pextrb $7, %xmm9, %eax
1080; SSE-NEXT: andb $1, %al
1081; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1082; SSE-NEXT: pextrb $6, %xmm9, %eax
1083; SSE-NEXT: andb $1, %al
1084; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1085; SSE-NEXT: pextrb $5, %xmm9, %eax
1086; SSE-NEXT: andb $1, %al
1087; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1088; SSE-NEXT: pextrb $4, %xmm9, %eax
1089; SSE-NEXT: andb $1, %al
1090; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1091; SSE-NEXT: pextrb $3, %xmm9, %eax
1092; SSE-NEXT: andb $1, %al
1093; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1094; SSE-NEXT: pextrb $2, %xmm9, %eax
1095; SSE-NEXT: andb $1, %al
1096; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1097; SSE-NEXT: pextrb $1, %xmm9, %eax
1098; SSE-NEXT: andb $1, %al
1099; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1100; SSE-NEXT: pextrb $0, %xmm9, %eax
1101; SSE-NEXT: andb $1, %al
1102; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1103; SSE-NEXT: pextrb $15, %xmm8, %eax
1104; SSE-NEXT: andb $1, %al
1105; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1106; SSE-NEXT: pextrb $14, %xmm8, %eax
1107; SSE-NEXT: andb $1, %al
1108; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1109; SSE-NEXT: pextrb $13, %xmm8, %eax
1110; SSE-NEXT: andb $1, %al
1111; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1112; SSE-NEXT: pextrb $12, %xmm8, %eax
1113; SSE-NEXT: andb $1, %al
1114; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1115; SSE-NEXT: pextrb $11, %xmm8, %eax
1116; SSE-NEXT: andb $1, %al
1117; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1118; SSE-NEXT: pextrb $10, %xmm8, %eax
1119; SSE-NEXT: andb $1, %al
1120; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1121; SSE-NEXT: pextrb $9, %xmm8, %eax
1122; SSE-NEXT: andb $1, %al
1123; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1124; SSE-NEXT: pextrb $8, %xmm8, %eax
1125; SSE-NEXT: andb $1, %al
1126; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1127; SSE-NEXT: pextrb $7, %xmm8, %eax
1128; SSE-NEXT: andb $1, %al
1129; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1130; SSE-NEXT: pextrb $6, %xmm8, %eax
1131; SSE-NEXT: andb $1, %al
1132; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1133; SSE-NEXT: pextrb $5, %xmm8, %eax
1134; SSE-NEXT: andb $1, %al
1135; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1136; SSE-NEXT: pextrb $4, %xmm8, %eax
1137; SSE-NEXT: andb $1, %al
1138; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1139; SSE-NEXT: pextrb $3, %xmm8, %eax
1140; SSE-NEXT: andb $1, %al
1141; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1142; SSE-NEXT: pextrb $2, %xmm8, %eax
1143; SSE-NEXT: andb $1, %al
1144; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1145; SSE-NEXT: pextrb $1, %xmm8, %eax
1146; SSE-NEXT: andb $1, %al
1147; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1148; SSE-NEXT: pextrb $0, %xmm8, %eax
1149; SSE-NEXT: andb $1, %al
1150; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1151; SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
1152; SSE-NEXT: shll $16, %eax
1153; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
1154; SSE-NEXT: orl %eax, %ecx
1155; SSE-NEXT: movl -{{[0-9]+}}(%rsp), %edx
1156; SSE-NEXT: shll $16, %edx
1157; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
1158; SSE-NEXT: orl %edx, %eax
1159; SSE-NEXT: shlq $32, %rax
1160; SSE-NEXT: orq %rcx, %rax
1161; SSE-NEXT: retq
1162;
1163; AVX1-LABEL: v64i8:
1164; AVX1: # BB#0:
1165; AVX1-NEXT: pushq %rbp
1166; AVX1-NEXT: .Lcfi0:
1167; AVX1-NEXT: .cfi_def_cfa_offset 16
1168; AVX1-NEXT: .Lcfi1:
1169; AVX1-NEXT: .cfi_offset %rbp, -16
1170; AVX1-NEXT: movq %rsp, %rbp
1171; AVX1-NEXT: .Lcfi2:
1172; AVX1-NEXT: .cfi_def_cfa_register %rbp
1173; AVX1-NEXT: andq $-32, %rsp
1174; AVX1-NEXT: subq $64, %rsp
1175; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
1176; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
1177; AVX1-NEXT: vpcmpgtb %xmm8, %xmm9, %xmm8
1178; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1
1179; AVX1-NEXT: vinsertf128 $1, %xmm8, %ymm1, %ymm8
1180; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
1181; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1182; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1
1183; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0
1184; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
1185; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm0
1186; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
1187; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
1188; AVX1-NEXT: vpcmpgtb %xmm7, %xmm5, %xmm2
1189; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1190; AVX1-NEXT: vandps %ymm0, %ymm8, %ymm0
1191; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
1192; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
1193; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
1194; AVX1-NEXT: vpcmpgtb %xmm6, %xmm4, %xmm3
1195; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1196; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
1197; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1198; AVX1-NEXT: vpextrb $15, %xmm2, %eax
1199; AVX1-NEXT: andb $1, %al
1200; AVX1-NEXT: movb %al, (%rsp)
1201; AVX1-NEXT: vpextrb $14, %xmm2, %eax
1202; AVX1-NEXT: andb $1, %al
1203; AVX1-NEXT: movb %al, (%rsp)
1204; AVX1-NEXT: vpextrb $13, %xmm2, %eax
1205; AVX1-NEXT: andb $1, %al
1206; AVX1-NEXT: movb %al, (%rsp)
1207; AVX1-NEXT: vpextrb $12, %xmm2, %eax
1208; AVX1-NEXT: andb $1, %al
1209; AVX1-NEXT: movb %al, (%rsp)
1210; AVX1-NEXT: vpextrb $11, %xmm2, %eax
1211; AVX1-NEXT: andb $1, %al
1212; AVX1-NEXT: movb %al, (%rsp)
1213; AVX1-NEXT: vpextrb $10, %xmm2, %eax
1214; AVX1-NEXT: andb $1, %al
1215; AVX1-NEXT: movb %al, (%rsp)
1216; AVX1-NEXT: vpextrb $9, %xmm2, %eax
1217; AVX1-NEXT: andb $1, %al
1218; AVX1-NEXT: movb %al, (%rsp)
1219; AVX1-NEXT: vpextrb $8, %xmm2, %eax
1220; AVX1-NEXT: andb $1, %al
1221; AVX1-NEXT: movb %al, (%rsp)
1222; AVX1-NEXT: vpextrb $7, %xmm2, %eax
1223; AVX1-NEXT: andb $1, %al
1224; AVX1-NEXT: movb %al, (%rsp)
1225; AVX1-NEXT: vpextrb $6, %xmm2, %eax
1226; AVX1-NEXT: andb $1, %al
1227; AVX1-NEXT: movb %al, (%rsp)
1228; AVX1-NEXT: vpextrb $5, %xmm2, %eax
1229; AVX1-NEXT: andb $1, %al
1230; AVX1-NEXT: movb %al, (%rsp)
1231; AVX1-NEXT: vpextrb $4, %xmm2, %eax
1232; AVX1-NEXT: andb $1, %al
1233; AVX1-NEXT: movb %al, (%rsp)
1234; AVX1-NEXT: vpextrb $3, %xmm2, %eax
1235; AVX1-NEXT: andb $1, %al
1236; AVX1-NEXT: movb %al, (%rsp)
1237; AVX1-NEXT: vpextrb $2, %xmm2, %eax
1238; AVX1-NEXT: andb $1, %al
1239; AVX1-NEXT: movb %al, (%rsp)
1240; AVX1-NEXT: vpextrb $1, %xmm2, %eax
1241; AVX1-NEXT: andb $1, %al
1242; AVX1-NEXT: movb %al, (%rsp)
1243; AVX1-NEXT: vpextrb $0, %xmm2, %eax
1244; AVX1-NEXT: andb $1, %al
1245; AVX1-NEXT: movb %al, (%rsp)
1246; AVX1-NEXT: vpextrb $15, %xmm1, %eax
1247; AVX1-NEXT: andb $1, %al
1248; AVX1-NEXT: movb %al, (%rsp)
1249; AVX1-NEXT: vpextrb $14, %xmm1, %eax
1250; AVX1-NEXT: andb $1, %al
1251; AVX1-NEXT: movb %al, (%rsp)
1252; AVX1-NEXT: vpextrb $13, %xmm1, %eax
1253; AVX1-NEXT: andb $1, %al
1254; AVX1-NEXT: movb %al, (%rsp)
1255; AVX1-NEXT: vpextrb $12, %xmm1, %eax
1256; AVX1-NEXT: andb $1, %al
1257; AVX1-NEXT: movb %al, (%rsp)
1258; AVX1-NEXT: vpextrb $11, %xmm1, %eax
1259; AVX1-NEXT: andb $1, %al
1260; AVX1-NEXT: movb %al, (%rsp)
1261; AVX1-NEXT: vpextrb $10, %xmm1, %eax
1262; AVX1-NEXT: andb $1, %al
1263; AVX1-NEXT: movb %al, (%rsp)
1264; AVX1-NEXT: vpextrb $9, %xmm1, %eax
1265; AVX1-NEXT: andb $1, %al
1266; AVX1-NEXT: movb %al, (%rsp)
1267; AVX1-NEXT: vpextrb $8, %xmm1, %eax
1268; AVX1-NEXT: andb $1, %al
1269; AVX1-NEXT: movb %al, (%rsp)
1270; AVX1-NEXT: vpextrb $7, %xmm1, %eax
1271; AVX1-NEXT: andb $1, %al
1272; AVX1-NEXT: movb %al, (%rsp)
1273; AVX1-NEXT: vpextrb $6, %xmm1, %eax
1274; AVX1-NEXT: andb $1, %al
1275; AVX1-NEXT: movb %al, (%rsp)
1276; AVX1-NEXT: vpextrb $5, %xmm1, %eax
1277; AVX1-NEXT: andb $1, %al
1278; AVX1-NEXT: movb %al, (%rsp)
1279; AVX1-NEXT: vpextrb $4, %xmm1, %eax
1280; AVX1-NEXT: andb $1, %al
1281; AVX1-NEXT: movb %al, (%rsp)
1282; AVX1-NEXT: vpextrb $3, %xmm1, %eax
1283; AVX1-NEXT: andb $1, %al
1284; AVX1-NEXT: movb %al, (%rsp)
1285; AVX1-NEXT: vpextrb $2, %xmm1, %eax
1286; AVX1-NEXT: andb $1, %al
1287; AVX1-NEXT: movb %al, (%rsp)
1288; AVX1-NEXT: vpextrb $1, %xmm1, %eax
1289; AVX1-NEXT: andb $1, %al
1290; AVX1-NEXT: movb %al, (%rsp)
1291; AVX1-NEXT: vpextrb $0, %xmm1, %eax
1292; AVX1-NEXT: andb $1, %al
1293; AVX1-NEXT: movb %al, (%rsp)
1294; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1295; AVX1-NEXT: vpextrb $15, %xmm1, %eax
1296; AVX1-NEXT: andb $1, %al
1297; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1298; AVX1-NEXT: vpextrb $14, %xmm1, %eax
1299; AVX1-NEXT: andb $1, %al
1300; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1301; AVX1-NEXT: vpextrb $13, %xmm1, %eax
1302; AVX1-NEXT: andb $1, %al
1303; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1304; AVX1-NEXT: vpextrb $12, %xmm1, %eax
1305; AVX1-NEXT: andb $1, %al
1306; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1307; AVX1-NEXT: vpextrb $11, %xmm1, %eax
1308; AVX1-NEXT: andb $1, %al
1309; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1310; AVX1-NEXT: vpextrb $10, %xmm1, %eax
1311; AVX1-NEXT: andb $1, %al
1312; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1313; AVX1-NEXT: vpextrb $9, %xmm1, %eax
1314; AVX1-NEXT: andb $1, %al
1315; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1316; AVX1-NEXT: vpextrb $8, %xmm1, %eax
1317; AVX1-NEXT: andb $1, %al
1318; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1319; AVX1-NEXT: vpextrb $7, %xmm1, %eax
1320; AVX1-NEXT: andb $1, %al
1321; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1322; AVX1-NEXT: vpextrb $6, %xmm1, %eax
1323; AVX1-NEXT: andb $1, %al
1324; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1325; AVX1-NEXT: vpextrb $5, %xmm1, %eax
1326; AVX1-NEXT: andb $1, %al
1327; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1328; AVX1-NEXT: vpextrb $4, %xmm1, %eax
1329; AVX1-NEXT: andb $1, %al
1330; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1331; AVX1-NEXT: vpextrb $3, %xmm1, %eax
1332; AVX1-NEXT: andb $1, %al
1333; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1334; AVX1-NEXT: vpextrb $2, %xmm1, %eax
1335; AVX1-NEXT: andb $1, %al
1336; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1337; AVX1-NEXT: vpextrb $1, %xmm1, %eax
1338; AVX1-NEXT: andb $1, %al
1339; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1340; AVX1-NEXT: vpextrb $0, %xmm1, %eax
1341; AVX1-NEXT: andb $1, %al
1342; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1343; AVX1-NEXT: vpextrb $15, %xmm0, %eax
1344; AVX1-NEXT: andb $1, %al
1345; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1346; AVX1-NEXT: vpextrb $14, %xmm0, %eax
1347; AVX1-NEXT: andb $1, %al
1348; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1349; AVX1-NEXT: vpextrb $13, %xmm0, %eax
1350; AVX1-NEXT: andb $1, %al
1351; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1352; AVX1-NEXT: vpextrb $12, %xmm0, %eax
1353; AVX1-NEXT: andb $1, %al
1354; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1355; AVX1-NEXT: vpextrb $11, %xmm0, %eax
1356; AVX1-NEXT: andb $1, %al
1357; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1358; AVX1-NEXT: vpextrb $10, %xmm0, %eax
1359; AVX1-NEXT: andb $1, %al
1360; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1361; AVX1-NEXT: vpextrb $9, %xmm0, %eax
1362; AVX1-NEXT: andb $1, %al
1363; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1364; AVX1-NEXT: vpextrb $8, %xmm0, %eax
1365; AVX1-NEXT: andb $1, %al
1366; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1367; AVX1-NEXT: vpextrb $7, %xmm0, %eax
1368; AVX1-NEXT: andb $1, %al
1369; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1370; AVX1-NEXT: vpextrb $6, %xmm0, %eax
1371; AVX1-NEXT: andb $1, %al
1372; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1373; AVX1-NEXT: vpextrb $5, %xmm0, %eax
1374; AVX1-NEXT: andb $1, %al
1375; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1376; AVX1-NEXT: vpextrb $4, %xmm0, %eax
1377; AVX1-NEXT: andb $1, %al
1378; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1379; AVX1-NEXT: vpextrb $3, %xmm0, %eax
1380; AVX1-NEXT: andb $1, %al
1381; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1382; AVX1-NEXT: vpextrb $2, %xmm0, %eax
1383; AVX1-NEXT: andb $1, %al
1384; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1385; AVX1-NEXT: vpextrb $1, %xmm0, %eax
1386; AVX1-NEXT: andb $1, %al
1387; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1388; AVX1-NEXT: vpextrb $0, %xmm0, %eax
1389; AVX1-NEXT: andb $1, %al
1390; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1391; AVX1-NEXT: movl (%rsp), %ecx
1392; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
1393; AVX1-NEXT: shlq $32, %rax
1394; AVX1-NEXT: orq %rcx, %rax
1395; AVX1-NEXT: movq %rbp, %rsp
1396; AVX1-NEXT: popq %rbp
1397; AVX1-NEXT: vzeroupper
1398; AVX1-NEXT: retq
1399;
; AVX2-LABEL: v64i8:
; AVX2: # BB#0:
; AVX2-NEXT: pushq %rbp
; AVX2-NEXT: .Lcfi0:
; AVX2-NEXT: .cfi_def_cfa_offset 16
; AVX2-NEXT: .Lcfi1:
; AVX2-NEXT: .cfi_offset %rbp, -16
; AVX2-NEXT: movq %rsp, %rbp
; AVX2-NEXT: .Lcfi2:
; AVX2-NEXT: .cfi_def_cfa_register %rbp
; AVX2-NEXT: andq $-32, %rsp
; AVX2-NEXT: subq $64, %rsp
; AVX2-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm0
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpextrb $15, %xmm2, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $14, %xmm2, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $13, %xmm2, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $12, %xmm2, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $11, %xmm2, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $10, %xmm2, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $9, %xmm2, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $8, %xmm2, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $7, %xmm2, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $6, %xmm2, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $5, %xmm2, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $4, %xmm2, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $3, %xmm2, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $2, %xmm2, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $1, %xmm2, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $0, %xmm2, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $15, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $14, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $13, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $12, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $11, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $10, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $9, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $8, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $7, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $6, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $5, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $4, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $3, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $2, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $1, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $0, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpextrb $15, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $14, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $13, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $12, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $11, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $10, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $9, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $8, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $7, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $6, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $5, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $4, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $3, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $2, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $1, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $0, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $15, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $14, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $13, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $12, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $11, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $10, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $9, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $8, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $7, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $6, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $5, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $4, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $3, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $2, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $1, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX2-NEXT: movl (%rsp), %ecx
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: shlq $32, %rax
; AVX2-NEXT: orq %rcx, %rax
; AVX2-NEXT: movq %rbp, %rsp
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
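; Note: AVX512F has only dword/qword-granularity mask compares, so each
; 16-byte chunk of the compare result is sign-extended to dwords with
; vpmovsxbd, converted to a k-mask via vpslld/vptestmd, and spilled 16 bits
; at a time with kmovw before the i64 is reassembled from the stack.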
; AVX512F-LABEL: v64i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: .Lcfi3:
; AVX512F-NEXT: .cfi_def_cfa_offset 16
; AVX512F-NEXT: .Lcfi4:
; AVX512F-NEXT: .cfi_offset %rbp, -16
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: .Lcfi5:
; AVX512F-NEXT: .cfi_def_cfa_register %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $64, %rsp
; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm2
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm2
; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, (%rsp)
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: movl (%rsp), %ecx
; AVX512F-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX512F-NEXT: shlq $32, %rax
; AVX512F-NEXT: orq %rcx, %rax
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
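; Note: AVX512BW keeps everything in mask registers: the first compare
; produces %k1, the second compare executes under %k1 so the AND of the two
; <64 x i1> masks is folded into the predicated compare, and kmovq copies
; the 64-bit mask directly into %rax.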
; AVX512BW-LABEL: v64i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtb %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
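; The IR below performs two signed-greater-than byte compares, ANDs the
; resulting <64 x i1> masks, and bitcasts the combined mask to i64.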
  %x0 = icmp sgt <64 x i8> %a, %b
  %x1 = icmp sgt <64 x i8> %c, %d
  %y = and <64 x i1> %x0, %x1
  %res = bitcast <64 x i1> %y to i64
  ret i64 %res
}