; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW
; Bitcast of (icmp sgt a,b) & (icmp sgt c,d) for <8 x i64> down to an i8 mask.
define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
; SSE-LABEL: v8i64:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtq %xmm7, %xmm3
; SSE-NEXT: pcmpgtq %xmm6, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,4,5,6,7,0,1,4,5,8,9,12,13]
; SSE-NEXT: pshufb %xmm3, %xmm2
; SSE-NEXT: pcmpgtq %xmm5, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT: pcmpgtq %xmm4, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,2],xmm11[0,2]
; SSE-NEXT: pshufb %xmm3, %xmm8
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm10[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm9[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm8[4,5,6,7]
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: psllw $15, %xmm2
; SSE-NEXT: psraw $15, %xmm2
; SSE-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE-NEXT: pmovmskb %xmm2, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX1-LABEL: v8i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT: vpcmpgtq %xmm8, %xmm9, %xmm8
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm9
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm8, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm9[0]
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm2
; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm3
; AVX1-NEXT: vpacksswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpshufb %xmm8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v8i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpcmpgtq %ymm7, %ymm5, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
; AVX2-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v8i64:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
; AVX512F-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v8i64:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = icmp sgt <8 x i64> %a, %b
  %x1 = icmp sgt <8 x i64> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

; Bitcast of (fcmp ogt a,b) & (fcmp ogt c,d) for <8 x double> down to an i8 mask.
define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
; SSE-LABEL: v8f64:
; SSE: # BB#0:
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: cmpltpd %xmm3, %xmm7
; SSE-NEXT: cmpltpd %xmm2, %xmm6
; SSE-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,4,5,6,7,0,1,4,5,8,9,12,13]
; SSE-NEXT: pshufb %xmm2, %xmm6
; SSE-NEXT: cmpltpd %xmm1, %xmm5
; SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm5[0,2,2,3,4,5,6,7]
; SSE-NEXT: cmpltpd %xmm0, %xmm4
; SSE-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm4[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm6[4,5,6,7]
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,2],xmm11[0,2]
; SSE-NEXT: pshufb %xmm2, %xmm8
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: shufps {{.*#+}} xmm10 = xmm10[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm10[0,2,2,3,4,5,6,7]
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: shufps {{.*#+}} xmm9 = xmm9[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm9[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm8[4,5,6,7]
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: psllw $15, %xmm2
; SSE-NEXT: psraw $15, %xmm2
; SSE-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE-NEXT: pmovmskb %xmm2, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX12-LABEL: v8f64:
; AVX12: # BB#0:
; AVX12-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX12-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX12-NEXT: vcmpltpd %ymm5, %ymm7, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vcmpltpd %ymm4, %ymm6, %ymm2
; AVX12-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX12-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
; AVX12-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX12-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX12-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX12-NEXT: vzeroupper
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v8f64:
; AVX512F: # BB#0:
; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; AVX512F-NEXT: vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v8f64:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; AVX512BW-NEXT: vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = fcmp ogt <8 x double> %a, %b
  %x1 = fcmp ogt <8 x double> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

; Bitcast of (icmp sgt a,b) & (icmp sgt c,d) for <32 x i16> down to an i32 mask.
define i32 @v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i16> %d) {
; SSE-LABEL: v32i16:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtw %xmm5, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE-NEXT: pshufb %xmm5, %xmm1
; SSE-NEXT: pcmpgtw %xmm4, %xmm0
; SSE-NEXT: pshufb %xmm5, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: pcmpgtw %xmm7, %xmm3
; SSE-NEXT: pshufb %xmm5, %xmm3
; SSE-NEXT: pcmpgtw %xmm6, %xmm2
; SSE-NEXT: pshufb %xmm5, %xmm2
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pshufb %xmm5, %xmm11
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pshufb %xmm5, %xmm9
; SSE-NEXT: punpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0]
; SSE-NEXT: pand %xmm0, %xmm9
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: pshufb %xmm5, %xmm10
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: pshufb %xmm5, %xmm8
; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm10[0]
; SSE-NEXT: pand %xmm2, %xmm8
; SSE-NEXT: pmovmskb %xmm9, %ecx
; SSE-NEXT: pmovmskb %xmm8, %eax
; SSE-NEXT: shll $16, %eax
; SSE-NEXT: orl %ecx, %eax
; SSE-NEXT: retq
;
; AVX1-LABEL: v32i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT: vpcmpgtw %xmm8, %xmm9, %xmm8
; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm7, %xmm5, %xmm2
; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm8, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm6, %xmm4, %xmm3
; AVX1-NEXT: vpacksswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: vpmovmskb %xmm1, %eax
; AVX1-NEXT: shll $16, %eax
; AVX1-NEXT: orl %ecx, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v32i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v32i16:
; AVX512F: # BB#0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: .Lcfi0:
; AVX512F-NEXT: .cfi_def_cfa_offset 16
; AVX512F-NEXT: .Lcfi1:
; AVX512F-NEXT: .cfi_offset %rbp, -16
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: .Lcfi2:
; AVX512F-NEXT: .cfi_def_cfa_register %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm1
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm0
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm1
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm2
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, (%rsp)
; AVX512F-NEXT: movl (%rsp), %eax
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v32i16:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtw %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = icmp sgt <32 x i16> %a, %b
  %x1 = icmp sgt <32 x i16> %c, %d
  %y = and <32 x i1> %x0, %x1
  %res = bitcast <32 x i1> %y to i32
  ret i32 %res
}

634define i16 @v16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i32> %d) {
635; SSE-LABEL: v16i32:
636; SSE: # BB#0:
637; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
638; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
639; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
640; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
641; SSE-NEXT: pcmpgtd %xmm7, %xmm3
642; SSE-NEXT: movdqa {{.*#+}} xmm7 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
643; SSE-NEXT: pshufb %xmm7, %xmm3
644; SSE-NEXT: pcmpgtd %xmm6, %xmm2
645; SSE-NEXT: pshufb %xmm7, %xmm2
646; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000647; SSE-NEXT: movdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
648; SSE-NEXT: pshufb %xmm3, %xmm2
649; SSE-NEXT: pcmpgtd %xmm5, %xmm1
650; SSE-NEXT: pshufb %xmm7, %xmm1
651; SSE-NEXT: pcmpgtd %xmm4, %xmm0
652; SSE-NEXT: pshufb %xmm7, %xmm0
653; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000654; SSE-NEXT: pshufb %xmm3, %xmm0
655; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000656; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm11
657; SSE-NEXT: pshufb %xmm7, %xmm11
658; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm9
659; SSE-NEXT: pshufb %xmm7, %xmm9
660; SSE-NEXT: punpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000661; SSE-NEXT: pshufb %xmm3, %xmm9
662; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm10
663; SSE-NEXT: pshufb %xmm7, %xmm10
664; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm8
665; SSE-NEXT: pshufb %xmm7, %xmm8
666; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm10[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000667; SSE-NEXT: pshufb %xmm3, %xmm8
668; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm9[0]
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000669; SSE-NEXT: pand %xmm0, %xmm8
670; SSE-NEXT: pmovmskb %xmm8, %eax
Simon Pilgrim11e29692017-09-14 10:30:22 +0000671; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
672; SSE-NEXT: retq
673;
674; AVX1-LABEL: v16i32:
675; AVX1: # BB#0:
676; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
677; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
678; AVX1-NEXT: vpcmpgtd %xmm8, %xmm9, %xmm8
679; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm1
680; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm1
681; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
682; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm9
683; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
684; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
685; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm1
686; AVX1-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
687; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
688; AVX1-NEXT: vpshufb %xmm8, %xmm0, %xmm0
689; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm9[0]
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000690; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
691; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
692; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
693; AVX1-NEXT: vpcmpgtd %xmm7, %xmm5, %xmm2
694; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
Simon Pilgrim11e29692017-09-14 10:30:22 +0000695; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm1
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000696; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
697; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
698; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
699; AVX1-NEXT: vpcmpgtd %xmm6, %xmm4, %xmm3
700; AVX1-NEXT: vpacksswb %xmm2, %xmm3, %xmm2
701; AVX1-NEXT: vpshufb %xmm8, %xmm2, %xmm2
702; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000703; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
704; AVX1-NEXT: vpmovmskb %xmm0, %eax
705; AVX1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
706; AVX1-NEXT: vzeroupper
707; AVX1-NEXT: retq
708;
709; AVX2-LABEL: v16i32:
710; AVX2: # BB#0:
711; AVX2-NEXT: vpcmpgtd %ymm3, %ymm1, %ymm1
712; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
713; AVX2-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
714; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
715; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
716; AVX2-NEXT: vpcmpgtd %ymm2, %ymm0, %ymm0
717; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
718; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
719; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
720; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000721; AVX2-NEXT: vpcmpgtd %ymm7, %ymm5, %ymm1
722; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
723; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
724; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
725; AVX2-NEXT: vpcmpgtd %ymm6, %ymm4, %ymm2
726; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
727; AVX2-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
728; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
729; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000730; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
731; AVX2-NEXT: vpmovmskb %xmm0, %eax
732; AVX2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
733; AVX2-NEXT: vzeroupper
734; AVX2-NEXT: retq
735;
736; AVX512F-LABEL: v16i32:
737; AVX512F: # BB#0:
738; AVX512F-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
739; AVX512F-NEXT: vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
740; AVX512F-NEXT: kmovw %k0, %eax
741; AVX512F-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
742; AVX512F-NEXT: vzeroupper
743; AVX512F-NEXT: retq
744;
745; AVX512BW-LABEL: v16i32:
746; AVX512BW: # BB#0:
747; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
748; AVX512BW-NEXT: vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
749; AVX512BW-NEXT: kmovd %k0, %eax
750; AVX512BW-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
751; AVX512BW-NEXT: vzeroupper
752; AVX512BW-NEXT: retq
753 %x0 = icmp sgt <16 x i32> %a, %b
754 %x1 = icmp sgt <16 x i32> %c, %d
755 %y = and <16 x i1> %x0, %x1
756 %res = bitcast <16 x i1> %y to i16
757 ret i16 %res
758}
759
; v16f32: bitmask of the AND of two 16-wide float "ordered greater-than" compares.
; Semantically: %res = bitcast (fcmp ogt(a,b) & fcmp ogt(c,d)) to i16 — one result
; bit per lane, so the function returns a 16-bit compare mask.
; NOTE(review): the CHECK lines below were emitted by update_llc_test_checks.py
; (see file header) and must byte-match llc output for each RUN line; regenerate
; with that script rather than hand-editing them.
define i16 @v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) {
; SSE-LABEL: v16f32:
; SSE: # BB#0:
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: cmpltps %xmm3, %xmm7
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE-NEXT: pshufb %xmm3, %xmm7
; SSE-NEXT: cmpltps %xmm2, %xmm6
; SSE-NEXT: pshufb %xmm3, %xmm6
; SSE-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm7[0]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE-NEXT: pshufb %xmm2, %xmm6
; SSE-NEXT: cmpltps %xmm1, %xmm5
; SSE-NEXT: pshufb %xmm3, %xmm5
; SSE-NEXT: cmpltps %xmm0, %xmm4
; SSE-NEXT: pshufb %xmm3, %xmm4
; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
; SSE-NEXT: pshufb %xmm2, %xmm4
; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pshufb %xmm3, %xmm11
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pshufb %xmm3, %xmm9
; SSE-NEXT: punpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0]
; SSE-NEXT: pshufb %xmm2, %xmm9
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: pshufb %xmm3, %xmm10
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: pshufb %xmm3, %xmm8
; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm10[0]
; SSE-NEXT: pshufb %xmm2, %xmm8
; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm9[0]
; SSE-NEXT: pand %xmm4, %xmm8
; SSE-NEXT: pmovmskb %xmm8, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX12-LABEL: v16f32:
; AVX12: # BB#0:
; AVX12-NEXT: vcmpltps %ymm1, %ymm3, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX12-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX12-NEXT: vcmpltps %ymm5, %ymm7, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vcmpltps %ymm4, %ymm6, %ymm2
; AVX12-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX12-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
; AVX12-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX12-NEXT: vzeroupper
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v16f32:
; AVX512F: # BB#0:
; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1
; AVX512F-NEXT: vcmpltps %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16f32:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vcmpltps %zmm0, %zmm1, %k1
; AVX512BW-NEXT: vcmpltps %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  ; IR under test: ogt uses cmpltps/vcmpltps with swapped operands above
  ; (a > b  <=>  b < a), which is why the compare operand order looks reversed.
  %x0 = fcmp ogt <16 x float> %a, %b
  %x1 = fcmp ogt <16 x float> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}
850
851define i64 @v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) {
852; SSE-LABEL: v64i8:
853; SSE: # BB#0:
854; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
855; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
856; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
857; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
858; SSE-NEXT: pcmpgtb %xmm6, %xmm2
859; SSE-NEXT: pcmpgtb %xmm7, %xmm3
860; SSE-NEXT: pcmpgtb %xmm4, %xmm0
861; SSE-NEXT: pcmpgtb %xmm5, %xmm1
862; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm8
863; SSE-NEXT: pand %xmm2, %xmm8
864; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm9
865; SSE-NEXT: pand %xmm3, %xmm9
866; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm10
867; SSE-NEXT: pand %xmm0, %xmm10
868; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm11
869; SSE-NEXT: pand %xmm1, %xmm11
870; SSE-NEXT: pextrb $15, %xmm11, %eax
871; SSE-NEXT: andb $1, %al
872; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
873; SSE-NEXT: pextrb $14, %xmm11, %eax
874; SSE-NEXT: andb $1, %al
875; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
876; SSE-NEXT: pextrb $13, %xmm11, %eax
877; SSE-NEXT: andb $1, %al
878; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
879; SSE-NEXT: pextrb $12, %xmm11, %eax
880; SSE-NEXT: andb $1, %al
881; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
882; SSE-NEXT: pextrb $11, %xmm11, %eax
883; SSE-NEXT: andb $1, %al
884; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
885; SSE-NEXT: pextrb $10, %xmm11, %eax
886; SSE-NEXT: andb $1, %al
887; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
888; SSE-NEXT: pextrb $9, %xmm11, %eax
889; SSE-NEXT: andb $1, %al
890; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
891; SSE-NEXT: pextrb $8, %xmm11, %eax
892; SSE-NEXT: andb $1, %al
893; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
894; SSE-NEXT: pextrb $7, %xmm11, %eax
895; SSE-NEXT: andb $1, %al
896; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
897; SSE-NEXT: pextrb $6, %xmm11, %eax
898; SSE-NEXT: andb $1, %al
899; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
900; SSE-NEXT: pextrb $5, %xmm11, %eax
901; SSE-NEXT: andb $1, %al
902; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
903; SSE-NEXT: pextrb $4, %xmm11, %eax
904; SSE-NEXT: andb $1, %al
905; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
906; SSE-NEXT: pextrb $3, %xmm11, %eax
907; SSE-NEXT: andb $1, %al
908; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
909; SSE-NEXT: pextrb $2, %xmm11, %eax
910; SSE-NEXT: andb $1, %al
911; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
912; SSE-NEXT: pextrb $1, %xmm11, %eax
913; SSE-NEXT: andb $1, %al
914; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
915; SSE-NEXT: pextrb $0, %xmm11, %eax
916; SSE-NEXT: andb $1, %al
917; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
918; SSE-NEXT: pextrb $15, %xmm10, %eax
919; SSE-NEXT: andb $1, %al
920; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
921; SSE-NEXT: pextrb $14, %xmm10, %eax
922; SSE-NEXT: andb $1, %al
923; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
924; SSE-NEXT: pextrb $13, %xmm10, %eax
925; SSE-NEXT: andb $1, %al
926; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
927; SSE-NEXT: pextrb $12, %xmm10, %eax
928; SSE-NEXT: andb $1, %al
929; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
930; SSE-NEXT: pextrb $11, %xmm10, %eax
931; SSE-NEXT: andb $1, %al
932; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
933; SSE-NEXT: pextrb $10, %xmm10, %eax
934; SSE-NEXT: andb $1, %al
935; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
936; SSE-NEXT: pextrb $9, %xmm10, %eax
937; SSE-NEXT: andb $1, %al
938; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
939; SSE-NEXT: pextrb $8, %xmm10, %eax
940; SSE-NEXT: andb $1, %al
941; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
942; SSE-NEXT: pextrb $7, %xmm10, %eax
943; SSE-NEXT: andb $1, %al
944; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
945; SSE-NEXT: pextrb $6, %xmm10, %eax
946; SSE-NEXT: andb $1, %al
947; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
948; SSE-NEXT: pextrb $5, %xmm10, %eax
949; SSE-NEXT: andb $1, %al
950; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
951; SSE-NEXT: pextrb $4, %xmm10, %eax
952; SSE-NEXT: andb $1, %al
953; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
954; SSE-NEXT: pextrb $3, %xmm10, %eax
955; SSE-NEXT: andb $1, %al
956; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
957; SSE-NEXT: pextrb $2, %xmm10, %eax
958; SSE-NEXT: andb $1, %al
959; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
960; SSE-NEXT: pextrb $1, %xmm10, %eax
961; SSE-NEXT: andb $1, %al
962; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
963; SSE-NEXT: pextrb $0, %xmm10, %eax
964; SSE-NEXT: andb $1, %al
965; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
966; SSE-NEXT: pextrb $15, %xmm9, %eax
967; SSE-NEXT: andb $1, %al
968; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
969; SSE-NEXT: pextrb $14, %xmm9, %eax
970; SSE-NEXT: andb $1, %al
971; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
972; SSE-NEXT: pextrb $13, %xmm9, %eax
973; SSE-NEXT: andb $1, %al
974; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
975; SSE-NEXT: pextrb $12, %xmm9, %eax
976; SSE-NEXT: andb $1, %al
977; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
978; SSE-NEXT: pextrb $11, %xmm9, %eax
979; SSE-NEXT: andb $1, %al
980; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
981; SSE-NEXT: pextrb $10, %xmm9, %eax
982; SSE-NEXT: andb $1, %al
983; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
984; SSE-NEXT: pextrb $9, %xmm9, %eax
985; SSE-NEXT: andb $1, %al
986; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
987; SSE-NEXT: pextrb $8, %xmm9, %eax
988; SSE-NEXT: andb $1, %al
989; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
990; SSE-NEXT: pextrb $7, %xmm9, %eax
991; SSE-NEXT: andb $1, %al
992; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
993; SSE-NEXT: pextrb $6, %xmm9, %eax
994; SSE-NEXT: andb $1, %al
995; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
996; SSE-NEXT: pextrb $5, %xmm9, %eax
997; SSE-NEXT: andb $1, %al
998; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
999; SSE-NEXT: pextrb $4, %xmm9, %eax
1000; SSE-NEXT: andb $1, %al
1001; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1002; SSE-NEXT: pextrb $3, %xmm9, %eax
1003; SSE-NEXT: andb $1, %al
1004; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1005; SSE-NEXT: pextrb $2, %xmm9, %eax
1006; SSE-NEXT: andb $1, %al
1007; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1008; SSE-NEXT: pextrb $1, %xmm9, %eax
1009; SSE-NEXT: andb $1, %al
1010; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1011; SSE-NEXT: pextrb $0, %xmm9, %eax
1012; SSE-NEXT: andb $1, %al
1013; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1014; SSE-NEXT: pextrb $15, %xmm8, %eax
1015; SSE-NEXT: andb $1, %al
1016; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1017; SSE-NEXT: pextrb $14, %xmm8, %eax
1018; SSE-NEXT: andb $1, %al
1019; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1020; SSE-NEXT: pextrb $13, %xmm8, %eax
1021; SSE-NEXT: andb $1, %al
1022; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1023; SSE-NEXT: pextrb $12, %xmm8, %eax
1024; SSE-NEXT: andb $1, %al
1025; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1026; SSE-NEXT: pextrb $11, %xmm8, %eax
1027; SSE-NEXT: andb $1, %al
1028; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1029; SSE-NEXT: pextrb $10, %xmm8, %eax
1030; SSE-NEXT: andb $1, %al
1031; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1032; SSE-NEXT: pextrb $9, %xmm8, %eax
1033; SSE-NEXT: andb $1, %al
1034; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1035; SSE-NEXT: pextrb $8, %xmm8, %eax
1036; SSE-NEXT: andb $1, %al
1037; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1038; SSE-NEXT: pextrb $7, %xmm8, %eax
1039; SSE-NEXT: andb $1, %al
1040; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1041; SSE-NEXT: pextrb $6, %xmm8, %eax
1042; SSE-NEXT: andb $1, %al
1043; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1044; SSE-NEXT: pextrb $5, %xmm8, %eax
1045; SSE-NEXT: andb $1, %al
1046; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1047; SSE-NEXT: pextrb $4, %xmm8, %eax
1048; SSE-NEXT: andb $1, %al
1049; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1050; SSE-NEXT: pextrb $3, %xmm8, %eax
1051; SSE-NEXT: andb $1, %al
1052; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1053; SSE-NEXT: pextrb $2, %xmm8, %eax
1054; SSE-NEXT: andb $1, %al
1055; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1056; SSE-NEXT: pextrb $1, %xmm8, %eax
1057; SSE-NEXT: andb $1, %al
1058; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1059; SSE-NEXT: pextrb $0, %xmm8, %eax
1060; SSE-NEXT: andb $1, %al
1061; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1062; SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
1063; SSE-NEXT: shll $16, %eax
1064; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
1065; SSE-NEXT: orl %eax, %ecx
1066; SSE-NEXT: movl -{{[0-9]+}}(%rsp), %edx
1067; SSE-NEXT: shll $16, %edx
1068; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
1069; SSE-NEXT: orl %edx, %eax
1070; SSE-NEXT: shlq $32, %rax
1071; SSE-NEXT: orq %rcx, %rax
1072; SSE-NEXT: retq
1073;
1074; AVX1-LABEL: v64i8:
1075; AVX1: # BB#0:
1076; AVX1-NEXT: pushq %rbp
1077; AVX1-NEXT: .Lcfi0:
1078; AVX1-NEXT: .cfi_def_cfa_offset 16
1079; AVX1-NEXT: .Lcfi1:
1080; AVX1-NEXT: .cfi_offset %rbp, -16
1081; AVX1-NEXT: movq %rsp, %rbp
1082; AVX1-NEXT: .Lcfi2:
1083; AVX1-NEXT: .cfi_def_cfa_register %rbp
1084; AVX1-NEXT: andq $-32, %rsp
1085; AVX1-NEXT: subq $64, %rsp
1086; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
1087; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
1088; AVX1-NEXT: vpcmpgtb %xmm8, %xmm9, %xmm8
1089; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1
1090; AVX1-NEXT: vinsertf128 $1, %xmm8, %ymm1, %ymm8
1091; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
1092; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1093; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1
1094; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0
1095; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
1096; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm0
1097; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
1098; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
1099; AVX1-NEXT: vpcmpgtb %xmm7, %xmm5, %xmm2
1100; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1101; AVX1-NEXT: vandps %ymm0, %ymm8, %ymm0
1102; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
1103; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
1104; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
1105; AVX1-NEXT: vpcmpgtb %xmm6, %xmm4, %xmm3
1106; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1107; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
1108; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1109; AVX1-NEXT: vpextrb $15, %xmm2, %eax
1110; AVX1-NEXT: andb $1, %al
1111; AVX1-NEXT: movb %al, (%rsp)
1112; AVX1-NEXT: vpextrb $14, %xmm2, %eax
1113; AVX1-NEXT: andb $1, %al
1114; AVX1-NEXT: movb %al, (%rsp)
1115; AVX1-NEXT: vpextrb $13, %xmm2, %eax
1116; AVX1-NEXT: andb $1, %al
1117; AVX1-NEXT: movb %al, (%rsp)
1118; AVX1-NEXT: vpextrb $12, %xmm2, %eax
1119; AVX1-NEXT: andb $1, %al
1120; AVX1-NEXT: movb %al, (%rsp)
1121; AVX1-NEXT: vpextrb $11, %xmm2, %eax
1122; AVX1-NEXT: andb $1, %al
1123; AVX1-NEXT: movb %al, (%rsp)
1124; AVX1-NEXT: vpextrb $10, %xmm2, %eax
1125; AVX1-NEXT: andb $1, %al
1126; AVX1-NEXT: movb %al, (%rsp)
1127; AVX1-NEXT: vpextrb $9, %xmm2, %eax
1128; AVX1-NEXT: andb $1, %al
1129; AVX1-NEXT: movb %al, (%rsp)
1130; AVX1-NEXT: vpextrb $8, %xmm2, %eax
1131; AVX1-NEXT: andb $1, %al
1132; AVX1-NEXT: movb %al, (%rsp)
1133; AVX1-NEXT: vpextrb $7, %xmm2, %eax
1134; AVX1-NEXT: andb $1, %al
1135; AVX1-NEXT: movb %al, (%rsp)
1136; AVX1-NEXT: vpextrb $6, %xmm2, %eax
1137; AVX1-NEXT: andb $1, %al
1138; AVX1-NEXT: movb %al, (%rsp)
1139; AVX1-NEXT: vpextrb $5, %xmm2, %eax
1140; AVX1-NEXT: andb $1, %al
1141; AVX1-NEXT: movb %al, (%rsp)
1142; AVX1-NEXT: vpextrb $4, %xmm2, %eax
1143; AVX1-NEXT: andb $1, %al
1144; AVX1-NEXT: movb %al, (%rsp)
1145; AVX1-NEXT: vpextrb $3, %xmm2, %eax
1146; AVX1-NEXT: andb $1, %al
1147; AVX1-NEXT: movb %al, (%rsp)
1148; AVX1-NEXT: vpextrb $2, %xmm2, %eax
1149; AVX1-NEXT: andb $1, %al
1150; AVX1-NEXT: movb %al, (%rsp)
1151; AVX1-NEXT: vpextrb $1, %xmm2, %eax
1152; AVX1-NEXT: andb $1, %al
1153; AVX1-NEXT: movb %al, (%rsp)
1154; AVX1-NEXT: vpextrb $0, %xmm2, %eax
1155; AVX1-NEXT: andb $1, %al
1156; AVX1-NEXT: movb %al, (%rsp)
1157; AVX1-NEXT: vpextrb $15, %xmm1, %eax
1158; AVX1-NEXT: andb $1, %al
1159; AVX1-NEXT: movb %al, (%rsp)
1160; AVX1-NEXT: vpextrb $14, %xmm1, %eax
1161; AVX1-NEXT: andb $1, %al
1162; AVX1-NEXT: movb %al, (%rsp)
1163; AVX1-NEXT: vpextrb $13, %xmm1, %eax
1164; AVX1-NEXT: andb $1, %al
1165; AVX1-NEXT: movb %al, (%rsp)
1166; AVX1-NEXT: vpextrb $12, %xmm1, %eax
1167; AVX1-NEXT: andb $1, %al
1168; AVX1-NEXT: movb %al, (%rsp)
1169; AVX1-NEXT: vpextrb $11, %xmm1, %eax
1170; AVX1-NEXT: andb $1, %al
1171; AVX1-NEXT: movb %al, (%rsp)
1172; AVX1-NEXT: vpextrb $10, %xmm1, %eax
1173; AVX1-NEXT: andb $1, %al
1174; AVX1-NEXT: movb %al, (%rsp)
1175; AVX1-NEXT: vpextrb $9, %xmm1, %eax
1176; AVX1-NEXT: andb $1, %al
1177; AVX1-NEXT: movb %al, (%rsp)
1178; AVX1-NEXT: vpextrb $8, %xmm1, %eax
1179; AVX1-NEXT: andb $1, %al
1180; AVX1-NEXT: movb %al, (%rsp)
1181; AVX1-NEXT: vpextrb $7, %xmm1, %eax
1182; AVX1-NEXT: andb $1, %al
1183; AVX1-NEXT: movb %al, (%rsp)
1184; AVX1-NEXT: vpextrb $6, %xmm1, %eax
1185; AVX1-NEXT: andb $1, %al
1186; AVX1-NEXT: movb %al, (%rsp)
1187; AVX1-NEXT: vpextrb $5, %xmm1, %eax
1188; AVX1-NEXT: andb $1, %al
1189; AVX1-NEXT: movb %al, (%rsp)
1190; AVX1-NEXT: vpextrb $4, %xmm1, %eax
1191; AVX1-NEXT: andb $1, %al
1192; AVX1-NEXT: movb %al, (%rsp)
1193; AVX1-NEXT: vpextrb $3, %xmm1, %eax
1194; AVX1-NEXT: andb $1, %al
1195; AVX1-NEXT: movb %al, (%rsp)
1196; AVX1-NEXT: vpextrb $2, %xmm1, %eax
1197; AVX1-NEXT: andb $1, %al
1198; AVX1-NEXT: movb %al, (%rsp)
1199; AVX1-NEXT: vpextrb $1, %xmm1, %eax
1200; AVX1-NEXT: andb $1, %al
1201; AVX1-NEXT: movb %al, (%rsp)
1202; AVX1-NEXT: vpextrb $0, %xmm1, %eax
1203; AVX1-NEXT: andb $1, %al
1204; AVX1-NEXT: movb %al, (%rsp)
1205; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1206; AVX1-NEXT: vpextrb $15, %xmm1, %eax
1207; AVX1-NEXT: andb $1, %al
1208; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1209; AVX1-NEXT: vpextrb $14, %xmm1, %eax
1210; AVX1-NEXT: andb $1, %al
1211; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1212; AVX1-NEXT: vpextrb $13, %xmm1, %eax
1213; AVX1-NEXT: andb $1, %al
1214; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1215; AVX1-NEXT: vpextrb $12, %xmm1, %eax
1216; AVX1-NEXT: andb $1, %al
1217; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1218; AVX1-NEXT: vpextrb $11, %xmm1, %eax
1219; AVX1-NEXT: andb $1, %al
1220; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1221; AVX1-NEXT: vpextrb $10, %xmm1, %eax
1222; AVX1-NEXT: andb $1, %al
1223; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1224; AVX1-NEXT: vpextrb $9, %xmm1, %eax
1225; AVX1-NEXT: andb $1, %al
1226; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1227; AVX1-NEXT: vpextrb $8, %xmm1, %eax
1228; AVX1-NEXT: andb $1, %al
1229; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1230; AVX1-NEXT: vpextrb $7, %xmm1, %eax
1231; AVX1-NEXT: andb $1, %al
1232; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1233; AVX1-NEXT: vpextrb $6, %xmm1, %eax
1234; AVX1-NEXT: andb $1, %al
1235; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1236; AVX1-NEXT: vpextrb $5, %xmm1, %eax
1237; AVX1-NEXT: andb $1, %al
1238; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1239; AVX1-NEXT: vpextrb $4, %xmm1, %eax
1240; AVX1-NEXT: andb $1, %al
1241; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1242; AVX1-NEXT: vpextrb $3, %xmm1, %eax
1243; AVX1-NEXT: andb $1, %al
1244; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1245; AVX1-NEXT: vpextrb $2, %xmm1, %eax
1246; AVX1-NEXT: andb $1, %al
1247; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1248; AVX1-NEXT: vpextrb $1, %xmm1, %eax
1249; AVX1-NEXT: andb $1, %al
1250; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1251; AVX1-NEXT: vpextrb $0, %xmm1, %eax
1252; AVX1-NEXT: andb $1, %al
1253; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1254; AVX1-NEXT: vpextrb $15, %xmm0, %eax
1255; AVX1-NEXT: andb $1, %al
1256; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1257; AVX1-NEXT: vpextrb $14, %xmm0, %eax
1258; AVX1-NEXT: andb $1, %al
1259; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1260; AVX1-NEXT: vpextrb $13, %xmm0, %eax
1261; AVX1-NEXT: andb $1, %al
1262; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1263; AVX1-NEXT: vpextrb $12, %xmm0, %eax
1264; AVX1-NEXT: andb $1, %al
1265; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1266; AVX1-NEXT: vpextrb $11, %xmm0, %eax
1267; AVX1-NEXT: andb $1, %al
1268; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1269; AVX1-NEXT: vpextrb $10, %xmm0, %eax
1270; AVX1-NEXT: andb $1, %al
1271; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1272; AVX1-NEXT: vpextrb $9, %xmm0, %eax
1273; AVX1-NEXT: andb $1, %al
1274; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1275; AVX1-NEXT: vpextrb $8, %xmm0, %eax
1276; AVX1-NEXT: andb $1, %al
1277; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1278; AVX1-NEXT: vpextrb $7, %xmm0, %eax
1279; AVX1-NEXT: andb $1, %al
1280; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1281; AVX1-NEXT: vpextrb $6, %xmm0, %eax
1282; AVX1-NEXT: andb $1, %al
1283; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1284; AVX1-NEXT: vpextrb $5, %xmm0, %eax
1285; AVX1-NEXT: andb $1, %al
1286; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1287; AVX1-NEXT: vpextrb $4, %xmm0, %eax
1288; AVX1-NEXT: andb $1, %al
1289; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1290; AVX1-NEXT: vpextrb $3, %xmm0, %eax
1291; AVX1-NEXT: andb $1, %al
1292; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1293; AVX1-NEXT: vpextrb $2, %xmm0, %eax
1294; AVX1-NEXT: andb $1, %al
1295; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1296; AVX1-NEXT: vpextrb $1, %xmm0, %eax
1297; AVX1-NEXT: andb $1, %al
1298; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1299; AVX1-NEXT: vpextrb $0, %xmm0, %eax
1300; AVX1-NEXT: andb $1, %al
1301; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1302; AVX1-NEXT: movl (%rsp), %ecx
1303; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
1304; AVX1-NEXT: shlq $32, %rax
1305; AVX1-NEXT: orq %rcx, %rax
1306; AVX1-NEXT: movq %rbp, %rsp
1307; AVX1-NEXT: popq %rbp
1308; AVX1-NEXT: vzeroupper
1309; AVX1-NEXT: retq
1310;
1311; AVX2-LABEL: v64i8:
1312; AVX2: # BB#0:
1313; AVX2-NEXT: pushq %rbp
1314; AVX2-NEXT: .Lcfi0:
1315; AVX2-NEXT: .cfi_def_cfa_offset 16
1316; AVX2-NEXT: .Lcfi1:
1317; AVX2-NEXT: .cfi_offset %rbp, -16
1318; AVX2-NEXT: movq %rsp, %rbp
1319; AVX2-NEXT: .Lcfi2:
1320; AVX2-NEXT: .cfi_def_cfa_register %rbp
1321; AVX2-NEXT: andq $-32, %rsp
1322; AVX2-NEXT: subq $64, %rsp
1323; AVX2-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
1324; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm2
1325; AVX2-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm0
1326; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
1327; AVX2-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm1
1328; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
1329; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
1330; AVX2-NEXT: vpextrb $15, %xmm2, %eax
1331; AVX2-NEXT: andb $1, %al
1332; AVX2-NEXT: movb %al, (%rsp)
1333; AVX2-NEXT: vpextrb $14, %xmm2, %eax
1334; AVX2-NEXT: andb $1, %al
1335; AVX2-NEXT: movb %al, (%rsp)
1336; AVX2-NEXT: vpextrb $13, %xmm2, %eax
1337; AVX2-NEXT: andb $1, %al
1338; AVX2-NEXT: movb %al, (%rsp)
1339; AVX2-NEXT: vpextrb $12, %xmm2, %eax
1340; AVX2-NEXT: andb $1, %al
1341; AVX2-NEXT: movb %al, (%rsp)
1342; AVX2-NEXT: vpextrb $11, %xmm2, %eax
1343; AVX2-NEXT: andb $1, %al
1344; AVX2-NEXT: movb %al, (%rsp)
1345; AVX2-NEXT: vpextrb $10, %xmm2, %eax
1346; AVX2-NEXT: andb $1, %al
1347; AVX2-NEXT: movb %al, (%rsp)
1348; AVX2-NEXT: vpextrb $9, %xmm2, %eax
1349; AVX2-NEXT: andb $1, %al
1350; AVX2-NEXT: movb %al, (%rsp)
1351; AVX2-NEXT: vpextrb $8, %xmm2, %eax
1352; AVX2-NEXT: andb $1, %al
1353; AVX2-NEXT: movb %al, (%rsp)
1354; AVX2-NEXT: vpextrb $7, %xmm2, %eax
1355; AVX2-NEXT: andb $1, %al
1356; AVX2-NEXT: movb %al, (%rsp)
1357; AVX2-NEXT: vpextrb $6, %xmm2, %eax
1358; AVX2-NEXT: andb $1, %al
1359; AVX2-NEXT: movb %al, (%rsp)
1360; AVX2-NEXT: vpextrb $5, %xmm2, %eax
1361; AVX2-NEXT: andb $1, %al
1362; AVX2-NEXT: movb %al, (%rsp)
1363; AVX2-NEXT: vpextrb $4, %xmm2, %eax
1364; AVX2-NEXT: andb $1, %al
1365; AVX2-NEXT: movb %al, (%rsp)
1366; AVX2-NEXT: vpextrb $3, %xmm2, %eax
1367; AVX2-NEXT: andb $1, %al
1368; AVX2-NEXT: movb %al, (%rsp)
1369; AVX2-NEXT: vpextrb $2, %xmm2, %eax
1370; AVX2-NEXT: andb $1, %al
1371; AVX2-NEXT: movb %al, (%rsp)
1372; AVX2-NEXT: vpextrb $1, %xmm2, %eax
1373; AVX2-NEXT: andb $1, %al
1374; AVX2-NEXT: movb %al, (%rsp)
1375; AVX2-NEXT: vpextrb $0, %xmm2, %eax
1376; AVX2-NEXT: andb $1, %al
1377; AVX2-NEXT: movb %al, (%rsp)
1378; AVX2-NEXT: vpextrb $15, %xmm1, %eax
1379; AVX2-NEXT: andb $1, %al
1380; AVX2-NEXT: movb %al, (%rsp)
1381; AVX2-NEXT: vpextrb $14, %xmm1, %eax
1382; AVX2-NEXT: andb $1, %al
1383; AVX2-NEXT: movb %al, (%rsp)
1384; AVX2-NEXT: vpextrb $13, %xmm1, %eax
1385; AVX2-NEXT: andb $1, %al
1386; AVX2-NEXT: movb %al, (%rsp)
1387; AVX2-NEXT: vpextrb $12, %xmm1, %eax
1388; AVX2-NEXT: andb $1, %al
1389; AVX2-NEXT: movb %al, (%rsp)
1390; AVX2-NEXT: vpextrb $11, %xmm1, %eax
1391; AVX2-NEXT: andb $1, %al
1392; AVX2-NEXT: movb %al, (%rsp)
1393; AVX2-NEXT: vpextrb $10, %xmm1, %eax
1394; AVX2-NEXT: andb $1, %al
1395; AVX2-NEXT: movb %al, (%rsp)
1396; AVX2-NEXT: vpextrb $9, %xmm1, %eax
1397; AVX2-NEXT: andb $1, %al
1398; AVX2-NEXT: movb %al, (%rsp)
1399; AVX2-NEXT: vpextrb $8, %xmm1, %eax
1400; AVX2-NEXT: andb $1, %al
1401; AVX2-NEXT: movb %al, (%rsp)
1402; AVX2-NEXT: vpextrb $7, %xmm1, %eax
1403; AVX2-NEXT: andb $1, %al
1404; AVX2-NEXT: movb %al, (%rsp)
1405; AVX2-NEXT: vpextrb $6, %xmm1, %eax
1406; AVX2-NEXT: andb $1, %al
1407; AVX2-NEXT: movb %al, (%rsp)
1408; AVX2-NEXT: vpextrb $5, %xmm1, %eax
1409; AVX2-NEXT: andb $1, %al
1410; AVX2-NEXT: movb %al, (%rsp)
1411; AVX2-NEXT: vpextrb $4, %xmm1, %eax
1412; AVX2-NEXT: andb $1, %al
1413; AVX2-NEXT: movb %al, (%rsp)
1414; AVX2-NEXT: vpextrb $3, %xmm1, %eax
1415; AVX2-NEXT: andb $1, %al
1416; AVX2-NEXT: movb %al, (%rsp)
1417; AVX2-NEXT: vpextrb $2, %xmm1, %eax
1418; AVX2-NEXT: andb $1, %al
1419; AVX2-NEXT: movb %al, (%rsp)
1420; AVX2-NEXT: vpextrb $1, %xmm1, %eax
1421; AVX2-NEXT: andb $1, %al
1422; AVX2-NEXT: movb %al, (%rsp)
1423; AVX2-NEXT: vpextrb $0, %xmm1, %eax
1424; AVX2-NEXT: andb $1, %al
1425; AVX2-NEXT: movb %al, (%rsp)
1426; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1427; AVX2-NEXT: vpextrb $15, %xmm1, %eax
1428; AVX2-NEXT: andb $1, %al
1429; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1430; AVX2-NEXT: vpextrb $14, %xmm1, %eax
1431; AVX2-NEXT: andb $1, %al
1432; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1433; AVX2-NEXT: vpextrb $13, %xmm1, %eax
1434; AVX2-NEXT: andb $1, %al
1435; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1436; AVX2-NEXT: vpextrb $12, %xmm1, %eax
1437; AVX2-NEXT: andb $1, %al
1438; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1439; AVX2-NEXT: vpextrb $11, %xmm1, %eax
1440; AVX2-NEXT: andb $1, %al
1441; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1442; AVX2-NEXT: vpextrb $10, %xmm1, %eax
1443; AVX2-NEXT: andb $1, %al
1444; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1445; AVX2-NEXT: vpextrb $9, %xmm1, %eax
1446; AVX2-NEXT: andb $1, %al
1447; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1448; AVX2-NEXT: vpextrb $8, %xmm1, %eax
1449; AVX2-NEXT: andb $1, %al
1450; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1451; AVX2-NEXT: vpextrb $7, %xmm1, %eax
1452; AVX2-NEXT: andb $1, %al
1453; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1454; AVX2-NEXT: vpextrb $6, %xmm1, %eax
1455; AVX2-NEXT: andb $1, %al
1456; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1457; AVX2-NEXT: vpextrb $5, %xmm1, %eax
1458; AVX2-NEXT: andb $1, %al
1459; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1460; AVX2-NEXT: vpextrb $4, %xmm1, %eax
1461; AVX2-NEXT: andb $1, %al
1462; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1463; AVX2-NEXT: vpextrb $3, %xmm1, %eax
1464; AVX2-NEXT: andb $1, %al
1465; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1466; AVX2-NEXT: vpextrb $2, %xmm1, %eax
1467; AVX2-NEXT: andb $1, %al
1468; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1469; AVX2-NEXT: vpextrb $1, %xmm1, %eax
1470; AVX2-NEXT: andb $1, %al
1471; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1472; AVX2-NEXT: vpextrb $0, %xmm1, %eax
1473; AVX2-NEXT: andb $1, %al
1474; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1475; AVX2-NEXT: vpextrb $15, %xmm0, %eax
1476; AVX2-NEXT: andb $1, %al
1477; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1478; AVX2-NEXT: vpextrb $14, %xmm0, %eax
1479; AVX2-NEXT: andb $1, %al
1480; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1481; AVX2-NEXT: vpextrb $13, %xmm0, %eax
1482; AVX2-NEXT: andb $1, %al
1483; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1484; AVX2-NEXT: vpextrb $12, %xmm0, %eax
1485; AVX2-NEXT: andb $1, %al
1486; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1487; AVX2-NEXT: vpextrb $11, %xmm0, %eax
1488; AVX2-NEXT: andb $1, %al
1489; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1490; AVX2-NEXT: vpextrb $10, %xmm0, %eax
1491; AVX2-NEXT: andb $1, %al
1492; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1493; AVX2-NEXT: vpextrb $9, %xmm0, %eax
1494; AVX2-NEXT: andb $1, %al
1495; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1496; AVX2-NEXT: vpextrb $8, %xmm0, %eax
1497; AVX2-NEXT: andb $1, %al
1498; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1499; AVX2-NEXT: vpextrb $7, %xmm0, %eax
1500; AVX2-NEXT: andb $1, %al
1501; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1502; AVX2-NEXT: vpextrb $6, %xmm0, %eax
1503; AVX2-NEXT: andb $1, %al
1504; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1505; AVX2-NEXT: vpextrb $5, %xmm0, %eax
1506; AVX2-NEXT: andb $1, %al
1507; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1508; AVX2-NEXT: vpextrb $4, %xmm0, %eax
1509; AVX2-NEXT: andb $1, %al
1510; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1511; AVX2-NEXT: vpextrb $3, %xmm0, %eax
1512; AVX2-NEXT: andb $1, %al
1513; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1514; AVX2-NEXT: vpextrb $2, %xmm0, %eax
1515; AVX2-NEXT: andb $1, %al
1516; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1517; AVX2-NEXT: vpextrb $1, %xmm0, %eax
1518; AVX2-NEXT: andb $1, %al
1519; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1520; AVX2-NEXT: vpextrb $0, %xmm0, %eax
1521; AVX2-NEXT: andb $1, %al
1522; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1523; AVX2-NEXT: movl (%rsp), %ecx
1524; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
1525; AVX2-NEXT: shlq $32, %rax
1526; AVX2-NEXT: orq %rcx, %rax
1527; AVX2-NEXT: movq %rbp, %rsp
1528; AVX2-NEXT: popq %rbp
1529; AVX2-NEXT: vzeroupper
1530; AVX2-NEXT: retq
1531;
1532; AVX512F-LABEL: v64i8:
1533; AVX512F: # BB#0:
1534; AVX512F-NEXT: pushq %rbp
1535; AVX512F-NEXT: .Lcfi3:
1536; AVX512F-NEXT: .cfi_def_cfa_offset 16
1537; AVX512F-NEXT: .Lcfi4:
1538; AVX512F-NEXT: .cfi_offset %rbp, -16
1539; AVX512F-NEXT: movq %rsp, %rbp
1540; AVX512F-NEXT: .Lcfi5:
1541; AVX512F-NEXT: .cfi_def_cfa_register %rbp
1542; AVX512F-NEXT: andq $-32, %rsp
1543; AVX512F-NEXT: subq $64, %rsp
1544; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
1545; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
1546; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm2
1547; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
1548; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm2
1549; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
1550; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
1551; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
1552; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
1553; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
1554; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1555; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1556; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
1557; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1558; AVX512F-NEXT: kmovw %k0, (%rsp)
1559; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
1560; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1561; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
1562; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1563; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1564; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm0
1565; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
1566; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1567; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1568; AVX512F-NEXT: movl (%rsp), %ecx
1569; AVX512F-NEXT: movl {{[0-9]+}}(%rsp), %eax
1570; AVX512F-NEXT: shlq $32, %rax
1571; AVX512F-NEXT: orq %rcx, %rax
1572; AVX512F-NEXT: movq %rbp, %rsp
1573; AVX512F-NEXT: popq %rbp
1574; AVX512F-NEXT: vzeroupper
1575; AVX512F-NEXT: retq
1576;
1577; AVX512BW-LABEL: v64i8:
1578; AVX512BW: # BB#0:
1579; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k1
1580; AVX512BW-NEXT: vpcmpgtb %zmm3, %zmm2, %k0 {%k1}
1581; AVX512BW-NEXT: kmovq %k0, %rax
1582; AVX512BW-NEXT: vzeroupper
1583; AVX512BW-NEXT: retq
1584 %x0 = icmp sgt <64 x i8> %a, %b
1585 %x1 = icmp sgt <64 x i8> %c, %d
1586 %y = and <64 x i1> %x0, %x1
1587 %res = bitcast <64 x i1> %y to i64
1588 ret i64 %res
1589}