; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW

; Codegen test: (a > b) & (c > d) on <8 x i64>, with the <8 x i1> result
; bitcast to an i8 bitmask return value. CHECK lines below are autogenerated.
define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
; SSE-LABEL: v8i64:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtq %xmm7, %xmm3
; SSE-NEXT: pcmpgtq %xmm6, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,4,5,6,7,0,1,4,5,8,9,12,13]
; SSE-NEXT: pshufb %xmm3, %xmm2
; SSE-NEXT: pcmpgtq %xmm5, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT: pcmpgtq %xmm4, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,2],xmm11[0,2]
; SSE-NEXT: pshufb %xmm3, %xmm8
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm10[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm9[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm8[4,5,6,7]
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: psllw $15, %xmm2
; SSE-NEXT: psraw $15, %xmm2
; SSE-NEXT: packsswb %xmm0, %xmm2
; SSE-NEXT: pmovmskb %xmm2, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX1-LABEL: v8i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT: vpcmpgtq %xmm8, %xmm9, %xmm8
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm9
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm8, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm9[0]
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm2
; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm3
; AVX1-NEXT: vpacksswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpshufb %xmm8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v8i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpcmpgtq %ymm7, %ymm5, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
; AVX2-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v8i64:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
; AVX512F-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v8i64:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = icmp sgt <8 x i64> %a, %b
  %x1 = icmp sgt <8 x i64> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}
139
; Codegen test: (a > b) & (c > d) with ordered FP compares (fcmp ogt) on
; <8 x double>, result bitcast to an i8 bitmask. Note the backend commutes
; the operands and emits cmpltpd/vcmpltpd. CHECK lines are autogenerated.
define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
; SSE-LABEL: v8f64:
; SSE: # BB#0:
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: cmpltpd %xmm3, %xmm7
; SSE-NEXT: cmpltpd %xmm2, %xmm6
; SSE-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,4,5,6,7,0,1,4,5,8,9,12,13]
; SSE-NEXT: pshufb %xmm2, %xmm6
; SSE-NEXT: cmpltpd %xmm1, %xmm5
; SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm5[0,2,2,3,4,5,6,7]
; SSE-NEXT: cmpltpd %xmm0, %xmm4
; SSE-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm4[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm6[4,5,6,7]
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,2],xmm11[0,2]
; SSE-NEXT: pshufb %xmm2, %xmm8
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: shufps {{.*#+}} xmm10 = xmm10[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm10[0,2,2,3,4,5,6,7]
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: shufps {{.*#+}} xmm9 = xmm9[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm9[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm8[4,5,6,7]
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: psllw $15, %xmm2
; SSE-NEXT: psraw $15, %xmm2
; SSE-NEXT: packsswb %xmm0, %xmm2
; SSE-NEXT: pmovmskb %xmm2, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX12-LABEL: v8f64:
; AVX12: # BB#0:
; AVX12-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX12-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX12-NEXT: vcmpltpd %ymm5, %ymm7, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vcmpltpd %ymm4, %ymm6, %ymm2
; AVX12-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX12-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
; AVX12-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX12-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX12-NEXT: vzeroupper
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v8f64:
; AVX512F: # BB#0:
; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; AVX512F-NEXT: vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v8f64:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; AVX512BW-NEXT: vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = fcmp ogt <8 x double> %a, %b
  %x1 = fcmp ogt <8 x double> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}
233
; Codegen test: (a > b) & (c > d) on <32 x i16>, result bitcast to an i32
; bitmask. Without AVX512BW the <32 x i1> mask has no native register form:
; AVX512F-only codegen spills each mask bit via kshift/vpinsrb and goes
; through an aligned stack slot. CHECK lines are autogenerated.
define i32 @v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i16> %d) {
; SSE-LABEL: v32i16:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtw %xmm5, %xmm1
; SSE-NEXT: pcmpgtw %xmm4, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: pcmpgtw %xmm7, %xmm3
; SSE-NEXT: pcmpgtw %xmm6, %xmm2
; SSE-NEXT: packsswb %xmm3, %xmm2
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: packsswb %xmm11, %xmm10
; SSE-NEXT: pand %xmm0, %xmm10
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: packsswb %xmm9, %xmm8
; SSE-NEXT: pand %xmm2, %xmm8
; SSE-NEXT: pmovmskb %xmm10, %ecx
; SSE-NEXT: pmovmskb %xmm8, %eax
; SSE-NEXT: shll $16, %eax
; SSE-NEXT: orl %ecx, %eax
; SSE-NEXT: retq
;
; AVX1-LABEL: v32i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT: vpcmpgtw %xmm8, %xmm9, %xmm8
; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm7, %xmm5, %xmm2
; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm8, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm6, %xmm4, %xmm3
; AVX1-NEXT: vpacksswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: vpmovmskb %xmm1, %eax
; AVX1-NEXT: shll $16, %eax
; AVX1-NEXT: orl %ecx, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v32i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v32i16:
; AVX512F: # BB#0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: .Lcfi0:
; AVX512F-NEXT: .cfi_def_cfa_offset 16
; AVX512F-NEXT: .Lcfi1:
; AVX512F-NEXT: .cfi_offset %rbp, -16
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: .Lcfi2:
; AVX512F-NEXT: .cfi_def_cfa_register %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm1
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm0
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm1
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm2
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, (%rsp)
; AVX512F-NEXT: movl (%rsp), %eax
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v32i16:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtw %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = icmp sgt <32 x i16> %a, %b
  %x1 = icmp sgt <32 x i16> %c, %d
  %y = and <32 x i1> %x0, %x1
  %res = bitcast <32 x i1> %y to i32
  ret i32 %res
}
624
625define i16 @v16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i32> %d) {
626; SSE-LABEL: v16i32:
627; SSE: # BB#0:
628; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
629; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
630; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
631; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
632; SSE-NEXT: pcmpgtd %xmm7, %xmm3
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000633; SSE-NEXT: packssdw %xmm0, %xmm3
Simon Pilgrim11e29692017-09-14 10:30:22 +0000634; SSE-NEXT: pcmpgtd %xmm6, %xmm2
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000635; SSE-NEXT: packssdw %xmm0, %xmm2
Simon Pilgrim11e29692017-09-14 10:30:22 +0000636; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000637; SSE-NEXT: pcmpgtd %xmm5, %xmm1
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000638; SSE-NEXT: packssdw %xmm0, %xmm1
Simon Pilgrim11e29692017-09-14 10:30:22 +0000639; SSE-NEXT: pcmpgtd %xmm4, %xmm0
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000640; SSE-NEXT: packssdw %xmm0, %xmm0
Simon Pilgrim11e29692017-09-14 10:30:22 +0000641; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000642; SSE-NEXT: packsswb %xmm2, %xmm0
Simon Pilgrim11e29692017-09-14 10:30:22 +0000643; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm11
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000644; SSE-NEXT: packssdw %xmm0, %xmm11
Simon Pilgrim11e29692017-09-14 10:30:22 +0000645; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm9
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000646; SSE-NEXT: packssdw %xmm0, %xmm9
Simon Pilgrim11e29692017-09-14 10:30:22 +0000647; SSE-NEXT: punpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000648; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm10
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000649; SSE-NEXT: packssdw %xmm0, %xmm10
Simon Pilgrim11e29692017-09-14 10:30:22 +0000650; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm8
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000651; SSE-NEXT: packssdw %xmm0, %xmm8
Simon Pilgrim11e29692017-09-14 10:30:22 +0000652; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm10[0]
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000653; SSE-NEXT: packsswb %xmm9, %xmm8
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000654; SSE-NEXT: pand %xmm0, %xmm8
655; SSE-NEXT: pmovmskb %xmm8, %eax
Simon Pilgrim11e29692017-09-14 10:30:22 +0000656; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
657; SSE-NEXT: retq
658;
659; AVX1-LABEL: v16i32:
660; AVX1: # BB#0:
661; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
662; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
663; AVX1-NEXT: vpcmpgtd %xmm8, %xmm9, %xmm8
664; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm1
665; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm1
666; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
667; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm9
668; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
669; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
670; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm1
671; AVX1-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
672; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
673; AVX1-NEXT: vpshufb %xmm8, %xmm0, %xmm0
674; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm9[0]
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000675; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
676; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
677; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
678; AVX1-NEXT: vpcmpgtd %xmm7, %xmm5, %xmm2
679; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
Simon Pilgrim11e29692017-09-14 10:30:22 +0000680; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm1
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000681; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
682; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
683; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
684; AVX1-NEXT: vpcmpgtd %xmm6, %xmm4, %xmm3
685; AVX1-NEXT: vpacksswb %xmm2, %xmm3, %xmm2
686; AVX1-NEXT: vpshufb %xmm8, %xmm2, %xmm2
687; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000688; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
689; AVX1-NEXT: vpmovmskb %xmm0, %eax
690; AVX1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
691; AVX1-NEXT: vzeroupper
692; AVX1-NEXT: retq
693;
694; AVX2-LABEL: v16i32:
695; AVX2: # BB#0:
696; AVX2-NEXT: vpcmpgtd %ymm3, %ymm1, %ymm1
697; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
698; AVX2-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
699; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
700; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
701; AVX2-NEXT: vpcmpgtd %ymm2, %ymm0, %ymm0
702; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
703; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
704; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
705; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000706; AVX2-NEXT: vpcmpgtd %ymm7, %ymm5, %ymm1
707; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
708; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
709; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
710; AVX2-NEXT: vpcmpgtd %ymm6, %ymm4, %ymm2
711; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
712; AVX2-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
713; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
714; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000715; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
716; AVX2-NEXT: vpmovmskb %xmm0, %eax
717; AVX2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
718; AVX2-NEXT: vzeroupper
719; AVX2-NEXT: retq
720;
721; AVX512F-LABEL: v16i32:
722; AVX512F: # BB#0:
723; AVX512F-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
724; AVX512F-NEXT: vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
725; AVX512F-NEXT: kmovw %k0, %eax
726; AVX512F-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
727; AVX512F-NEXT: vzeroupper
728; AVX512F-NEXT: retq
729;
730; AVX512BW-LABEL: v16i32:
731; AVX512BW: # BB#0:
732; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
733; AVX512BW-NEXT: vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
734; AVX512BW-NEXT: kmovd %k0, %eax
735; AVX512BW-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
736; AVX512BW-NEXT: vzeroupper
737; AVX512BW-NEXT: retq
738 %x0 = icmp sgt <16 x i32> %a, %b
739 %x1 = icmp sgt <16 x i32> %c, %d
740 %y = and <16 x i1> %x0, %x1
741 %res = bitcast <16 x i1> %y to i16
742 ret i16 %res
743}
744
745define i16 @v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) {
746; SSE-LABEL: v16f32:
747; SSE: # BB#0:
748; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm8
749; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm10
750; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9
751; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm11
752; SSE-NEXT: cmpltps %xmm3, %xmm7
753; SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
754; SSE-NEXT: pshufb %xmm3, %xmm7
755; SSE-NEXT: cmpltps %xmm2, %xmm6
756; SSE-NEXT: pshufb %xmm3, %xmm6
757; SSE-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm7[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000758; SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
759; SSE-NEXT: pshufb %xmm2, %xmm6
760; SSE-NEXT: cmpltps %xmm1, %xmm5
761; SSE-NEXT: pshufb %xmm3, %xmm5
762; SSE-NEXT: cmpltps %xmm0, %xmm4
763; SSE-NEXT: pshufb %xmm3, %xmm4
764; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000765; SSE-NEXT: pshufb %xmm2, %xmm4
766; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000767; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm11
768; SSE-NEXT: pshufb %xmm3, %xmm11
769; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm9
770; SSE-NEXT: pshufb %xmm3, %xmm9
771; SSE-NEXT: punpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000772; SSE-NEXT: pshufb %xmm2, %xmm9
773; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm10
774; SSE-NEXT: pshufb %xmm3, %xmm10
775; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm8
776; SSE-NEXT: pshufb %xmm3, %xmm8
777; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm10[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000778; SSE-NEXT: pshufb %xmm2, %xmm8
779; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm9[0]
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000780; SSE-NEXT: pand %xmm4, %xmm8
781; SSE-NEXT: pmovmskb %xmm8, %eax
Simon Pilgrim11e29692017-09-14 10:30:22 +0000782; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
783; SSE-NEXT: retq
784;
785; AVX12-LABEL: v16f32:
786; AVX12: # BB#0:
787; AVX12-NEXT: vcmpltps %ymm1, %ymm3, %ymm1
788; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm3
789; AVX12-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
790; AVX12-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
791; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
792; AVX12-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
793; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
794; AVX12-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
795; AVX12-NEXT: vpshufb %xmm3, %xmm0, %xmm0
796; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000797; AVX12-NEXT: vcmpltps %ymm5, %ymm7, %ymm1
798; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
799; AVX12-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
800; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
801; AVX12-NEXT: vcmpltps %ymm4, %ymm6, %ymm2
802; AVX12-NEXT: vextractf128 $1, %ymm2, %xmm4
803; AVX12-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
804; AVX12-NEXT: vpshufb %xmm3, %xmm2, %xmm2
805; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000806; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
807; AVX12-NEXT: vpmovmskb %xmm0, %eax
808; AVX12-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
809; AVX12-NEXT: vzeroupper
810; AVX12-NEXT: retq
811;
812; AVX512F-LABEL: v16f32:
813; AVX512F: # BB#0:
814; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1
815; AVX512F-NEXT: vcmpltps %zmm2, %zmm3, %k0 {%k1}
816; AVX512F-NEXT: kmovw %k0, %eax
817; AVX512F-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
818; AVX512F-NEXT: vzeroupper
819; AVX512F-NEXT: retq
820;
821; AVX512BW-LABEL: v16f32:
822; AVX512BW: # BB#0:
823; AVX512BW-NEXT: vcmpltps %zmm0, %zmm1, %k1
824; AVX512BW-NEXT: vcmpltps %zmm2, %zmm3, %k0 {%k1}
825; AVX512BW-NEXT: kmovd %k0, %eax
826; AVX512BW-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
827; AVX512BW-NEXT: vzeroupper
828; AVX512BW-NEXT: retq
829 %x0 = fcmp ogt <16 x float> %a, %b
830 %x1 = fcmp ogt <16 x float> %c, %d
831 %y = and <16 x i1> %x0, %x1
832 %res = bitcast <16 x i1> %y to i16
833 ret i16 %res
834}
835
836define i64 @v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) {
837; SSE-LABEL: v64i8:
838; SSE: # BB#0:
839; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
840; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
841; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
842; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
843; SSE-NEXT: pcmpgtb %xmm6, %xmm2
844; SSE-NEXT: pcmpgtb %xmm7, %xmm3
845; SSE-NEXT: pcmpgtb %xmm4, %xmm0
846; SSE-NEXT: pcmpgtb %xmm5, %xmm1
847; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm8
848; SSE-NEXT: pand %xmm2, %xmm8
849; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm9
850; SSE-NEXT: pand %xmm3, %xmm9
851; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm10
852; SSE-NEXT: pand %xmm0, %xmm10
853; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm11
854; SSE-NEXT: pand %xmm1, %xmm11
855; SSE-NEXT: pextrb $15, %xmm11, %eax
856; SSE-NEXT: andb $1, %al
857; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
858; SSE-NEXT: pextrb $14, %xmm11, %eax
859; SSE-NEXT: andb $1, %al
860; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
861; SSE-NEXT: pextrb $13, %xmm11, %eax
862; SSE-NEXT: andb $1, %al
863; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
864; SSE-NEXT: pextrb $12, %xmm11, %eax
865; SSE-NEXT: andb $1, %al
866; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
867; SSE-NEXT: pextrb $11, %xmm11, %eax
868; SSE-NEXT: andb $1, %al
869; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
870; SSE-NEXT: pextrb $10, %xmm11, %eax
871; SSE-NEXT: andb $1, %al
872; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
873; SSE-NEXT: pextrb $9, %xmm11, %eax
874; SSE-NEXT: andb $1, %al
875; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
876; SSE-NEXT: pextrb $8, %xmm11, %eax
877; SSE-NEXT: andb $1, %al
878; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
879; SSE-NEXT: pextrb $7, %xmm11, %eax
880; SSE-NEXT: andb $1, %al
881; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
882; SSE-NEXT: pextrb $6, %xmm11, %eax
883; SSE-NEXT: andb $1, %al
884; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
885; SSE-NEXT: pextrb $5, %xmm11, %eax
886; SSE-NEXT: andb $1, %al
887; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
888; SSE-NEXT: pextrb $4, %xmm11, %eax
889; SSE-NEXT: andb $1, %al
890; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
891; SSE-NEXT: pextrb $3, %xmm11, %eax
892; SSE-NEXT: andb $1, %al
893; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
894; SSE-NEXT: pextrb $2, %xmm11, %eax
895; SSE-NEXT: andb $1, %al
896; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
897; SSE-NEXT: pextrb $1, %xmm11, %eax
898; SSE-NEXT: andb $1, %al
899; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
900; SSE-NEXT: pextrb $0, %xmm11, %eax
901; SSE-NEXT: andb $1, %al
902; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
903; SSE-NEXT: pextrb $15, %xmm10, %eax
904; SSE-NEXT: andb $1, %al
905; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
906; SSE-NEXT: pextrb $14, %xmm10, %eax
907; SSE-NEXT: andb $1, %al
908; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
909; SSE-NEXT: pextrb $13, %xmm10, %eax
910; SSE-NEXT: andb $1, %al
911; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
912; SSE-NEXT: pextrb $12, %xmm10, %eax
913; SSE-NEXT: andb $1, %al
914; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
915; SSE-NEXT: pextrb $11, %xmm10, %eax
916; SSE-NEXT: andb $1, %al
917; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
918; SSE-NEXT: pextrb $10, %xmm10, %eax
919; SSE-NEXT: andb $1, %al
920; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
921; SSE-NEXT: pextrb $9, %xmm10, %eax
922; SSE-NEXT: andb $1, %al
923; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
924; SSE-NEXT: pextrb $8, %xmm10, %eax
925; SSE-NEXT: andb $1, %al
926; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
927; SSE-NEXT: pextrb $7, %xmm10, %eax
928; SSE-NEXT: andb $1, %al
929; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
930; SSE-NEXT: pextrb $6, %xmm10, %eax
931; SSE-NEXT: andb $1, %al
932; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
933; SSE-NEXT: pextrb $5, %xmm10, %eax
934; SSE-NEXT: andb $1, %al
935; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
936; SSE-NEXT: pextrb $4, %xmm10, %eax
937; SSE-NEXT: andb $1, %al
938; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
939; SSE-NEXT: pextrb $3, %xmm10, %eax
940; SSE-NEXT: andb $1, %al
941; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
942; SSE-NEXT: pextrb $2, %xmm10, %eax
943; SSE-NEXT: andb $1, %al
944; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
945; SSE-NEXT: pextrb $1, %xmm10, %eax
946; SSE-NEXT: andb $1, %al
947; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
948; SSE-NEXT: pextrb $0, %xmm10, %eax
949; SSE-NEXT: andb $1, %al
950; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
951; SSE-NEXT: pextrb $15, %xmm9, %eax
952; SSE-NEXT: andb $1, %al
953; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
954; SSE-NEXT: pextrb $14, %xmm9, %eax
955; SSE-NEXT: andb $1, %al
956; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
957; SSE-NEXT: pextrb $13, %xmm9, %eax
958; SSE-NEXT: andb $1, %al
959; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
960; SSE-NEXT: pextrb $12, %xmm9, %eax
961; SSE-NEXT: andb $1, %al
962; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
963; SSE-NEXT: pextrb $11, %xmm9, %eax
964; SSE-NEXT: andb $1, %al
965; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
966; SSE-NEXT: pextrb $10, %xmm9, %eax
967; SSE-NEXT: andb $1, %al
968; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
969; SSE-NEXT: pextrb $9, %xmm9, %eax
970; SSE-NEXT: andb $1, %al
971; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
972; SSE-NEXT: pextrb $8, %xmm9, %eax
973; SSE-NEXT: andb $1, %al
974; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
975; SSE-NEXT: pextrb $7, %xmm9, %eax
976; SSE-NEXT: andb $1, %al
977; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
978; SSE-NEXT: pextrb $6, %xmm9, %eax
979; SSE-NEXT: andb $1, %al
980; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
981; SSE-NEXT: pextrb $5, %xmm9, %eax
982; SSE-NEXT: andb $1, %al
983; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
984; SSE-NEXT: pextrb $4, %xmm9, %eax
985; SSE-NEXT: andb $1, %al
986; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
987; SSE-NEXT: pextrb $3, %xmm9, %eax
988; SSE-NEXT: andb $1, %al
989; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
990; SSE-NEXT: pextrb $2, %xmm9, %eax
991; SSE-NEXT: andb $1, %al
992; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
993; SSE-NEXT: pextrb $1, %xmm9, %eax
994; SSE-NEXT: andb $1, %al
995; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
996; SSE-NEXT: pextrb $0, %xmm9, %eax
997; SSE-NEXT: andb $1, %al
998; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
999; SSE-NEXT: pextrb $15, %xmm8, %eax
1000; SSE-NEXT: andb $1, %al
1001; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1002; SSE-NEXT: pextrb $14, %xmm8, %eax
1003; SSE-NEXT: andb $1, %al
1004; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1005; SSE-NEXT: pextrb $13, %xmm8, %eax
1006; SSE-NEXT: andb $1, %al
1007; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1008; SSE-NEXT: pextrb $12, %xmm8, %eax
1009; SSE-NEXT: andb $1, %al
1010; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1011; SSE-NEXT: pextrb $11, %xmm8, %eax
1012; SSE-NEXT: andb $1, %al
1013; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1014; SSE-NEXT: pextrb $10, %xmm8, %eax
1015; SSE-NEXT: andb $1, %al
1016; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1017; SSE-NEXT: pextrb $9, %xmm8, %eax
1018; SSE-NEXT: andb $1, %al
1019; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1020; SSE-NEXT: pextrb $8, %xmm8, %eax
1021; SSE-NEXT: andb $1, %al
1022; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1023; SSE-NEXT: pextrb $7, %xmm8, %eax
1024; SSE-NEXT: andb $1, %al
1025; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1026; SSE-NEXT: pextrb $6, %xmm8, %eax
1027; SSE-NEXT: andb $1, %al
1028; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1029; SSE-NEXT: pextrb $5, %xmm8, %eax
1030; SSE-NEXT: andb $1, %al
1031; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1032; SSE-NEXT: pextrb $4, %xmm8, %eax
1033; SSE-NEXT: andb $1, %al
1034; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1035; SSE-NEXT: pextrb $3, %xmm8, %eax
1036; SSE-NEXT: andb $1, %al
1037; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1038; SSE-NEXT: pextrb $2, %xmm8, %eax
1039; SSE-NEXT: andb $1, %al
1040; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1041; SSE-NEXT: pextrb $1, %xmm8, %eax
1042; SSE-NEXT: andb $1, %al
1043; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1044; SSE-NEXT: pextrb $0, %xmm8, %eax
1045; SSE-NEXT: andb $1, %al
1046; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1047; SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
1048; SSE-NEXT: shll $16, %eax
1049; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
1050; SSE-NEXT: orl %eax, %ecx
1051; SSE-NEXT: movl -{{[0-9]+}}(%rsp), %edx
1052; SSE-NEXT: shll $16, %edx
1053; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
1054; SSE-NEXT: orl %edx, %eax
1055; SSE-NEXT: shlq $32, %rax
1056; SSE-NEXT: orq %rcx, %rax
1057; SSE-NEXT: retq
1058;
1059; AVX1-LABEL: v64i8:
1060; AVX1: # BB#0:
1061; AVX1-NEXT: pushq %rbp
1062; AVX1-NEXT: .Lcfi0:
1063; AVX1-NEXT: .cfi_def_cfa_offset 16
1064; AVX1-NEXT: .Lcfi1:
1065; AVX1-NEXT: .cfi_offset %rbp, -16
1066; AVX1-NEXT: movq %rsp, %rbp
1067; AVX1-NEXT: .Lcfi2:
1068; AVX1-NEXT: .cfi_def_cfa_register %rbp
1069; AVX1-NEXT: andq $-32, %rsp
1070; AVX1-NEXT: subq $64, %rsp
1071; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
1072; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
1073; AVX1-NEXT: vpcmpgtb %xmm8, %xmm9, %xmm8
1074; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1
1075; AVX1-NEXT: vinsertf128 $1, %xmm8, %ymm1, %ymm8
1076; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
1077; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1078; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1
1079; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0
1080; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
1081; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm0
1082; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
1083; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
1084; AVX1-NEXT: vpcmpgtb %xmm7, %xmm5, %xmm2
1085; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1086; AVX1-NEXT: vandps %ymm0, %ymm8, %ymm0
1087; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
1088; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
1089; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
1090; AVX1-NEXT: vpcmpgtb %xmm6, %xmm4, %xmm3
1091; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1092; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
1093; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1094; AVX1-NEXT: vpextrb $15, %xmm2, %eax
1095; AVX1-NEXT: andb $1, %al
1096; AVX1-NEXT: movb %al, (%rsp)
1097; AVX1-NEXT: vpextrb $14, %xmm2, %eax
1098; AVX1-NEXT: andb $1, %al
1099; AVX1-NEXT: movb %al, (%rsp)
1100; AVX1-NEXT: vpextrb $13, %xmm2, %eax
1101; AVX1-NEXT: andb $1, %al
1102; AVX1-NEXT: movb %al, (%rsp)
1103; AVX1-NEXT: vpextrb $12, %xmm2, %eax
1104; AVX1-NEXT: andb $1, %al
1105; AVX1-NEXT: movb %al, (%rsp)
1106; AVX1-NEXT: vpextrb $11, %xmm2, %eax
1107; AVX1-NEXT: andb $1, %al
1108; AVX1-NEXT: movb %al, (%rsp)
1109; AVX1-NEXT: vpextrb $10, %xmm2, %eax
1110; AVX1-NEXT: andb $1, %al
1111; AVX1-NEXT: movb %al, (%rsp)
1112; AVX1-NEXT: vpextrb $9, %xmm2, %eax
1113; AVX1-NEXT: andb $1, %al
1114; AVX1-NEXT: movb %al, (%rsp)
1115; AVX1-NEXT: vpextrb $8, %xmm2, %eax
1116; AVX1-NEXT: andb $1, %al
1117; AVX1-NEXT: movb %al, (%rsp)
1118; AVX1-NEXT: vpextrb $7, %xmm2, %eax
1119; AVX1-NEXT: andb $1, %al
1120; AVX1-NEXT: movb %al, (%rsp)
1121; AVX1-NEXT: vpextrb $6, %xmm2, %eax
1122; AVX1-NEXT: andb $1, %al
1123; AVX1-NEXT: movb %al, (%rsp)
1124; AVX1-NEXT: vpextrb $5, %xmm2, %eax
1125; AVX1-NEXT: andb $1, %al
1126; AVX1-NEXT: movb %al, (%rsp)
1127; AVX1-NEXT: vpextrb $4, %xmm2, %eax
1128; AVX1-NEXT: andb $1, %al
1129; AVX1-NEXT: movb %al, (%rsp)
1130; AVX1-NEXT: vpextrb $3, %xmm2, %eax
1131; AVX1-NEXT: andb $1, %al
1132; AVX1-NEXT: movb %al, (%rsp)
1133; AVX1-NEXT: vpextrb $2, %xmm2, %eax
1134; AVX1-NEXT: andb $1, %al
1135; AVX1-NEXT: movb %al, (%rsp)
1136; AVX1-NEXT: vpextrb $1, %xmm2, %eax
1137; AVX1-NEXT: andb $1, %al
1138; AVX1-NEXT: movb %al, (%rsp)
1139; AVX1-NEXT: vpextrb $0, %xmm2, %eax
1140; AVX1-NEXT: andb $1, %al
1141; AVX1-NEXT: movb %al, (%rsp)
1142; AVX1-NEXT: vpextrb $15, %xmm1, %eax
1143; AVX1-NEXT: andb $1, %al
1144; AVX1-NEXT: movb %al, (%rsp)
1145; AVX1-NEXT: vpextrb $14, %xmm1, %eax
1146; AVX1-NEXT: andb $1, %al
1147; AVX1-NEXT: movb %al, (%rsp)
1148; AVX1-NEXT: vpextrb $13, %xmm1, %eax
1149; AVX1-NEXT: andb $1, %al
1150; AVX1-NEXT: movb %al, (%rsp)
1151; AVX1-NEXT: vpextrb $12, %xmm1, %eax
1152; AVX1-NEXT: andb $1, %al
1153; AVX1-NEXT: movb %al, (%rsp)
1154; AVX1-NEXT: vpextrb $11, %xmm1, %eax
1155; AVX1-NEXT: andb $1, %al
1156; AVX1-NEXT: movb %al, (%rsp)
1157; AVX1-NEXT: vpextrb $10, %xmm1, %eax
1158; AVX1-NEXT: andb $1, %al
1159; AVX1-NEXT: movb %al, (%rsp)
1160; AVX1-NEXT: vpextrb $9, %xmm1, %eax
1161; AVX1-NEXT: andb $1, %al
1162; AVX1-NEXT: movb %al, (%rsp)
1163; AVX1-NEXT: vpextrb $8, %xmm1, %eax
1164; AVX1-NEXT: andb $1, %al
1165; AVX1-NEXT: movb %al, (%rsp)
1166; AVX1-NEXT: vpextrb $7, %xmm1, %eax
1167; AVX1-NEXT: andb $1, %al
1168; AVX1-NEXT: movb %al, (%rsp)
1169; AVX1-NEXT: vpextrb $6, %xmm1, %eax
1170; AVX1-NEXT: andb $1, %al
1171; AVX1-NEXT: movb %al, (%rsp)
1172; AVX1-NEXT: vpextrb $5, %xmm1, %eax
1173; AVX1-NEXT: andb $1, %al
1174; AVX1-NEXT: movb %al, (%rsp)
1175; AVX1-NEXT: vpextrb $4, %xmm1, %eax
1176; AVX1-NEXT: andb $1, %al
1177; AVX1-NEXT: movb %al, (%rsp)
1178; AVX1-NEXT: vpextrb $3, %xmm1, %eax
1179; AVX1-NEXT: andb $1, %al
1180; AVX1-NEXT: movb %al, (%rsp)
1181; AVX1-NEXT: vpextrb $2, %xmm1, %eax
1182; AVX1-NEXT: andb $1, %al
1183; AVX1-NEXT: movb %al, (%rsp)
1184; AVX1-NEXT: vpextrb $1, %xmm1, %eax
1185; AVX1-NEXT: andb $1, %al
1186; AVX1-NEXT: movb %al, (%rsp)
1187; AVX1-NEXT: vpextrb $0, %xmm1, %eax
1188; AVX1-NEXT: andb $1, %al
1189; AVX1-NEXT: movb %al, (%rsp)
1190; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1191; AVX1-NEXT: vpextrb $15, %xmm1, %eax
1192; AVX1-NEXT: andb $1, %al
1193; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1194; AVX1-NEXT: vpextrb $14, %xmm1, %eax
1195; AVX1-NEXT: andb $1, %al
1196; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1197; AVX1-NEXT: vpextrb $13, %xmm1, %eax
1198; AVX1-NEXT: andb $1, %al
1199; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1200; AVX1-NEXT: vpextrb $12, %xmm1, %eax
1201; AVX1-NEXT: andb $1, %al
1202; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1203; AVX1-NEXT: vpextrb $11, %xmm1, %eax
1204; AVX1-NEXT: andb $1, %al
1205; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1206; AVX1-NEXT: vpextrb $10, %xmm1, %eax
1207; AVX1-NEXT: andb $1, %al
1208; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1209; AVX1-NEXT: vpextrb $9, %xmm1, %eax
1210; AVX1-NEXT: andb $1, %al
1211; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1212; AVX1-NEXT: vpextrb $8, %xmm1, %eax
1213; AVX1-NEXT: andb $1, %al
1214; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1215; AVX1-NEXT: vpextrb $7, %xmm1, %eax
1216; AVX1-NEXT: andb $1, %al
1217; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1218; AVX1-NEXT: vpextrb $6, %xmm1, %eax
1219; AVX1-NEXT: andb $1, %al
1220; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1221; AVX1-NEXT: vpextrb $5, %xmm1, %eax
1222; AVX1-NEXT: andb $1, %al
1223; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1224; AVX1-NEXT: vpextrb $4, %xmm1, %eax
1225; AVX1-NEXT: andb $1, %al
1226; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1227; AVX1-NEXT: vpextrb $3, %xmm1, %eax
1228; AVX1-NEXT: andb $1, %al
1229; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1230; AVX1-NEXT: vpextrb $2, %xmm1, %eax
1231; AVX1-NEXT: andb $1, %al
1232; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1233; AVX1-NEXT: vpextrb $1, %xmm1, %eax
1234; AVX1-NEXT: andb $1, %al
1235; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1236; AVX1-NEXT: vpextrb $0, %xmm1, %eax
1237; AVX1-NEXT: andb $1, %al
1238; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1239; AVX1-NEXT: vpextrb $15, %xmm0, %eax
1240; AVX1-NEXT: andb $1, %al
1241; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1242; AVX1-NEXT: vpextrb $14, %xmm0, %eax
1243; AVX1-NEXT: andb $1, %al
1244; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1245; AVX1-NEXT: vpextrb $13, %xmm0, %eax
1246; AVX1-NEXT: andb $1, %al
1247; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1248; AVX1-NEXT: vpextrb $12, %xmm0, %eax
1249; AVX1-NEXT: andb $1, %al
1250; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1251; AVX1-NEXT: vpextrb $11, %xmm0, %eax
1252; AVX1-NEXT: andb $1, %al
1253; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1254; AVX1-NEXT: vpextrb $10, %xmm0, %eax
1255; AVX1-NEXT: andb $1, %al
1256; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1257; AVX1-NEXT: vpextrb $9, %xmm0, %eax
1258; AVX1-NEXT: andb $1, %al
1259; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1260; AVX1-NEXT: vpextrb $8, %xmm0, %eax
1261; AVX1-NEXT: andb $1, %al
1262; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1263; AVX1-NEXT: vpextrb $7, %xmm0, %eax
1264; AVX1-NEXT: andb $1, %al
1265; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1266; AVX1-NEXT: vpextrb $6, %xmm0, %eax
1267; AVX1-NEXT: andb $1, %al
1268; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1269; AVX1-NEXT: vpextrb $5, %xmm0, %eax
1270; AVX1-NEXT: andb $1, %al
1271; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1272; AVX1-NEXT: vpextrb $4, %xmm0, %eax
1273; AVX1-NEXT: andb $1, %al
1274; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1275; AVX1-NEXT: vpextrb $3, %xmm0, %eax
1276; AVX1-NEXT: andb $1, %al
1277; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1278; AVX1-NEXT: vpextrb $2, %xmm0, %eax
1279; AVX1-NEXT: andb $1, %al
1280; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1281; AVX1-NEXT: vpextrb $1, %xmm0, %eax
1282; AVX1-NEXT: andb $1, %al
1283; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1284; AVX1-NEXT: vpextrb $0, %xmm0, %eax
1285; AVX1-NEXT: andb $1, %al
1286; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1287; AVX1-NEXT: movl (%rsp), %ecx
1288; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
1289; AVX1-NEXT: shlq $32, %rax
1290; AVX1-NEXT: orq %rcx, %rax
1291; AVX1-NEXT: movq %rbp, %rsp
1292; AVX1-NEXT: popq %rbp
1293; AVX1-NEXT: vzeroupper
1294; AVX1-NEXT: retq
1295;
1296; AVX2-LABEL: v64i8:
1297; AVX2: # BB#0:
1298; AVX2-NEXT: pushq %rbp
1299; AVX2-NEXT: .Lcfi0:
1300; AVX2-NEXT: .cfi_def_cfa_offset 16
1301; AVX2-NEXT: .Lcfi1:
1302; AVX2-NEXT: .cfi_offset %rbp, -16
1303; AVX2-NEXT: movq %rsp, %rbp
1304; AVX2-NEXT: .Lcfi2:
1305; AVX2-NEXT: .cfi_def_cfa_register %rbp
1306; AVX2-NEXT: andq $-32, %rsp
1307; AVX2-NEXT: subq $64, %rsp
1308; AVX2-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
1309; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm2
1310; AVX2-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm0
1311; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
1312; AVX2-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm1
1313; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
1314; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
1315; AVX2-NEXT: vpextrb $15, %xmm2, %eax
1316; AVX2-NEXT: andb $1, %al
1317; AVX2-NEXT: movb %al, (%rsp)
1318; AVX2-NEXT: vpextrb $14, %xmm2, %eax
1319; AVX2-NEXT: andb $1, %al
1320; AVX2-NEXT: movb %al, (%rsp)
1321; AVX2-NEXT: vpextrb $13, %xmm2, %eax
1322; AVX2-NEXT: andb $1, %al
1323; AVX2-NEXT: movb %al, (%rsp)
1324; AVX2-NEXT: vpextrb $12, %xmm2, %eax
1325; AVX2-NEXT: andb $1, %al
1326; AVX2-NEXT: movb %al, (%rsp)
1327; AVX2-NEXT: vpextrb $11, %xmm2, %eax
1328; AVX2-NEXT: andb $1, %al
1329; AVX2-NEXT: movb %al, (%rsp)
1330; AVX2-NEXT: vpextrb $10, %xmm2, %eax
1331; AVX2-NEXT: andb $1, %al
1332; AVX2-NEXT: movb %al, (%rsp)
1333; AVX2-NEXT: vpextrb $9, %xmm2, %eax
1334; AVX2-NEXT: andb $1, %al
1335; AVX2-NEXT: movb %al, (%rsp)
1336; AVX2-NEXT: vpextrb $8, %xmm2, %eax
1337; AVX2-NEXT: andb $1, %al
1338; AVX2-NEXT: movb %al, (%rsp)
1339; AVX2-NEXT: vpextrb $7, %xmm2, %eax
1340; AVX2-NEXT: andb $1, %al
1341; AVX2-NEXT: movb %al, (%rsp)
1342; AVX2-NEXT: vpextrb $6, %xmm2, %eax
1343; AVX2-NEXT: andb $1, %al
1344; AVX2-NEXT: movb %al, (%rsp)
1345; AVX2-NEXT: vpextrb $5, %xmm2, %eax
1346; AVX2-NEXT: andb $1, %al
1347; AVX2-NEXT: movb %al, (%rsp)
1348; AVX2-NEXT: vpextrb $4, %xmm2, %eax
1349; AVX2-NEXT: andb $1, %al
1350; AVX2-NEXT: movb %al, (%rsp)
1351; AVX2-NEXT: vpextrb $3, %xmm2, %eax
1352; AVX2-NEXT: andb $1, %al
1353; AVX2-NEXT: movb %al, (%rsp)
1354; AVX2-NEXT: vpextrb $2, %xmm2, %eax
1355; AVX2-NEXT: andb $1, %al
1356; AVX2-NEXT: movb %al, (%rsp)
1357; AVX2-NEXT: vpextrb $1, %xmm2, %eax
1358; AVX2-NEXT: andb $1, %al
1359; AVX2-NEXT: movb %al, (%rsp)
1360; AVX2-NEXT: vpextrb $0, %xmm2, %eax
1361; AVX2-NEXT: andb $1, %al
1362; AVX2-NEXT: movb %al, (%rsp)
1363; AVX2-NEXT: vpextrb $15, %xmm1, %eax
1364; AVX2-NEXT: andb $1, %al
1365; AVX2-NEXT: movb %al, (%rsp)
1366; AVX2-NEXT: vpextrb $14, %xmm1, %eax
1367; AVX2-NEXT: andb $1, %al
1368; AVX2-NEXT: movb %al, (%rsp)
1369; AVX2-NEXT: vpextrb $13, %xmm1, %eax
1370; AVX2-NEXT: andb $1, %al
1371; AVX2-NEXT: movb %al, (%rsp)
1372; AVX2-NEXT: vpextrb $12, %xmm1, %eax
1373; AVX2-NEXT: andb $1, %al
1374; AVX2-NEXT: movb %al, (%rsp)
1375; AVX2-NEXT: vpextrb $11, %xmm1, %eax
1376; AVX2-NEXT: andb $1, %al
1377; AVX2-NEXT: movb %al, (%rsp)
1378; AVX2-NEXT: vpextrb $10, %xmm1, %eax
1379; AVX2-NEXT: andb $1, %al
1380; AVX2-NEXT: movb %al, (%rsp)
1381; AVX2-NEXT: vpextrb $9, %xmm1, %eax
1382; AVX2-NEXT: andb $1, %al
1383; AVX2-NEXT: movb %al, (%rsp)
1384; AVX2-NEXT: vpextrb $8, %xmm1, %eax
1385; AVX2-NEXT: andb $1, %al
1386; AVX2-NEXT: movb %al, (%rsp)
1387; AVX2-NEXT: vpextrb $7, %xmm1, %eax
1388; AVX2-NEXT: andb $1, %al
1389; AVX2-NEXT: movb %al, (%rsp)
1390; AVX2-NEXT: vpextrb $6, %xmm1, %eax
1391; AVX2-NEXT: andb $1, %al
1392; AVX2-NEXT: movb %al, (%rsp)
1393; AVX2-NEXT: vpextrb $5, %xmm1, %eax
1394; AVX2-NEXT: andb $1, %al
1395; AVX2-NEXT: movb %al, (%rsp)
1396; AVX2-NEXT: vpextrb $4, %xmm1, %eax
1397; AVX2-NEXT: andb $1, %al
1398; AVX2-NEXT: movb %al, (%rsp)
1399; AVX2-NEXT: vpextrb $3, %xmm1, %eax
1400; AVX2-NEXT: andb $1, %al
1401; AVX2-NEXT: movb %al, (%rsp)
1402; AVX2-NEXT: vpextrb $2, %xmm1, %eax
1403; AVX2-NEXT: andb $1, %al
1404; AVX2-NEXT: movb %al, (%rsp)
1405; AVX2-NEXT: vpextrb $1, %xmm1, %eax
1406; AVX2-NEXT: andb $1, %al
1407; AVX2-NEXT: movb %al, (%rsp)
1408; AVX2-NEXT: vpextrb $0, %xmm1, %eax
1409; AVX2-NEXT: andb $1, %al
1410; AVX2-NEXT: movb %al, (%rsp)
1411; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1412; AVX2-NEXT: vpextrb $15, %xmm1, %eax
1413; AVX2-NEXT: andb $1, %al
1414; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1415; AVX2-NEXT: vpextrb $14, %xmm1, %eax
1416; AVX2-NEXT: andb $1, %al
1417; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1418; AVX2-NEXT: vpextrb $13, %xmm1, %eax
1419; AVX2-NEXT: andb $1, %al
1420; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1421; AVX2-NEXT: vpextrb $12, %xmm1, %eax
1422; AVX2-NEXT: andb $1, %al
1423; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1424; AVX2-NEXT: vpextrb $11, %xmm1, %eax
1425; AVX2-NEXT: andb $1, %al
1426; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1427; AVX2-NEXT: vpextrb $10, %xmm1, %eax
1428; AVX2-NEXT: andb $1, %al
1429; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1430; AVX2-NEXT: vpextrb $9, %xmm1, %eax
1431; AVX2-NEXT: andb $1, %al
1432; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1433; AVX2-NEXT: vpextrb $8, %xmm1, %eax
1434; AVX2-NEXT: andb $1, %al
1435; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1436; AVX2-NEXT: vpextrb $7, %xmm1, %eax
1437; AVX2-NEXT: andb $1, %al
1438; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1439; AVX2-NEXT: vpextrb $6, %xmm1, %eax
1440; AVX2-NEXT: andb $1, %al
1441; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1442; AVX2-NEXT: vpextrb $5, %xmm1, %eax
1443; AVX2-NEXT: andb $1, %al
1444; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1445; AVX2-NEXT: vpextrb $4, %xmm1, %eax
1446; AVX2-NEXT: andb $1, %al
1447; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1448; AVX2-NEXT: vpextrb $3, %xmm1, %eax
1449; AVX2-NEXT: andb $1, %al
1450; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1451; AVX2-NEXT: vpextrb $2, %xmm1, %eax
1452; AVX2-NEXT: andb $1, %al
1453; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1454; AVX2-NEXT: vpextrb $1, %xmm1, %eax
1455; AVX2-NEXT: andb $1, %al
1456; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1457; AVX2-NEXT: vpextrb $0, %xmm1, %eax
1458; AVX2-NEXT: andb $1, %al
1459; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1460; AVX2-NEXT: vpextrb $15, %xmm0, %eax
1461; AVX2-NEXT: andb $1, %al
1462; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1463; AVX2-NEXT: vpextrb $14, %xmm0, %eax
1464; AVX2-NEXT: andb $1, %al
1465; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1466; AVX2-NEXT: vpextrb $13, %xmm0, %eax
1467; AVX2-NEXT: andb $1, %al
1468; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1469; AVX2-NEXT: vpextrb $12, %xmm0, %eax
1470; AVX2-NEXT: andb $1, %al
1471; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1472; AVX2-NEXT: vpextrb $11, %xmm0, %eax
1473; AVX2-NEXT: andb $1, %al
1474; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1475; AVX2-NEXT: vpextrb $10, %xmm0, %eax
1476; AVX2-NEXT: andb $1, %al
1477; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1478; AVX2-NEXT: vpextrb $9, %xmm0, %eax
1479; AVX2-NEXT: andb $1, %al
1480; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1481; AVX2-NEXT: vpextrb $8, %xmm0, %eax
1482; AVX2-NEXT: andb $1, %al
1483; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1484; AVX2-NEXT: vpextrb $7, %xmm0, %eax
1485; AVX2-NEXT: andb $1, %al
1486; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1487; AVX2-NEXT: vpextrb $6, %xmm0, %eax
1488; AVX2-NEXT: andb $1, %al
1489; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1490; AVX2-NEXT: vpextrb $5, %xmm0, %eax
1491; AVX2-NEXT: andb $1, %al
1492; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1493; AVX2-NEXT: vpextrb $4, %xmm0, %eax
1494; AVX2-NEXT: andb $1, %al
1495; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1496; AVX2-NEXT: vpextrb $3, %xmm0, %eax
1497; AVX2-NEXT: andb $1, %al
1498; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1499; AVX2-NEXT: vpextrb $2, %xmm0, %eax
1500; AVX2-NEXT: andb $1, %al
1501; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1502; AVX2-NEXT: vpextrb $1, %xmm0, %eax
1503; AVX2-NEXT: andb $1, %al
1504; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1505; AVX2-NEXT: vpextrb $0, %xmm0, %eax
1506; AVX2-NEXT: andb $1, %al
1507; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1508; AVX2-NEXT: movl (%rsp), %ecx
1509; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
1510; AVX2-NEXT: shlq $32, %rax
1511; AVX2-NEXT: orq %rcx, %rax
1512; AVX2-NEXT: movq %rbp, %rsp
1513; AVX2-NEXT: popq %rbp
1514; AVX2-NEXT: vzeroupper
1515; AVX2-NEXT: retq
1516;
1517; AVX512F-LABEL: v64i8:
1518; AVX512F: # BB#0:
1519; AVX512F-NEXT: pushq %rbp
1520; AVX512F-NEXT: .Lcfi3:
1521; AVX512F-NEXT: .cfi_def_cfa_offset 16
1522; AVX512F-NEXT: .Lcfi4:
1523; AVX512F-NEXT: .cfi_offset %rbp, -16
1524; AVX512F-NEXT: movq %rsp, %rbp
1525; AVX512F-NEXT: .Lcfi5:
1526; AVX512F-NEXT: .cfi_def_cfa_register %rbp
1527; AVX512F-NEXT: andq $-32, %rsp
1528; AVX512F-NEXT: subq $64, %rsp
1529; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
1530; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
1531; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm2
1532; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
1533; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm2
1534; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
1535; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
1536; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
1537; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
1538; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
1539; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1540; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1541; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
1542; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1543; AVX512F-NEXT: kmovw %k0, (%rsp)
1544; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
1545; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1546; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
1547; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1548; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1549; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm0
1550; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
1551; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1552; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1553; AVX512F-NEXT: movl (%rsp), %ecx
1554; AVX512F-NEXT: movl {{[0-9]+}}(%rsp), %eax
1555; AVX512F-NEXT: shlq $32, %rax
1556; AVX512F-NEXT: orq %rcx, %rax
1557; AVX512F-NEXT: movq %rbp, %rsp
1558; AVX512F-NEXT: popq %rbp
1559; AVX512F-NEXT: vzeroupper
1560; AVX512F-NEXT: retq
1561;
1562; AVX512BW-LABEL: v64i8:
1563; AVX512BW: # BB#0:
1564; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k1
1565; AVX512BW-NEXT: vpcmpgtb %zmm3, %zmm2, %k0 {%k1}
1566; AVX512BW-NEXT: kmovq %k0, %rax
1567; AVX512BW-NEXT: vzeroupper
1568; AVX512BW-NEXT: retq
1569 %x0 = icmp sgt <64 x i8> %a, %b
1570 %x1 = icmp sgt <64 x i8> %c, %d
1571 %y = and <64 x i1> %x0, %x1
1572 %res = bitcast <64 x i1> %y to i64
1573 ret i64 %res
1574}