blob: 9d2766bcac6427fc12f265d6acd844705d40b71a [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW
; AND of two <8 x i64> signed-greater-than compares, with the <8 x i1> result
; bitcast to an i8 mask. Checks per-target lowering of the i1-vector bitcast.
define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
; SSE-LABEL: v8i64:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtq %xmm5, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT: pcmpgtq %xmm4, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: pcmpgtq %xmm7, %xmm3
; SSE-NEXT: pcmpgtq %xmm6, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT: packssdw %xmm2, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm11[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm10[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,2],xmm9[0,2]
; SSE-NEXT: packssdw %xmm8, %xmm8
; SSE-NEXT: pblendw {{.*#+}} xmm8 = xmm1[0,1,2,3],xmm8[4,5,6,7]
; SSE-NEXT: pand %xmm2, %xmm8
; SSE-NEXT: psllw $15, %xmm8
; SSE-NEXT: psraw $15, %xmm8
; SSE-NEXT: packsswb %xmm0, %xmm8
; SSE-NEXT: pmovmskb %xmm8, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX1-LABEL: v8i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT: vpcmpgtq %xmm8, %xmm9, %xmm8
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm9
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm8, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm9[0]
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm2
; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm3
; AVX1-NEXT: vpacksswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpshufb %xmm8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v8i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpcmpgtq %ymm7, %ymm5, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
; AVX2-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v8i64:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
; AVX512F-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v8i64:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = icmp sgt <8 x i64> %a, %b
  %x1 = icmp sgt <8 x i64> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

; AND of two <8 x double> ordered-greater-than compares, with the <8 x i1>
; result bitcast to an i8 mask. FP analogue of the v8i64 test.
define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
; SSE-LABEL: v8f64:
; SSE: # BB#0:
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: cmpltpd %xmm3, %xmm7
; SSE-NEXT: cmpltpd %xmm2, %xmm6
; SSE-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,4,5,6,7,0,1,4,5,8,9,12,13]
; SSE-NEXT: pshufb %xmm2, %xmm6
; SSE-NEXT: cmpltpd %xmm1, %xmm5
; SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm5[0,2,2,3,4,5,6,7]
; SSE-NEXT: cmpltpd %xmm0, %xmm4
; SSE-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm4[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm6[4,5,6,7]
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,2],xmm11[0,2]
; SSE-NEXT: pshufb %xmm2, %xmm8
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: shufps {{.*#+}} xmm10 = xmm10[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm10[0,2,2,3,4,5,6,7]
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: shufps {{.*#+}} xmm9 = xmm9[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm9[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm8[4,5,6,7]
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: psllw $15, %xmm2
; SSE-NEXT: psraw $15, %xmm2
; SSE-NEXT: packsswb %xmm0, %xmm2
; SSE-NEXT: pmovmskb %xmm2, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX12-LABEL: v8f64:
; AVX12: # BB#0:
; AVX12-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX12-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX12-NEXT: vcmpltpd %ymm5, %ymm7, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vcmpltpd %ymm4, %ymm6, %ymm2
; AVX12-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX12-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
; AVX12-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX12-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX12-NEXT: vzeroupper
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v8f64:
; AVX512F: # BB#0:
; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; AVX512F-NEXT: vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v8f64:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; AVX512BW-NEXT: vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = fcmp ogt <8 x double> %a, %b
  %x1 = fcmp ogt <8 x double> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

; AND of two <32 x i16> signed-greater-than compares, with the <32 x i1>
; result bitcast to an i32 mask. AVX512F (no BWI) must expand the 32 x i1
; through per-bit kshift/vpinsrb sequences; AVX512BW uses vpcmpgtw directly.
define i32 @v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i16> %d) {
; SSE-LABEL: v32i16:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtw %xmm5, %xmm1
; SSE-NEXT: pcmpgtw %xmm4, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: pcmpgtw %xmm7, %xmm3
; SSE-NEXT: pcmpgtw %xmm6, %xmm2
; SSE-NEXT: packsswb %xmm3, %xmm2
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: packsswb %xmm11, %xmm10
; SSE-NEXT: pand %xmm0, %xmm10
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: packsswb %xmm9, %xmm8
; SSE-NEXT: pand %xmm2, %xmm8
; SSE-NEXT: pmovmskb %xmm10, %ecx
; SSE-NEXT: pmovmskb %xmm8, %eax
; SSE-NEXT: shll $16, %eax
; SSE-NEXT: orl %ecx, %eax
; SSE-NEXT: retq
;
; AVX1-LABEL: v32i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT: vpcmpgtw %xmm8, %xmm9, %xmm8
; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm7, %xmm5, %xmm2
; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm8, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm6, %xmm4, %xmm3
; AVX1-NEXT: vpacksswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: vpmovmskb %xmm1, %eax
; AVX1-NEXT: shll $16, %eax
; AVX1-NEXT: orl %ecx, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v32i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v32i16:
; AVX512F: # BB#0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: .Lcfi0:
; AVX512F-NEXT: .cfi_def_cfa_offset 16
; AVX512F-NEXT: .Lcfi1:
; AVX512F-NEXT: .cfi_offset %rbp, -16
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: .Lcfi2:
; AVX512F-NEXT: .cfi_def_cfa_register %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm1
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm0
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm1
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm2
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, (%rsp)
; AVX512F-NEXT: movl (%rsp), %eax
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v32i16:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtw %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = icmp sgt <32 x i16> %a, %b
  %x1 = icmp sgt <32 x i16> %c, %d
  %y = and <32 x i1> %x0, %x1
  %res = bitcast <32 x i1> %y to i32
  ret i32 %res
}

624define i16 @v16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i32> %d) {
625; SSE-LABEL: v16i32:
626; SSE: # BB#0:
627; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
628; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
629; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
630; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
631; SSE-NEXT: pcmpgtd %xmm7, %xmm3
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000632; SSE-NEXT: packssdw %xmm0, %xmm3
Simon Pilgrim11e29692017-09-14 10:30:22 +0000633; SSE-NEXT: pcmpgtd %xmm6, %xmm2
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000634; SSE-NEXT: packssdw %xmm0, %xmm2
Simon Pilgrim11e29692017-09-14 10:30:22 +0000635; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000636; SSE-NEXT: pcmpgtd %xmm5, %xmm1
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000637; SSE-NEXT: packssdw %xmm0, %xmm1
Simon Pilgrim11e29692017-09-14 10:30:22 +0000638; SSE-NEXT: pcmpgtd %xmm4, %xmm0
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000639; SSE-NEXT: packssdw %xmm0, %xmm0
Simon Pilgrim11e29692017-09-14 10:30:22 +0000640; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000641; SSE-NEXT: packsswb %xmm2, %xmm0
Simon Pilgrim11e29692017-09-14 10:30:22 +0000642; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm11
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000643; SSE-NEXT: packssdw %xmm0, %xmm11
Simon Pilgrim11e29692017-09-14 10:30:22 +0000644; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm9
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000645; SSE-NEXT: packssdw %xmm0, %xmm9
Simon Pilgrim11e29692017-09-14 10:30:22 +0000646; SSE-NEXT: punpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000647; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm10
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000648; SSE-NEXT: packssdw %xmm0, %xmm10
Simon Pilgrim11e29692017-09-14 10:30:22 +0000649; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm8
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000650; SSE-NEXT: packssdw %xmm0, %xmm8
Simon Pilgrim11e29692017-09-14 10:30:22 +0000651; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm10[0]
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000652; SSE-NEXT: packsswb %xmm9, %xmm8
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000653; SSE-NEXT: pand %xmm0, %xmm8
654; SSE-NEXT: pmovmskb %xmm8, %eax
Simon Pilgrim11e29692017-09-14 10:30:22 +0000655; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
656; SSE-NEXT: retq
657;
658; AVX1-LABEL: v16i32:
659; AVX1: # BB#0:
660; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
661; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
662; AVX1-NEXT: vpcmpgtd %xmm8, %xmm9, %xmm8
663; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm1
664; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm1
665; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
666; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm9
667; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
668; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
669; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm1
670; AVX1-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
671; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
672; AVX1-NEXT: vpshufb %xmm8, %xmm0, %xmm0
673; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm9[0]
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000674; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
675; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
676; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
677; AVX1-NEXT: vpcmpgtd %xmm7, %xmm5, %xmm2
678; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
Simon Pilgrim11e29692017-09-14 10:30:22 +0000679; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm1
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000680; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
681; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
682; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
683; AVX1-NEXT: vpcmpgtd %xmm6, %xmm4, %xmm3
684; AVX1-NEXT: vpacksswb %xmm2, %xmm3, %xmm2
685; AVX1-NEXT: vpshufb %xmm8, %xmm2, %xmm2
686; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000687; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
688; AVX1-NEXT: vpmovmskb %xmm0, %eax
689; AVX1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
690; AVX1-NEXT: vzeroupper
691; AVX1-NEXT: retq
692;
693; AVX2-LABEL: v16i32:
694; AVX2: # BB#0:
695; AVX2-NEXT: vpcmpgtd %ymm3, %ymm1, %ymm1
696; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
697; AVX2-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
698; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
699; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
700; AVX2-NEXT: vpcmpgtd %ymm2, %ymm0, %ymm0
701; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
702; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
703; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
704; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000705; AVX2-NEXT: vpcmpgtd %ymm7, %ymm5, %ymm1
706; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
707; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
708; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
709; AVX2-NEXT: vpcmpgtd %ymm6, %ymm4, %ymm2
710; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
711; AVX2-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
712; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
713; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000714; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
715; AVX2-NEXT: vpmovmskb %xmm0, %eax
716; AVX2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
717; AVX2-NEXT: vzeroupper
718; AVX2-NEXT: retq
719;
720; AVX512F-LABEL: v16i32:
721; AVX512F: # BB#0:
722; AVX512F-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
723; AVX512F-NEXT: vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
724; AVX512F-NEXT: kmovw %k0, %eax
725; AVX512F-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
726; AVX512F-NEXT: vzeroupper
727; AVX512F-NEXT: retq
728;
729; AVX512BW-LABEL: v16i32:
730; AVX512BW: # BB#0:
731; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
732; AVX512BW-NEXT: vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
733; AVX512BW-NEXT: kmovd %k0, %eax
734; AVX512BW-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
735; AVX512BW-NEXT: vzeroupper
736; AVX512BW-NEXT: retq
737 %x0 = icmp sgt <16 x i32> %a, %b
738 %x1 = icmp sgt <16 x i32> %c, %d
739 %y = and <16 x i1> %x0, %x1
740 %res = bitcast <16 x i1> %y to i16
741 ret i16 %res
742}
743
; v16f32: checks codegen for two <16 x float> 'ogt' comparisons whose i1 masks
; are ANDed and bitcast to an i16 bitmask. Note operand swap: 'fcmp ogt %a, %b'
; is lowered as 'cmpltps' with reversed operands. CHECK lines below are
; autogenerated by update_llc_test_checks.py — do not hand-edit them.
744define i16 @v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) {
745; SSE-LABEL: v16f32:
746; SSE: # BB#0:
747; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm8
748; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm10
749; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9
750; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm11
751; SSE-NEXT: cmpltps %xmm3, %xmm7
752; SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
753; SSE-NEXT: pshufb %xmm3, %xmm7
754; SSE-NEXT: cmpltps %xmm2, %xmm6
755; SSE-NEXT: pshufb %xmm3, %xmm6
756; SSE-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm7[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000757; SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
758; SSE-NEXT: pshufb %xmm2, %xmm6
759; SSE-NEXT: cmpltps %xmm1, %xmm5
760; SSE-NEXT: pshufb %xmm3, %xmm5
761; SSE-NEXT: cmpltps %xmm0, %xmm4
762; SSE-NEXT: pshufb %xmm3, %xmm4
763; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000764; SSE-NEXT: pshufb %xmm2, %xmm4
765; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000766; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm11
767; SSE-NEXT: pshufb %xmm3, %xmm11
768; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm9
769; SSE-NEXT: pshufb %xmm3, %xmm9
770; SSE-NEXT: punpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000771; SSE-NEXT: pshufb %xmm2, %xmm9
772; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm10
773; SSE-NEXT: pshufb %xmm3, %xmm10
774; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm8
775; SSE-NEXT: pshufb %xmm3, %xmm8
776; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm10[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000777; SSE-NEXT: pshufb %xmm2, %xmm8
778; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm9[0]
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000779; SSE-NEXT: pand %xmm4, %xmm8
780; SSE-NEXT: pmovmskb %xmm8, %eax
Simon Pilgrim11e29692017-09-14 10:30:22 +0000781; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
782; SSE-NEXT: retq
783;
784; AVX12-LABEL: v16f32:
785; AVX12: # BB#0:
786; AVX12-NEXT: vcmpltps %ymm1, %ymm3, %ymm1
787; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm3
788; AVX12-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
789; AVX12-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
790; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
791; AVX12-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
792; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
793; AVX12-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
794; AVX12-NEXT: vpshufb %xmm3, %xmm0, %xmm0
795; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000796; AVX12-NEXT: vcmpltps %ymm5, %ymm7, %ymm1
797; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
798; AVX12-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
799; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
800; AVX12-NEXT: vcmpltps %ymm4, %ymm6, %ymm2
801; AVX12-NEXT: vextractf128 $1, %ymm2, %xmm4
802; AVX12-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
803; AVX12-NEXT: vpshufb %xmm3, %xmm2, %xmm2
804; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000805; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
806; AVX12-NEXT: vpmovmskb %xmm0, %eax
807; AVX12-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
808; AVX12-NEXT: vzeroupper
809; AVX12-NEXT: retq
810;
811; AVX512F-LABEL: v16f32:
812; AVX512F: # BB#0:
813; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1
814; AVX512F-NEXT: vcmpltps %zmm2, %zmm3, %k0 {%k1}
815; AVX512F-NEXT: kmovw %k0, %eax
816; AVX512F-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
817; AVX512F-NEXT: vzeroupper
818; AVX512F-NEXT: retq
819;
820; AVX512BW-LABEL: v16f32:
821; AVX512BW: # BB#0:
822; AVX512BW-NEXT: vcmpltps %zmm0, %zmm1, %k1
823; AVX512BW-NEXT: vcmpltps %zmm2, %zmm3, %k0 {%k1}
824; AVX512BW-NEXT: kmovd %k0, %eax
825; AVX512BW-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
826; AVX512BW-NEXT: vzeroupper
827; AVX512BW-NEXT: retq
828 %x0 = fcmp ogt <16 x float> %a, %b
829 %x1 = fcmp ogt <16 x float> %c, %d
830 %y = and <16 x i1> %x0, %x1
831 %res = bitcast <16 x i1> %y to i16
832 ret i16 %res
833}
834
835define i64 @v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) {
836; SSE-LABEL: v64i8:
837; SSE: # BB#0:
838; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
839; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
840; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
841; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
842; SSE-NEXT: pcmpgtb %xmm6, %xmm2
843; SSE-NEXT: pcmpgtb %xmm7, %xmm3
844; SSE-NEXT: pcmpgtb %xmm4, %xmm0
845; SSE-NEXT: pcmpgtb %xmm5, %xmm1
846; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm8
847; SSE-NEXT: pand %xmm2, %xmm8
848; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm9
849; SSE-NEXT: pand %xmm3, %xmm9
850; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm10
851; SSE-NEXT: pand %xmm0, %xmm10
852; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm11
853; SSE-NEXT: pand %xmm1, %xmm11
854; SSE-NEXT: pextrb $15, %xmm11, %eax
855; SSE-NEXT: andb $1, %al
856; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
857; SSE-NEXT: pextrb $14, %xmm11, %eax
858; SSE-NEXT: andb $1, %al
859; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
860; SSE-NEXT: pextrb $13, %xmm11, %eax
861; SSE-NEXT: andb $1, %al
862; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
863; SSE-NEXT: pextrb $12, %xmm11, %eax
864; SSE-NEXT: andb $1, %al
865; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
866; SSE-NEXT: pextrb $11, %xmm11, %eax
867; SSE-NEXT: andb $1, %al
868; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
869; SSE-NEXT: pextrb $10, %xmm11, %eax
870; SSE-NEXT: andb $1, %al
871; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
872; SSE-NEXT: pextrb $9, %xmm11, %eax
873; SSE-NEXT: andb $1, %al
874; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
875; SSE-NEXT: pextrb $8, %xmm11, %eax
876; SSE-NEXT: andb $1, %al
877; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
878; SSE-NEXT: pextrb $7, %xmm11, %eax
879; SSE-NEXT: andb $1, %al
880; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
881; SSE-NEXT: pextrb $6, %xmm11, %eax
882; SSE-NEXT: andb $1, %al
883; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
884; SSE-NEXT: pextrb $5, %xmm11, %eax
885; SSE-NEXT: andb $1, %al
886; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
887; SSE-NEXT: pextrb $4, %xmm11, %eax
888; SSE-NEXT: andb $1, %al
889; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
890; SSE-NEXT: pextrb $3, %xmm11, %eax
891; SSE-NEXT: andb $1, %al
892; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
893; SSE-NEXT: pextrb $2, %xmm11, %eax
894; SSE-NEXT: andb $1, %al
895; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
896; SSE-NEXT: pextrb $1, %xmm11, %eax
897; SSE-NEXT: andb $1, %al
898; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
899; SSE-NEXT: pextrb $0, %xmm11, %eax
900; SSE-NEXT: andb $1, %al
901; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
902; SSE-NEXT: pextrb $15, %xmm10, %eax
903; SSE-NEXT: andb $1, %al
904; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
905; SSE-NEXT: pextrb $14, %xmm10, %eax
906; SSE-NEXT: andb $1, %al
907; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
908; SSE-NEXT: pextrb $13, %xmm10, %eax
909; SSE-NEXT: andb $1, %al
910; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
911; SSE-NEXT: pextrb $12, %xmm10, %eax
912; SSE-NEXT: andb $1, %al
913; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
914; SSE-NEXT: pextrb $11, %xmm10, %eax
915; SSE-NEXT: andb $1, %al
916; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
917; SSE-NEXT: pextrb $10, %xmm10, %eax
918; SSE-NEXT: andb $1, %al
919; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
920; SSE-NEXT: pextrb $9, %xmm10, %eax
921; SSE-NEXT: andb $1, %al
922; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
923; SSE-NEXT: pextrb $8, %xmm10, %eax
924; SSE-NEXT: andb $1, %al
925; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
926; SSE-NEXT: pextrb $7, %xmm10, %eax
927; SSE-NEXT: andb $1, %al
928; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
929; SSE-NEXT: pextrb $6, %xmm10, %eax
930; SSE-NEXT: andb $1, %al
931; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
932; SSE-NEXT: pextrb $5, %xmm10, %eax
933; SSE-NEXT: andb $1, %al
934; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
935; SSE-NEXT: pextrb $4, %xmm10, %eax
936; SSE-NEXT: andb $1, %al
937; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
938; SSE-NEXT: pextrb $3, %xmm10, %eax
939; SSE-NEXT: andb $1, %al
940; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
941; SSE-NEXT: pextrb $2, %xmm10, %eax
942; SSE-NEXT: andb $1, %al
943; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
944; SSE-NEXT: pextrb $1, %xmm10, %eax
945; SSE-NEXT: andb $1, %al
946; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
947; SSE-NEXT: pextrb $0, %xmm10, %eax
948; SSE-NEXT: andb $1, %al
949; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
950; SSE-NEXT: pextrb $15, %xmm9, %eax
951; SSE-NEXT: andb $1, %al
952; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
953; SSE-NEXT: pextrb $14, %xmm9, %eax
954; SSE-NEXT: andb $1, %al
955; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
956; SSE-NEXT: pextrb $13, %xmm9, %eax
957; SSE-NEXT: andb $1, %al
958; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
959; SSE-NEXT: pextrb $12, %xmm9, %eax
960; SSE-NEXT: andb $1, %al
961; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
962; SSE-NEXT: pextrb $11, %xmm9, %eax
963; SSE-NEXT: andb $1, %al
964; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
965; SSE-NEXT: pextrb $10, %xmm9, %eax
966; SSE-NEXT: andb $1, %al
967; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
968; SSE-NEXT: pextrb $9, %xmm9, %eax
969; SSE-NEXT: andb $1, %al
970; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
971; SSE-NEXT: pextrb $8, %xmm9, %eax
972; SSE-NEXT: andb $1, %al
973; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
974; SSE-NEXT: pextrb $7, %xmm9, %eax
975; SSE-NEXT: andb $1, %al
976; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
977; SSE-NEXT: pextrb $6, %xmm9, %eax
978; SSE-NEXT: andb $1, %al
979; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
980; SSE-NEXT: pextrb $5, %xmm9, %eax
981; SSE-NEXT: andb $1, %al
982; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
983; SSE-NEXT: pextrb $4, %xmm9, %eax
984; SSE-NEXT: andb $1, %al
985; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
986; SSE-NEXT: pextrb $3, %xmm9, %eax
987; SSE-NEXT: andb $1, %al
988; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
989; SSE-NEXT: pextrb $2, %xmm9, %eax
990; SSE-NEXT: andb $1, %al
991; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
992; SSE-NEXT: pextrb $1, %xmm9, %eax
993; SSE-NEXT: andb $1, %al
994; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
995; SSE-NEXT: pextrb $0, %xmm9, %eax
996; SSE-NEXT: andb $1, %al
997; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
998; SSE-NEXT: pextrb $15, %xmm8, %eax
999; SSE-NEXT: andb $1, %al
1000; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1001; SSE-NEXT: pextrb $14, %xmm8, %eax
1002; SSE-NEXT: andb $1, %al
1003; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1004; SSE-NEXT: pextrb $13, %xmm8, %eax
1005; SSE-NEXT: andb $1, %al
1006; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1007; SSE-NEXT: pextrb $12, %xmm8, %eax
1008; SSE-NEXT: andb $1, %al
1009; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1010; SSE-NEXT: pextrb $11, %xmm8, %eax
1011; SSE-NEXT: andb $1, %al
1012; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1013; SSE-NEXT: pextrb $10, %xmm8, %eax
1014; SSE-NEXT: andb $1, %al
1015; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1016; SSE-NEXT: pextrb $9, %xmm8, %eax
1017; SSE-NEXT: andb $1, %al
1018; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1019; SSE-NEXT: pextrb $8, %xmm8, %eax
1020; SSE-NEXT: andb $1, %al
1021; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1022; SSE-NEXT: pextrb $7, %xmm8, %eax
1023; SSE-NEXT: andb $1, %al
1024; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1025; SSE-NEXT: pextrb $6, %xmm8, %eax
1026; SSE-NEXT: andb $1, %al
1027; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1028; SSE-NEXT: pextrb $5, %xmm8, %eax
1029; SSE-NEXT: andb $1, %al
1030; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1031; SSE-NEXT: pextrb $4, %xmm8, %eax
1032; SSE-NEXT: andb $1, %al
1033; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1034; SSE-NEXT: pextrb $3, %xmm8, %eax
1035; SSE-NEXT: andb $1, %al
1036; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1037; SSE-NEXT: pextrb $2, %xmm8, %eax
1038; SSE-NEXT: andb $1, %al
1039; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1040; SSE-NEXT: pextrb $1, %xmm8, %eax
1041; SSE-NEXT: andb $1, %al
1042; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1043; SSE-NEXT: pextrb $0, %xmm8, %eax
1044; SSE-NEXT: andb $1, %al
1045; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1046; SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
1047; SSE-NEXT: shll $16, %eax
1048; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
1049; SSE-NEXT: orl %eax, %ecx
1050; SSE-NEXT: movl -{{[0-9]+}}(%rsp), %edx
1051; SSE-NEXT: shll $16, %edx
1052; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
1053; SSE-NEXT: orl %edx, %eax
1054; SSE-NEXT: shlq $32, %rax
1055; SSE-NEXT: orq %rcx, %rax
1056; SSE-NEXT: retq
1057;
1058; AVX1-LABEL: v64i8:
1059; AVX1: # BB#0:
1060; AVX1-NEXT: pushq %rbp
1061; AVX1-NEXT: .Lcfi0:
1062; AVX1-NEXT: .cfi_def_cfa_offset 16
1063; AVX1-NEXT: .Lcfi1:
1064; AVX1-NEXT: .cfi_offset %rbp, -16
1065; AVX1-NEXT: movq %rsp, %rbp
1066; AVX1-NEXT: .Lcfi2:
1067; AVX1-NEXT: .cfi_def_cfa_register %rbp
1068; AVX1-NEXT: andq $-32, %rsp
1069; AVX1-NEXT: subq $64, %rsp
1070; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
1071; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
1072; AVX1-NEXT: vpcmpgtb %xmm8, %xmm9, %xmm8
1073; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1
1074; AVX1-NEXT: vinsertf128 $1, %xmm8, %ymm1, %ymm8
1075; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
1076; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1077; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1
1078; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0
1079; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
1080; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm0
1081; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
1082; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
1083; AVX1-NEXT: vpcmpgtb %xmm7, %xmm5, %xmm2
1084; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1085; AVX1-NEXT: vandps %ymm0, %ymm8, %ymm0
1086; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
1087; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
1088; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
1089; AVX1-NEXT: vpcmpgtb %xmm6, %xmm4, %xmm3
1090; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1091; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
1092; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1093; AVX1-NEXT: vpextrb $15, %xmm2, %eax
1094; AVX1-NEXT: andb $1, %al
1095; AVX1-NEXT: movb %al, (%rsp)
1096; AVX1-NEXT: vpextrb $14, %xmm2, %eax
1097; AVX1-NEXT: andb $1, %al
1098; AVX1-NEXT: movb %al, (%rsp)
1099; AVX1-NEXT: vpextrb $13, %xmm2, %eax
1100; AVX1-NEXT: andb $1, %al
1101; AVX1-NEXT: movb %al, (%rsp)
1102; AVX1-NEXT: vpextrb $12, %xmm2, %eax
1103; AVX1-NEXT: andb $1, %al
1104; AVX1-NEXT: movb %al, (%rsp)
1105; AVX1-NEXT: vpextrb $11, %xmm2, %eax
1106; AVX1-NEXT: andb $1, %al
1107; AVX1-NEXT: movb %al, (%rsp)
1108; AVX1-NEXT: vpextrb $10, %xmm2, %eax
1109; AVX1-NEXT: andb $1, %al
1110; AVX1-NEXT: movb %al, (%rsp)
1111; AVX1-NEXT: vpextrb $9, %xmm2, %eax
1112; AVX1-NEXT: andb $1, %al
1113; AVX1-NEXT: movb %al, (%rsp)
1114; AVX1-NEXT: vpextrb $8, %xmm2, %eax
1115; AVX1-NEXT: andb $1, %al
1116; AVX1-NEXT: movb %al, (%rsp)
1117; AVX1-NEXT: vpextrb $7, %xmm2, %eax
1118; AVX1-NEXT: andb $1, %al
1119; AVX1-NEXT: movb %al, (%rsp)
1120; AVX1-NEXT: vpextrb $6, %xmm2, %eax
1121; AVX1-NEXT: andb $1, %al
1122; AVX1-NEXT: movb %al, (%rsp)
1123; AVX1-NEXT: vpextrb $5, %xmm2, %eax
1124; AVX1-NEXT: andb $1, %al
1125; AVX1-NEXT: movb %al, (%rsp)
1126; AVX1-NEXT: vpextrb $4, %xmm2, %eax
1127; AVX1-NEXT: andb $1, %al
1128; AVX1-NEXT: movb %al, (%rsp)
1129; AVX1-NEXT: vpextrb $3, %xmm2, %eax
1130; AVX1-NEXT: andb $1, %al
1131; AVX1-NEXT: movb %al, (%rsp)
1132; AVX1-NEXT: vpextrb $2, %xmm2, %eax
1133; AVX1-NEXT: andb $1, %al
1134; AVX1-NEXT: movb %al, (%rsp)
1135; AVX1-NEXT: vpextrb $1, %xmm2, %eax
1136; AVX1-NEXT: andb $1, %al
1137; AVX1-NEXT: movb %al, (%rsp)
1138; AVX1-NEXT: vpextrb $0, %xmm2, %eax
1139; AVX1-NEXT: andb $1, %al
1140; AVX1-NEXT: movb %al, (%rsp)
1141; AVX1-NEXT: vpextrb $15, %xmm1, %eax
1142; AVX1-NEXT: andb $1, %al
1143; AVX1-NEXT: movb %al, (%rsp)
1144; AVX1-NEXT: vpextrb $14, %xmm1, %eax
1145; AVX1-NEXT: andb $1, %al
1146; AVX1-NEXT: movb %al, (%rsp)
1147; AVX1-NEXT: vpextrb $13, %xmm1, %eax
1148; AVX1-NEXT: andb $1, %al
1149; AVX1-NEXT: movb %al, (%rsp)
1150; AVX1-NEXT: vpextrb $12, %xmm1, %eax
1151; AVX1-NEXT: andb $1, %al
1152; AVX1-NEXT: movb %al, (%rsp)
1153; AVX1-NEXT: vpextrb $11, %xmm1, %eax
1154; AVX1-NEXT: andb $1, %al
1155; AVX1-NEXT: movb %al, (%rsp)
1156; AVX1-NEXT: vpextrb $10, %xmm1, %eax
1157; AVX1-NEXT: andb $1, %al
1158; AVX1-NEXT: movb %al, (%rsp)
1159; AVX1-NEXT: vpextrb $9, %xmm1, %eax
1160; AVX1-NEXT: andb $1, %al
1161; AVX1-NEXT: movb %al, (%rsp)
1162; AVX1-NEXT: vpextrb $8, %xmm1, %eax
1163; AVX1-NEXT: andb $1, %al
1164; AVX1-NEXT: movb %al, (%rsp)
1165; AVX1-NEXT: vpextrb $7, %xmm1, %eax
1166; AVX1-NEXT: andb $1, %al
1167; AVX1-NEXT: movb %al, (%rsp)
1168; AVX1-NEXT: vpextrb $6, %xmm1, %eax
1169; AVX1-NEXT: andb $1, %al
1170; AVX1-NEXT: movb %al, (%rsp)
1171; AVX1-NEXT: vpextrb $5, %xmm1, %eax
1172; AVX1-NEXT: andb $1, %al
1173; AVX1-NEXT: movb %al, (%rsp)
1174; AVX1-NEXT: vpextrb $4, %xmm1, %eax
1175; AVX1-NEXT: andb $1, %al
1176; AVX1-NEXT: movb %al, (%rsp)
1177; AVX1-NEXT: vpextrb $3, %xmm1, %eax
1178; AVX1-NEXT: andb $1, %al
1179; AVX1-NEXT: movb %al, (%rsp)
1180; AVX1-NEXT: vpextrb $2, %xmm1, %eax
1181; AVX1-NEXT: andb $1, %al
1182; AVX1-NEXT: movb %al, (%rsp)
1183; AVX1-NEXT: vpextrb $1, %xmm1, %eax
1184; AVX1-NEXT: andb $1, %al
1185; AVX1-NEXT: movb %al, (%rsp)
1186; AVX1-NEXT: vpextrb $0, %xmm1, %eax
1187; AVX1-NEXT: andb $1, %al
1188; AVX1-NEXT: movb %al, (%rsp)
1189; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1190; AVX1-NEXT: vpextrb $15, %xmm1, %eax
1191; AVX1-NEXT: andb $1, %al
1192; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1193; AVX1-NEXT: vpextrb $14, %xmm1, %eax
1194; AVX1-NEXT: andb $1, %al
1195; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1196; AVX1-NEXT: vpextrb $13, %xmm1, %eax
1197; AVX1-NEXT: andb $1, %al
1198; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1199; AVX1-NEXT: vpextrb $12, %xmm1, %eax
1200; AVX1-NEXT: andb $1, %al
1201; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1202; AVX1-NEXT: vpextrb $11, %xmm1, %eax
1203; AVX1-NEXT: andb $1, %al
1204; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1205; AVX1-NEXT: vpextrb $10, %xmm1, %eax
1206; AVX1-NEXT: andb $1, %al
1207; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1208; AVX1-NEXT: vpextrb $9, %xmm1, %eax
1209; AVX1-NEXT: andb $1, %al
1210; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1211; AVX1-NEXT: vpextrb $8, %xmm1, %eax
1212; AVX1-NEXT: andb $1, %al
1213; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1214; AVX1-NEXT: vpextrb $7, %xmm1, %eax
1215; AVX1-NEXT: andb $1, %al
1216; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1217; AVX1-NEXT: vpextrb $6, %xmm1, %eax
1218; AVX1-NEXT: andb $1, %al
1219; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1220; AVX1-NEXT: vpextrb $5, %xmm1, %eax
1221; AVX1-NEXT: andb $1, %al
1222; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1223; AVX1-NEXT: vpextrb $4, %xmm1, %eax
1224; AVX1-NEXT: andb $1, %al
1225; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1226; AVX1-NEXT: vpextrb $3, %xmm1, %eax
1227; AVX1-NEXT: andb $1, %al
1228; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1229; AVX1-NEXT: vpextrb $2, %xmm1, %eax
1230; AVX1-NEXT: andb $1, %al
1231; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1232; AVX1-NEXT: vpextrb $1, %xmm1, %eax
1233; AVX1-NEXT: andb $1, %al
1234; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1235; AVX1-NEXT: vpextrb $0, %xmm1, %eax
1236; AVX1-NEXT: andb $1, %al
1237; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1238; AVX1-NEXT: vpextrb $15, %xmm0, %eax
1239; AVX1-NEXT: andb $1, %al
1240; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1241; AVX1-NEXT: vpextrb $14, %xmm0, %eax
1242; AVX1-NEXT: andb $1, %al
1243; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1244; AVX1-NEXT: vpextrb $13, %xmm0, %eax
1245; AVX1-NEXT: andb $1, %al
1246; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1247; AVX1-NEXT: vpextrb $12, %xmm0, %eax
1248; AVX1-NEXT: andb $1, %al
1249; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1250; AVX1-NEXT: vpextrb $11, %xmm0, %eax
1251; AVX1-NEXT: andb $1, %al
1252; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1253; AVX1-NEXT: vpextrb $10, %xmm0, %eax
1254; AVX1-NEXT: andb $1, %al
1255; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1256; AVX1-NEXT: vpextrb $9, %xmm0, %eax
1257; AVX1-NEXT: andb $1, %al
1258; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1259; AVX1-NEXT: vpextrb $8, %xmm0, %eax
1260; AVX1-NEXT: andb $1, %al
1261; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1262; AVX1-NEXT: vpextrb $7, %xmm0, %eax
1263; AVX1-NEXT: andb $1, %al
1264; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1265; AVX1-NEXT: vpextrb $6, %xmm0, %eax
1266; AVX1-NEXT: andb $1, %al
1267; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1268; AVX1-NEXT: vpextrb $5, %xmm0, %eax
1269; AVX1-NEXT: andb $1, %al
1270; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1271; AVX1-NEXT: vpextrb $4, %xmm0, %eax
1272; AVX1-NEXT: andb $1, %al
1273; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1274; AVX1-NEXT: vpextrb $3, %xmm0, %eax
1275; AVX1-NEXT: andb $1, %al
1276; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1277; AVX1-NEXT: vpextrb $2, %xmm0, %eax
1278; AVX1-NEXT: andb $1, %al
1279; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1280; AVX1-NEXT: vpextrb $1, %xmm0, %eax
1281; AVX1-NEXT: andb $1, %al
1282; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1283; AVX1-NEXT: vpextrb $0, %xmm0, %eax
1284; AVX1-NEXT: andb $1, %al
1285; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1286; AVX1-NEXT: movl (%rsp), %ecx
1287; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
1288; AVX1-NEXT: shlq $32, %rax
1289; AVX1-NEXT: orq %rcx, %rax
1290; AVX1-NEXT: movq %rbp, %rsp
1291; AVX1-NEXT: popq %rbp
1292; AVX1-NEXT: vzeroupper
1293; AVX1-NEXT: retq
1294;
1295; AVX2-LABEL: v64i8:
1296; AVX2: # BB#0:
1297; AVX2-NEXT: pushq %rbp
1298; AVX2-NEXT: .Lcfi0:
1299; AVX2-NEXT: .cfi_def_cfa_offset 16
1300; AVX2-NEXT: .Lcfi1:
1301; AVX2-NEXT: .cfi_offset %rbp, -16
1302; AVX2-NEXT: movq %rsp, %rbp
1303; AVX2-NEXT: .Lcfi2:
1304; AVX2-NEXT: .cfi_def_cfa_register %rbp
1305; AVX2-NEXT: andq $-32, %rsp
1306; AVX2-NEXT: subq $64, %rsp
1307; AVX2-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
1308; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm2
1309; AVX2-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm0
1310; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
1311; AVX2-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm1
1312; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
1313; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
1314; AVX2-NEXT: vpextrb $15, %xmm2, %eax
1315; AVX2-NEXT: andb $1, %al
1316; AVX2-NEXT: movb %al, (%rsp)
1317; AVX2-NEXT: vpextrb $14, %xmm2, %eax
1318; AVX2-NEXT: andb $1, %al
1319; AVX2-NEXT: movb %al, (%rsp)
1320; AVX2-NEXT: vpextrb $13, %xmm2, %eax
1321; AVX2-NEXT: andb $1, %al
1322; AVX2-NEXT: movb %al, (%rsp)
1323; AVX2-NEXT: vpextrb $12, %xmm2, %eax
1324; AVX2-NEXT: andb $1, %al
1325; AVX2-NEXT: movb %al, (%rsp)
1326; AVX2-NEXT: vpextrb $11, %xmm2, %eax
1327; AVX2-NEXT: andb $1, %al
1328; AVX2-NEXT: movb %al, (%rsp)
1329; AVX2-NEXT: vpextrb $10, %xmm2, %eax
1330; AVX2-NEXT: andb $1, %al
1331; AVX2-NEXT: movb %al, (%rsp)
1332; AVX2-NEXT: vpextrb $9, %xmm2, %eax
1333; AVX2-NEXT: andb $1, %al
1334; AVX2-NEXT: movb %al, (%rsp)
1335; AVX2-NEXT: vpextrb $8, %xmm2, %eax
1336; AVX2-NEXT: andb $1, %al
1337; AVX2-NEXT: movb %al, (%rsp)
1338; AVX2-NEXT: vpextrb $7, %xmm2, %eax
1339; AVX2-NEXT: andb $1, %al
1340; AVX2-NEXT: movb %al, (%rsp)
1341; AVX2-NEXT: vpextrb $6, %xmm2, %eax
1342; AVX2-NEXT: andb $1, %al
1343; AVX2-NEXT: movb %al, (%rsp)
1344; AVX2-NEXT: vpextrb $5, %xmm2, %eax
1345; AVX2-NEXT: andb $1, %al
1346; AVX2-NEXT: movb %al, (%rsp)
1347; AVX2-NEXT: vpextrb $4, %xmm2, %eax
1348; AVX2-NEXT: andb $1, %al
1349; AVX2-NEXT: movb %al, (%rsp)
1350; AVX2-NEXT: vpextrb $3, %xmm2, %eax
1351; AVX2-NEXT: andb $1, %al
1352; AVX2-NEXT: movb %al, (%rsp)
1353; AVX2-NEXT: vpextrb $2, %xmm2, %eax
1354; AVX2-NEXT: andb $1, %al
1355; AVX2-NEXT: movb %al, (%rsp)
1356; AVX2-NEXT: vpextrb $1, %xmm2, %eax
1357; AVX2-NEXT: andb $1, %al
1358; AVX2-NEXT: movb %al, (%rsp)
1359; AVX2-NEXT: vpextrb $0, %xmm2, %eax
1360; AVX2-NEXT: andb $1, %al
1361; AVX2-NEXT: movb %al, (%rsp)
1362; AVX2-NEXT: vpextrb $15, %xmm1, %eax
1363; AVX2-NEXT: andb $1, %al
1364; AVX2-NEXT: movb %al, (%rsp)
1365; AVX2-NEXT: vpextrb $14, %xmm1, %eax
1366; AVX2-NEXT: andb $1, %al
1367; AVX2-NEXT: movb %al, (%rsp)
1368; AVX2-NEXT: vpextrb $13, %xmm1, %eax
1369; AVX2-NEXT: andb $1, %al
1370; AVX2-NEXT: movb %al, (%rsp)
1371; AVX2-NEXT: vpextrb $12, %xmm1, %eax
1372; AVX2-NEXT: andb $1, %al
1373; AVX2-NEXT: movb %al, (%rsp)
1374; AVX2-NEXT: vpextrb $11, %xmm1, %eax
1375; AVX2-NEXT: andb $1, %al
1376; AVX2-NEXT: movb %al, (%rsp)
1377; AVX2-NEXT: vpextrb $10, %xmm1, %eax
1378; AVX2-NEXT: andb $1, %al
1379; AVX2-NEXT: movb %al, (%rsp)
1380; AVX2-NEXT: vpextrb $9, %xmm1, %eax
1381; AVX2-NEXT: andb $1, %al
1382; AVX2-NEXT: movb %al, (%rsp)
1383; AVX2-NEXT: vpextrb $8, %xmm1, %eax
1384; AVX2-NEXT: andb $1, %al
1385; AVX2-NEXT: movb %al, (%rsp)
1386; AVX2-NEXT: vpextrb $7, %xmm1, %eax
1387; AVX2-NEXT: andb $1, %al
1388; AVX2-NEXT: movb %al, (%rsp)
1389; AVX2-NEXT: vpextrb $6, %xmm1, %eax
1390; AVX2-NEXT: andb $1, %al
1391; AVX2-NEXT: movb %al, (%rsp)
1392; AVX2-NEXT: vpextrb $5, %xmm1, %eax
1393; AVX2-NEXT: andb $1, %al
1394; AVX2-NEXT: movb %al, (%rsp)
1395; AVX2-NEXT: vpextrb $4, %xmm1, %eax
1396; AVX2-NEXT: andb $1, %al
1397; AVX2-NEXT: movb %al, (%rsp)
1398; AVX2-NEXT: vpextrb $3, %xmm1, %eax
1399; AVX2-NEXT: andb $1, %al
1400; AVX2-NEXT: movb %al, (%rsp)
1401; AVX2-NEXT: vpextrb $2, %xmm1, %eax
1402; AVX2-NEXT: andb $1, %al
1403; AVX2-NEXT: movb %al, (%rsp)
1404; AVX2-NEXT: vpextrb $1, %xmm1, %eax
1405; AVX2-NEXT: andb $1, %al
1406; AVX2-NEXT: movb %al, (%rsp)
1407; AVX2-NEXT: vpextrb $0, %xmm1, %eax
1408; AVX2-NEXT: andb $1, %al
1409; AVX2-NEXT: movb %al, (%rsp)
1410; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1411; AVX2-NEXT: vpextrb $15, %xmm1, %eax
1412; AVX2-NEXT: andb $1, %al
1413; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1414; AVX2-NEXT: vpextrb $14, %xmm1, %eax
1415; AVX2-NEXT: andb $1, %al
1416; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1417; AVX2-NEXT: vpextrb $13, %xmm1, %eax
1418; AVX2-NEXT: andb $1, %al
1419; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1420; AVX2-NEXT: vpextrb $12, %xmm1, %eax
1421; AVX2-NEXT: andb $1, %al
1422; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1423; AVX2-NEXT: vpextrb $11, %xmm1, %eax
1424; AVX2-NEXT: andb $1, %al
1425; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1426; AVX2-NEXT: vpextrb $10, %xmm1, %eax
1427; AVX2-NEXT: andb $1, %al
1428; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1429; AVX2-NEXT: vpextrb $9, %xmm1, %eax
1430; AVX2-NEXT: andb $1, %al
1431; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1432; AVX2-NEXT: vpextrb $8, %xmm1, %eax
1433; AVX2-NEXT: andb $1, %al
1434; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1435; AVX2-NEXT: vpextrb $7, %xmm1, %eax
1436; AVX2-NEXT: andb $1, %al
1437; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1438; AVX2-NEXT: vpextrb $6, %xmm1, %eax
1439; AVX2-NEXT: andb $1, %al
1440; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1441; AVX2-NEXT: vpextrb $5, %xmm1, %eax
1442; AVX2-NEXT: andb $1, %al
1443; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1444; AVX2-NEXT: vpextrb $4, %xmm1, %eax
1445; AVX2-NEXT: andb $1, %al
1446; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1447; AVX2-NEXT: vpextrb $3, %xmm1, %eax
1448; AVX2-NEXT: andb $1, %al
1449; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1450; AVX2-NEXT: vpextrb $2, %xmm1, %eax
1451; AVX2-NEXT: andb $1, %al
1452; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1453; AVX2-NEXT: vpextrb $1, %xmm1, %eax
1454; AVX2-NEXT: andb $1, %al
1455; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1456; AVX2-NEXT: vpextrb $0, %xmm1, %eax
1457; AVX2-NEXT: andb $1, %al
1458; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1459; AVX2-NEXT: vpextrb $15, %xmm0, %eax
1460; AVX2-NEXT: andb $1, %al
1461; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1462; AVX2-NEXT: vpextrb $14, %xmm0, %eax
1463; AVX2-NEXT: andb $1, %al
1464; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1465; AVX2-NEXT: vpextrb $13, %xmm0, %eax
1466; AVX2-NEXT: andb $1, %al
1467; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1468; AVX2-NEXT: vpextrb $12, %xmm0, %eax
1469; AVX2-NEXT: andb $1, %al
1470; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1471; AVX2-NEXT: vpextrb $11, %xmm0, %eax
1472; AVX2-NEXT: andb $1, %al
1473; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1474; AVX2-NEXT: vpextrb $10, %xmm0, %eax
1475; AVX2-NEXT: andb $1, %al
1476; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1477; AVX2-NEXT: vpextrb $9, %xmm0, %eax
1478; AVX2-NEXT: andb $1, %al
1479; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1480; AVX2-NEXT: vpextrb $8, %xmm0, %eax
1481; AVX2-NEXT: andb $1, %al
1482; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1483; AVX2-NEXT: vpextrb $7, %xmm0, %eax
1484; AVX2-NEXT: andb $1, %al
1485; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1486; AVX2-NEXT: vpextrb $6, %xmm0, %eax
1487; AVX2-NEXT: andb $1, %al
1488; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1489; AVX2-NEXT: vpextrb $5, %xmm0, %eax
1490; AVX2-NEXT: andb $1, %al
1491; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1492; AVX2-NEXT: vpextrb $4, %xmm0, %eax
1493; AVX2-NEXT: andb $1, %al
1494; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1495; AVX2-NEXT: vpextrb $3, %xmm0, %eax
1496; AVX2-NEXT: andb $1, %al
1497; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1498; AVX2-NEXT: vpextrb $2, %xmm0, %eax
1499; AVX2-NEXT: andb $1, %al
1500; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1501; AVX2-NEXT: vpextrb $1, %xmm0, %eax
1502; AVX2-NEXT: andb $1, %al
1503; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1504; AVX2-NEXT: vpextrb $0, %xmm0, %eax
1505; AVX2-NEXT: andb $1, %al
1506; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1507; AVX2-NEXT: movl (%rsp), %ecx
1508; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
1509; AVX2-NEXT: shlq $32, %rax
1510; AVX2-NEXT: orq %rcx, %rax
1511; AVX2-NEXT: movq %rbp, %rsp
1512; AVX2-NEXT: popq %rbp
1513; AVX2-NEXT: vzeroupper
1514; AVX2-NEXT: retq
1515;
1516; AVX512F-LABEL: v64i8:
1517; AVX512F: # BB#0:
1518; AVX512F-NEXT: pushq %rbp
1519; AVX512F-NEXT: .Lcfi3:
1520; AVX512F-NEXT: .cfi_def_cfa_offset 16
1521; AVX512F-NEXT: .Lcfi4:
1522; AVX512F-NEXT: .cfi_offset %rbp, -16
1523; AVX512F-NEXT: movq %rsp, %rbp
1524; AVX512F-NEXT: .Lcfi5:
1525; AVX512F-NEXT: .cfi_def_cfa_register %rbp
1526; AVX512F-NEXT: andq $-32, %rsp
1527; AVX512F-NEXT: subq $64, %rsp
1528; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
1529; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
1530; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm2
1531; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
1532; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm2
1533; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
1534; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
1535; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
1536; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
1537; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
1538; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1539; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1540; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
1541; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1542; AVX512F-NEXT: kmovw %k0, (%rsp)
1543; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
1544; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1545; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
1546; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1547; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1548; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm0
1549; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
1550; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1551; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1552; AVX512F-NEXT: movl (%rsp), %ecx
1553; AVX512F-NEXT: movl {{[0-9]+}}(%rsp), %eax
1554; AVX512F-NEXT: shlq $32, %rax
1555; AVX512F-NEXT: orq %rcx, %rax
1556; AVX512F-NEXT: movq %rbp, %rsp
1557; AVX512F-NEXT: popq %rbp
1558; AVX512F-NEXT: vzeroupper
1559; AVX512F-NEXT: retq
1560;
1561; AVX512BW-LABEL: v64i8:
1562; AVX512BW: # BB#0:
1563; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k1
1564; AVX512BW-NEXT: vpcmpgtb %zmm3, %zmm2, %k0 {%k1}
1565; AVX512BW-NEXT: kmovq %k0, %rax
1566; AVX512BW-NEXT: vzeroupper
1567; AVX512BW-NEXT: retq
1568 %x0 = icmp sgt <64 x i8> %a, %b
1569 %x1 = icmp sgt <64 x i8> %c, %d
1570 %y = and <64 x i1> %x0, %x1
1571 %res = bitcast <64 x i1> %y to i64
1572 ret i64 %res
1573}