blob: 3402acc1462002818eab82b8895355e2066594b9 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW

define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
; SSE-LABEL: v8i64:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtq %xmm5, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT: pcmpgtq %xmm4, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: pcmpgtq %xmm7, %xmm3
; SSE-NEXT: pcmpgtq %xmm6, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT: packssdw %xmm2, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm11[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm10[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,2],xmm9[0,2]
; SSE-NEXT: packssdw %xmm8, %xmm8
; SSE-NEXT: pblendw {{.*#+}} xmm8 = xmm1[0,1,2,3],xmm8[4,5,6,7]
; SSE-NEXT: pand %xmm2, %xmm8
; SSE-NEXT: psllw $15, %xmm8
; SSE-NEXT: psraw $15, %xmm8
; SSE-NEXT: packsswb %xmm0, %xmm8
; SSE-NEXT: pmovmskb %xmm8, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX1-LABEL: v8i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT: vpcmpgtq %xmm8, %xmm9, %xmm8
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm9
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm8, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm9[0]
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm2
; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm3
; AVX1-NEXT: vpacksswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpshufb %xmm8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v8i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpcmpgtq %ymm7, %ymm5, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
; AVX2-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v8i64:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
; AVX512F-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v8i64:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; Both operand pairs are compared (signed greater-than), the two <8 x i1>
; results are ANDed, and the combined mask is bitcast to an i8 return value.
  %x0 = icmp sgt <8 x i64> %a, %b
  %x1 = icmp sgt <8 x i64> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}
define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
; SSE-LABEL: v8f64:
; SSE: # BB#0:
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: cmpltpd %xmm3, %xmm7
; SSE-NEXT: cmpltpd %xmm2, %xmm6
; SSE-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,4,5,6,7,0,1,4,5,8,9,12,13]
; SSE-NEXT: pshufb %xmm2, %xmm6
; SSE-NEXT: cmpltpd %xmm1, %xmm5
; SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm5[0,2,2,3,4,5,6,7]
; SSE-NEXT: cmpltpd %xmm0, %xmm4
; SSE-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm4[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm6[4,5,6,7]
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,2],xmm11[0,2]
; SSE-NEXT: pshufb %xmm2, %xmm8
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: shufps {{.*#+}} xmm10 = xmm10[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm10[0,2,2,3,4,5,6,7]
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: shufps {{.*#+}} xmm9 = xmm9[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm9[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm8[4,5,6,7]
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: psllw $15, %xmm2
; SSE-NEXT: psraw $15, %xmm2
; SSE-NEXT: packsswb %xmm0, %xmm2
; SSE-NEXT: pmovmskb %xmm2, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX12-LABEL: v8f64:
; AVX12: # BB#0:
; AVX12-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX12-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX12-NEXT: vcmpltpd %ymm5, %ymm7, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vcmpltpd %ymm4, %ymm6, %ymm2
; AVX12-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX12-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
; AVX12-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX12-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX12-NEXT: vzeroupper
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v8f64:
; AVX512F: # BB#0:
; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; AVX512F-NEXT: vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v8f64:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; AVX512BW-NEXT: vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; Ordered greater-than compares of both pairs (codegen commutes them to
; cmpltpd), the two <8 x i1> results ANDed and bitcast to an i8 mask.
  %x0 = fcmp ogt <8 x double> %a, %b
  %x1 = fcmp ogt <8 x double> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}
define i32 @v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i16> %d) {
; SSE-LABEL: v32i16:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtw %xmm5, %xmm1
; SSE-NEXT: pcmpgtw %xmm4, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: pcmpgtw %xmm7, %xmm3
; SSE-NEXT: pcmpgtw %xmm6, %xmm2
; SSE-NEXT: packsswb %xmm3, %xmm2
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: packsswb %xmm11, %xmm10
; SSE-NEXT: pand %xmm0, %xmm10
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: packsswb %xmm9, %xmm8
; SSE-NEXT: pand %xmm2, %xmm8
; SSE-NEXT: pmovmskb %xmm10, %ecx
; SSE-NEXT: pmovmskb %xmm8, %eax
; SSE-NEXT: shll $16, %eax
; SSE-NEXT: orl %ecx, %eax
; SSE-NEXT: retq
;
; AVX1-LABEL: v32i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT: vpcmpgtw %xmm8, %xmm9, %xmm8
; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm7, %xmm5, %xmm2
; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm8, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm6, %xmm4, %xmm3
; AVX1-NEXT: vpacksswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: vpmovmskb %xmm1, %eax
; AVX1-NEXT: shll $16, %eax
; AVX1-NEXT: orl %ecx, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v32i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v32i16:
; AVX512F: # BB#0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: .Lcfi0:
; AVX512F-NEXT: .cfi_def_cfa_offset 16
; AVX512F-NEXT: .Lcfi1:
; AVX512F-NEXT: .cfi_offset %rbp, -16
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: .Lcfi2:
; AVX512F-NEXT: .cfi_def_cfa_register %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm1
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm0
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm1
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm2
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, (%rsp)
; AVX512F-NEXT: movl (%rsp), %eax
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v32i16:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtw %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; Both operand pairs are compared (signed greater-than), the two <32 x i1>
; results are ANDed, and the combined mask is bitcast to an i32 return value.
  %x0 = icmp sgt <32 x i16> %a, %b
  %x1 = icmp sgt <32 x i16> %c, %d
  %y = and <32 x i1> %x0, %x1
  %res = bitcast <32 x i1> %y to i32
  ret i32 %res
}
624define i16 @v16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i32> %d) {
625; SSE-LABEL: v16i32:
626; SSE: # BB#0:
627; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
Simon Pilgrim11e29692017-09-14 10:30:22 +0000628; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
Simon Pilgrimb47b3f22017-10-04 17:31:28 +0000629; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
Simon Pilgrim11e29692017-09-14 10:30:22 +0000630; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
631; SSE-NEXT: pcmpgtd %xmm7, %xmm3
Simon Pilgrim11e29692017-09-14 10:30:22 +0000632; SSE-NEXT: pcmpgtd %xmm6, %xmm2
Simon Pilgrimb47b3f22017-10-04 17:31:28 +0000633; SSE-NEXT: packssdw %xmm3, %xmm2
Simon Pilgrim11e29692017-09-14 10:30:22 +0000634; SSE-NEXT: pcmpgtd %xmm5, %xmm1
Simon Pilgrim11e29692017-09-14 10:30:22 +0000635; SSE-NEXT: pcmpgtd %xmm4, %xmm0
Simon Pilgrimb47b3f22017-10-04 17:31:28 +0000636; SSE-NEXT: packssdw %xmm1, %xmm0
Simon Pilgrimf5f291d2017-10-03 12:01:31 +0000637; SSE-NEXT: packsswb %xmm2, %xmm0
Simon Pilgrim11e29692017-09-14 10:30:22 +0000638; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm11
Simon Pilgrim11e29692017-09-14 10:30:22 +0000639; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm10
Simon Pilgrimb47b3f22017-10-04 17:31:28 +0000640; SSE-NEXT: packssdw %xmm11, %xmm10
641; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm9
Simon Pilgrim11e29692017-09-14 10:30:22 +0000642; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm8
Simon Pilgrimb47b3f22017-10-04 17:31:28 +0000643; SSE-NEXT: packssdw %xmm9, %xmm8
644; SSE-NEXT: packsswb %xmm10, %xmm8
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000645; SSE-NEXT: pand %xmm0, %xmm8
646; SSE-NEXT: pmovmskb %xmm8, %eax
Simon Pilgrim11e29692017-09-14 10:30:22 +0000647; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
648; SSE-NEXT: retq
649;
650; AVX1-LABEL: v16i32:
651; AVX1: # BB#0:
652; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
653; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
654; AVX1-NEXT: vpcmpgtd %xmm8, %xmm9, %xmm8
655; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm1
656; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm1
657; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
658; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm9
659; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
660; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
661; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm1
662; AVX1-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
663; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
664; AVX1-NEXT: vpshufb %xmm8, %xmm0, %xmm0
665; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm9[0]
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000666; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
667; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
668; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
669; AVX1-NEXT: vpcmpgtd %xmm7, %xmm5, %xmm2
670; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
Simon Pilgrim11e29692017-09-14 10:30:22 +0000671; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm1
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000672; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
673; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
674; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
675; AVX1-NEXT: vpcmpgtd %xmm6, %xmm4, %xmm3
676; AVX1-NEXT: vpacksswb %xmm2, %xmm3, %xmm2
677; AVX1-NEXT: vpshufb %xmm8, %xmm2, %xmm2
678; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000679; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
680; AVX1-NEXT: vpmovmskb %xmm0, %eax
681; AVX1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
682; AVX1-NEXT: vzeroupper
683; AVX1-NEXT: retq
684;
685; AVX2-LABEL: v16i32:
686; AVX2: # BB#0:
687; AVX2-NEXT: vpcmpgtd %ymm3, %ymm1, %ymm1
688; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
689; AVX2-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
690; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
691; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
692; AVX2-NEXT: vpcmpgtd %ymm2, %ymm0, %ymm0
693; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
694; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
695; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
696; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000697; AVX2-NEXT: vpcmpgtd %ymm7, %ymm5, %ymm1
698; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
699; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
700; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
701; AVX2-NEXT: vpcmpgtd %ymm6, %ymm4, %ymm2
702; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
703; AVX2-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
704; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
705; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000706; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
707; AVX2-NEXT: vpmovmskb %xmm0, %eax
708; AVX2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
709; AVX2-NEXT: vzeroupper
710; AVX2-NEXT: retq
711;
712; AVX512F-LABEL: v16i32:
713; AVX512F: # BB#0:
714; AVX512F-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
715; AVX512F-NEXT: vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
716; AVX512F-NEXT: kmovw %k0, %eax
717; AVX512F-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
718; AVX512F-NEXT: vzeroupper
719; AVX512F-NEXT: retq
720;
721; AVX512BW-LABEL: v16i32:
722; AVX512BW: # BB#0:
723; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
724; AVX512BW-NEXT: vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
725; AVX512BW-NEXT: kmovd %k0, %eax
726; AVX512BW-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
727; AVX512BW-NEXT: vzeroupper
728; AVX512BW-NEXT: retq
729 %x0 = icmp sgt <16 x i32> %a, %b
730 %x1 = icmp sgt <16 x i32> %c, %d
731 %y = and <16 x i1> %x0, %x1
732 %res = bitcast <16 x i1> %y to i16
733 ret i16 %res
734}
735
736define i16 @v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) {
737; SSE-LABEL: v16f32:
738; SSE: # BB#0:
739; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm8
740; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm10
741; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9
742; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm11
743; SSE-NEXT: cmpltps %xmm3, %xmm7
744; SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
745; SSE-NEXT: pshufb %xmm3, %xmm7
746; SSE-NEXT: cmpltps %xmm2, %xmm6
747; SSE-NEXT: pshufb %xmm3, %xmm6
748; SSE-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm7[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000749; SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
750; SSE-NEXT: pshufb %xmm2, %xmm6
751; SSE-NEXT: cmpltps %xmm1, %xmm5
752; SSE-NEXT: pshufb %xmm3, %xmm5
753; SSE-NEXT: cmpltps %xmm0, %xmm4
754; SSE-NEXT: pshufb %xmm3, %xmm4
755; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000756; SSE-NEXT: pshufb %xmm2, %xmm4
757; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000758; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm11
759; SSE-NEXT: pshufb %xmm3, %xmm11
760; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm9
761; SSE-NEXT: pshufb %xmm3, %xmm9
762; SSE-NEXT: punpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000763; SSE-NEXT: pshufb %xmm2, %xmm9
764; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm10
765; SSE-NEXT: pshufb %xmm3, %xmm10
766; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm8
767; SSE-NEXT: pshufb %xmm3, %xmm8
768; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm10[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000769; SSE-NEXT: pshufb %xmm2, %xmm8
770; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm9[0]
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000771; SSE-NEXT: pand %xmm4, %xmm8
772; SSE-NEXT: pmovmskb %xmm8, %eax
Simon Pilgrim11e29692017-09-14 10:30:22 +0000773; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
774; SSE-NEXT: retq
775;
776; AVX12-LABEL: v16f32:
777; AVX12: # BB#0:
778; AVX12-NEXT: vcmpltps %ymm1, %ymm3, %ymm1
779; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm3
780; AVX12-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
781; AVX12-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
782; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
783; AVX12-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
784; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
785; AVX12-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
786; AVX12-NEXT: vpshufb %xmm3, %xmm0, %xmm0
787; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Simon Pilgrim0b21ef12017-09-18 16:45:05 +0000788; AVX12-NEXT: vcmpltps %ymm5, %ymm7, %ymm1
789; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
790; AVX12-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
791; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
792; AVX12-NEXT: vcmpltps %ymm4, %ymm6, %ymm2
793; AVX12-NEXT: vextractf128 $1, %ymm2, %xmm4
794; AVX12-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
795; AVX12-NEXT: vpshufb %xmm3, %xmm2, %xmm2
796; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
Simon Pilgrim11e29692017-09-14 10:30:22 +0000797; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
798; AVX12-NEXT: vpmovmskb %xmm0, %eax
799; AVX12-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
800; AVX12-NEXT: vzeroupper
801; AVX12-NEXT: retq
802;
803; AVX512F-LABEL: v16f32:
804; AVX512F: # BB#0:
805; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1
806; AVX512F-NEXT: vcmpltps %zmm2, %zmm3, %k0 {%k1}
807; AVX512F-NEXT: kmovw %k0, %eax
808; AVX512F-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
809; AVX512F-NEXT: vzeroupper
810; AVX512F-NEXT: retq
811;
812; AVX512BW-LABEL: v16f32:
813; AVX512BW: # BB#0:
814; AVX512BW-NEXT: vcmpltps %zmm0, %zmm1, %k1
815; AVX512BW-NEXT: vcmpltps %zmm2, %zmm3, %k0 {%k1}
816; AVX512BW-NEXT: kmovd %k0, %eax
817; AVX512BW-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
818; AVX512BW-NEXT: vzeroupper
819; AVX512BW-NEXT: retq
820 %x0 = fcmp ogt <16 x float> %a, %b
821 %x1 = fcmp ogt <16 x float> %c, %d
822 %y = and <16 x i1> %x0, %x1
823 %res = bitcast <16 x i1> %y to i16
824 ret i16 %res
825}
826
827define i64 @v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) {
828; SSE-LABEL: v64i8:
829; SSE: # BB#0:
830; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
831; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
832; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
833; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
834; SSE-NEXT: pcmpgtb %xmm6, %xmm2
835; SSE-NEXT: pcmpgtb %xmm7, %xmm3
836; SSE-NEXT: pcmpgtb %xmm4, %xmm0
837; SSE-NEXT: pcmpgtb %xmm5, %xmm1
838; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm8
839; SSE-NEXT: pand %xmm2, %xmm8
840; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm9
841; SSE-NEXT: pand %xmm3, %xmm9
842; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm10
843; SSE-NEXT: pand %xmm0, %xmm10
844; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm11
845; SSE-NEXT: pand %xmm1, %xmm11
846; SSE-NEXT: pextrb $15, %xmm11, %eax
847; SSE-NEXT: andb $1, %al
848; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
849; SSE-NEXT: pextrb $14, %xmm11, %eax
850; SSE-NEXT: andb $1, %al
851; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
852; SSE-NEXT: pextrb $13, %xmm11, %eax
853; SSE-NEXT: andb $1, %al
854; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
855; SSE-NEXT: pextrb $12, %xmm11, %eax
856; SSE-NEXT: andb $1, %al
857; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
858; SSE-NEXT: pextrb $11, %xmm11, %eax
859; SSE-NEXT: andb $1, %al
860; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
861; SSE-NEXT: pextrb $10, %xmm11, %eax
862; SSE-NEXT: andb $1, %al
863; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
864; SSE-NEXT: pextrb $9, %xmm11, %eax
865; SSE-NEXT: andb $1, %al
866; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
867; SSE-NEXT: pextrb $8, %xmm11, %eax
868; SSE-NEXT: andb $1, %al
869; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
870; SSE-NEXT: pextrb $7, %xmm11, %eax
871; SSE-NEXT: andb $1, %al
872; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
873; SSE-NEXT: pextrb $6, %xmm11, %eax
874; SSE-NEXT: andb $1, %al
875; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
876; SSE-NEXT: pextrb $5, %xmm11, %eax
877; SSE-NEXT: andb $1, %al
878; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
879; SSE-NEXT: pextrb $4, %xmm11, %eax
880; SSE-NEXT: andb $1, %al
881; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
882; SSE-NEXT: pextrb $3, %xmm11, %eax
883; SSE-NEXT: andb $1, %al
884; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
885; SSE-NEXT: pextrb $2, %xmm11, %eax
886; SSE-NEXT: andb $1, %al
887; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
888; SSE-NEXT: pextrb $1, %xmm11, %eax
889; SSE-NEXT: andb $1, %al
890; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
891; SSE-NEXT: pextrb $0, %xmm11, %eax
892; SSE-NEXT: andb $1, %al
893; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
894; SSE-NEXT: pextrb $15, %xmm10, %eax
895; SSE-NEXT: andb $1, %al
896; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
897; SSE-NEXT: pextrb $14, %xmm10, %eax
898; SSE-NEXT: andb $1, %al
899; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
900; SSE-NEXT: pextrb $13, %xmm10, %eax
901; SSE-NEXT: andb $1, %al
902; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
903; SSE-NEXT: pextrb $12, %xmm10, %eax
904; SSE-NEXT: andb $1, %al
905; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
906; SSE-NEXT: pextrb $11, %xmm10, %eax
907; SSE-NEXT: andb $1, %al
908; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
909; SSE-NEXT: pextrb $10, %xmm10, %eax
910; SSE-NEXT: andb $1, %al
911; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
912; SSE-NEXT: pextrb $9, %xmm10, %eax
913; SSE-NEXT: andb $1, %al
914; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
915; SSE-NEXT: pextrb $8, %xmm10, %eax
916; SSE-NEXT: andb $1, %al
917; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
918; SSE-NEXT: pextrb $7, %xmm10, %eax
919; SSE-NEXT: andb $1, %al
920; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
921; SSE-NEXT: pextrb $6, %xmm10, %eax
922; SSE-NEXT: andb $1, %al
923; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
924; SSE-NEXT: pextrb $5, %xmm10, %eax
925; SSE-NEXT: andb $1, %al
926; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
927; SSE-NEXT: pextrb $4, %xmm10, %eax
928; SSE-NEXT: andb $1, %al
929; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
930; SSE-NEXT: pextrb $3, %xmm10, %eax
931; SSE-NEXT: andb $1, %al
932; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
933; SSE-NEXT: pextrb $2, %xmm10, %eax
934; SSE-NEXT: andb $1, %al
935; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
936; SSE-NEXT: pextrb $1, %xmm10, %eax
937; SSE-NEXT: andb $1, %al
938; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
939; SSE-NEXT: pextrb $0, %xmm10, %eax
940; SSE-NEXT: andb $1, %al
941; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
942; SSE-NEXT: pextrb $15, %xmm9, %eax
943; SSE-NEXT: andb $1, %al
944; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
945; SSE-NEXT: pextrb $14, %xmm9, %eax
946; SSE-NEXT: andb $1, %al
947; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
948; SSE-NEXT: pextrb $13, %xmm9, %eax
949; SSE-NEXT: andb $1, %al
950; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
951; SSE-NEXT: pextrb $12, %xmm9, %eax
952; SSE-NEXT: andb $1, %al
953; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
954; SSE-NEXT: pextrb $11, %xmm9, %eax
955; SSE-NEXT: andb $1, %al
956; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
957; SSE-NEXT: pextrb $10, %xmm9, %eax
958; SSE-NEXT: andb $1, %al
959; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
960; SSE-NEXT: pextrb $9, %xmm9, %eax
961; SSE-NEXT: andb $1, %al
962; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
963; SSE-NEXT: pextrb $8, %xmm9, %eax
964; SSE-NEXT: andb $1, %al
965; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
966; SSE-NEXT: pextrb $7, %xmm9, %eax
967; SSE-NEXT: andb $1, %al
968; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
969; SSE-NEXT: pextrb $6, %xmm9, %eax
970; SSE-NEXT: andb $1, %al
971; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
972; SSE-NEXT: pextrb $5, %xmm9, %eax
973; SSE-NEXT: andb $1, %al
974; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
975; SSE-NEXT: pextrb $4, %xmm9, %eax
976; SSE-NEXT: andb $1, %al
977; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
978; SSE-NEXT: pextrb $3, %xmm9, %eax
979; SSE-NEXT: andb $1, %al
980; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
981; SSE-NEXT: pextrb $2, %xmm9, %eax
982; SSE-NEXT: andb $1, %al
983; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
984; SSE-NEXT: pextrb $1, %xmm9, %eax
985; SSE-NEXT: andb $1, %al
986; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
987; SSE-NEXT: pextrb $0, %xmm9, %eax
988; SSE-NEXT: andb $1, %al
989; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
990; SSE-NEXT: pextrb $15, %xmm8, %eax
991; SSE-NEXT: andb $1, %al
992; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
993; SSE-NEXT: pextrb $14, %xmm8, %eax
994; SSE-NEXT: andb $1, %al
995; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
996; SSE-NEXT: pextrb $13, %xmm8, %eax
997; SSE-NEXT: andb $1, %al
998; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
999; SSE-NEXT: pextrb $12, %xmm8, %eax
1000; SSE-NEXT: andb $1, %al
1001; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1002; SSE-NEXT: pextrb $11, %xmm8, %eax
1003; SSE-NEXT: andb $1, %al
1004; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1005; SSE-NEXT: pextrb $10, %xmm8, %eax
1006; SSE-NEXT: andb $1, %al
1007; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1008; SSE-NEXT: pextrb $9, %xmm8, %eax
1009; SSE-NEXT: andb $1, %al
1010; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1011; SSE-NEXT: pextrb $8, %xmm8, %eax
1012; SSE-NEXT: andb $1, %al
1013; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1014; SSE-NEXT: pextrb $7, %xmm8, %eax
1015; SSE-NEXT: andb $1, %al
1016; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1017; SSE-NEXT: pextrb $6, %xmm8, %eax
1018; SSE-NEXT: andb $1, %al
1019; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1020; SSE-NEXT: pextrb $5, %xmm8, %eax
1021; SSE-NEXT: andb $1, %al
1022; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1023; SSE-NEXT: pextrb $4, %xmm8, %eax
1024; SSE-NEXT: andb $1, %al
1025; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1026; SSE-NEXT: pextrb $3, %xmm8, %eax
1027; SSE-NEXT: andb $1, %al
1028; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1029; SSE-NEXT: pextrb $2, %xmm8, %eax
1030; SSE-NEXT: andb $1, %al
1031; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1032; SSE-NEXT: pextrb $1, %xmm8, %eax
1033; SSE-NEXT: andb $1, %al
1034; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1035; SSE-NEXT: pextrb $0, %xmm8, %eax
1036; SSE-NEXT: andb $1, %al
1037; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1038; SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
1039; SSE-NEXT: shll $16, %eax
1040; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
1041; SSE-NEXT: orl %eax, %ecx
1042; SSE-NEXT: movl -{{[0-9]+}}(%rsp), %edx
1043; SSE-NEXT: shll $16, %edx
1044; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
1045; SSE-NEXT: orl %edx, %eax
1046; SSE-NEXT: shlq $32, %rax
1047; SSE-NEXT: orq %rcx, %rax
1048; SSE-NEXT: retq
1049;
1050; AVX1-LABEL: v64i8:
1051; AVX1: # BB#0:
1052; AVX1-NEXT: pushq %rbp
1053; AVX1-NEXT: .Lcfi0:
1054; AVX1-NEXT: .cfi_def_cfa_offset 16
1055; AVX1-NEXT: .Lcfi1:
1056; AVX1-NEXT: .cfi_offset %rbp, -16
1057; AVX1-NEXT: movq %rsp, %rbp
1058; AVX1-NEXT: .Lcfi2:
1059; AVX1-NEXT: .cfi_def_cfa_register %rbp
1060; AVX1-NEXT: andq $-32, %rsp
1061; AVX1-NEXT: subq $64, %rsp
1062; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
1063; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
1064; AVX1-NEXT: vpcmpgtb %xmm8, %xmm9, %xmm8
1065; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1
1066; AVX1-NEXT: vinsertf128 $1, %xmm8, %ymm1, %ymm8
1067; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
1068; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1069; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1
1070; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0
1071; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
1072; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm0
1073; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
1074; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
1075; AVX1-NEXT: vpcmpgtb %xmm7, %xmm5, %xmm2
1076; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1077; AVX1-NEXT: vandps %ymm0, %ymm8, %ymm0
1078; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
1079; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
1080; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
1081; AVX1-NEXT: vpcmpgtb %xmm6, %xmm4, %xmm3
1082; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1083; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
1084; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1085; AVX1-NEXT: vpextrb $15, %xmm2, %eax
1086; AVX1-NEXT: andb $1, %al
1087; AVX1-NEXT: movb %al, (%rsp)
1088; AVX1-NEXT: vpextrb $14, %xmm2, %eax
1089; AVX1-NEXT: andb $1, %al
1090; AVX1-NEXT: movb %al, (%rsp)
1091; AVX1-NEXT: vpextrb $13, %xmm2, %eax
1092; AVX1-NEXT: andb $1, %al
1093; AVX1-NEXT: movb %al, (%rsp)
1094; AVX1-NEXT: vpextrb $12, %xmm2, %eax
1095; AVX1-NEXT: andb $1, %al
1096; AVX1-NEXT: movb %al, (%rsp)
1097; AVX1-NEXT: vpextrb $11, %xmm2, %eax
1098; AVX1-NEXT: andb $1, %al
1099; AVX1-NEXT: movb %al, (%rsp)
1100; AVX1-NEXT: vpextrb $10, %xmm2, %eax
1101; AVX1-NEXT: andb $1, %al
1102; AVX1-NEXT: movb %al, (%rsp)
1103; AVX1-NEXT: vpextrb $9, %xmm2, %eax
1104; AVX1-NEXT: andb $1, %al
1105; AVX1-NEXT: movb %al, (%rsp)
1106; AVX1-NEXT: vpextrb $8, %xmm2, %eax
1107; AVX1-NEXT: andb $1, %al
1108; AVX1-NEXT: movb %al, (%rsp)
1109; AVX1-NEXT: vpextrb $7, %xmm2, %eax
1110; AVX1-NEXT: andb $1, %al
1111; AVX1-NEXT: movb %al, (%rsp)
1112; AVX1-NEXT: vpextrb $6, %xmm2, %eax
1113; AVX1-NEXT: andb $1, %al
1114; AVX1-NEXT: movb %al, (%rsp)
1115; AVX1-NEXT: vpextrb $5, %xmm2, %eax
1116; AVX1-NEXT: andb $1, %al
1117; AVX1-NEXT: movb %al, (%rsp)
1118; AVX1-NEXT: vpextrb $4, %xmm2, %eax
1119; AVX1-NEXT: andb $1, %al
1120; AVX1-NEXT: movb %al, (%rsp)
1121; AVX1-NEXT: vpextrb $3, %xmm2, %eax
1122; AVX1-NEXT: andb $1, %al
1123; AVX1-NEXT: movb %al, (%rsp)
1124; AVX1-NEXT: vpextrb $2, %xmm2, %eax
1125; AVX1-NEXT: andb $1, %al
1126; AVX1-NEXT: movb %al, (%rsp)
1127; AVX1-NEXT: vpextrb $1, %xmm2, %eax
1128; AVX1-NEXT: andb $1, %al
1129; AVX1-NEXT: movb %al, (%rsp)
1130; AVX1-NEXT: vpextrb $0, %xmm2, %eax
1131; AVX1-NEXT: andb $1, %al
1132; AVX1-NEXT: movb %al, (%rsp)
1133; AVX1-NEXT: vpextrb $15, %xmm1, %eax
1134; AVX1-NEXT: andb $1, %al
1135; AVX1-NEXT: movb %al, (%rsp)
1136; AVX1-NEXT: vpextrb $14, %xmm1, %eax
1137; AVX1-NEXT: andb $1, %al
1138; AVX1-NEXT: movb %al, (%rsp)
1139; AVX1-NEXT: vpextrb $13, %xmm1, %eax
1140; AVX1-NEXT: andb $1, %al
1141; AVX1-NEXT: movb %al, (%rsp)
1142; AVX1-NEXT: vpextrb $12, %xmm1, %eax
1143; AVX1-NEXT: andb $1, %al
1144; AVX1-NEXT: movb %al, (%rsp)
1145; AVX1-NEXT: vpextrb $11, %xmm1, %eax
1146; AVX1-NEXT: andb $1, %al
1147; AVX1-NEXT: movb %al, (%rsp)
1148; AVX1-NEXT: vpextrb $10, %xmm1, %eax
1149; AVX1-NEXT: andb $1, %al
1150; AVX1-NEXT: movb %al, (%rsp)
1151; AVX1-NEXT: vpextrb $9, %xmm1, %eax
1152; AVX1-NEXT: andb $1, %al
1153; AVX1-NEXT: movb %al, (%rsp)
1154; AVX1-NEXT: vpextrb $8, %xmm1, %eax
1155; AVX1-NEXT: andb $1, %al
1156; AVX1-NEXT: movb %al, (%rsp)
1157; AVX1-NEXT: vpextrb $7, %xmm1, %eax
1158; AVX1-NEXT: andb $1, %al
1159; AVX1-NEXT: movb %al, (%rsp)
1160; AVX1-NEXT: vpextrb $6, %xmm1, %eax
1161; AVX1-NEXT: andb $1, %al
1162; AVX1-NEXT: movb %al, (%rsp)
1163; AVX1-NEXT: vpextrb $5, %xmm1, %eax
1164; AVX1-NEXT: andb $1, %al
1165; AVX1-NEXT: movb %al, (%rsp)
1166; AVX1-NEXT: vpextrb $4, %xmm1, %eax
1167; AVX1-NEXT: andb $1, %al
1168; AVX1-NEXT: movb %al, (%rsp)
1169; AVX1-NEXT: vpextrb $3, %xmm1, %eax
1170; AVX1-NEXT: andb $1, %al
1171; AVX1-NEXT: movb %al, (%rsp)
1172; AVX1-NEXT: vpextrb $2, %xmm1, %eax
1173; AVX1-NEXT: andb $1, %al
1174; AVX1-NEXT: movb %al, (%rsp)
1175; AVX1-NEXT: vpextrb $1, %xmm1, %eax
1176; AVX1-NEXT: andb $1, %al
1177; AVX1-NEXT: movb %al, (%rsp)
1178; AVX1-NEXT: vpextrb $0, %xmm1, %eax
1179; AVX1-NEXT: andb $1, %al
1180; AVX1-NEXT: movb %al, (%rsp)
1181; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1182; AVX1-NEXT: vpextrb $15, %xmm1, %eax
1183; AVX1-NEXT: andb $1, %al
1184; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1185; AVX1-NEXT: vpextrb $14, %xmm1, %eax
1186; AVX1-NEXT: andb $1, %al
1187; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1188; AVX1-NEXT: vpextrb $13, %xmm1, %eax
1189; AVX1-NEXT: andb $1, %al
1190; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1191; AVX1-NEXT: vpextrb $12, %xmm1, %eax
1192; AVX1-NEXT: andb $1, %al
1193; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1194; AVX1-NEXT: vpextrb $11, %xmm1, %eax
1195; AVX1-NEXT: andb $1, %al
1196; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1197; AVX1-NEXT: vpextrb $10, %xmm1, %eax
1198; AVX1-NEXT: andb $1, %al
1199; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1200; AVX1-NEXT: vpextrb $9, %xmm1, %eax
1201; AVX1-NEXT: andb $1, %al
1202; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1203; AVX1-NEXT: vpextrb $8, %xmm1, %eax
1204; AVX1-NEXT: andb $1, %al
1205; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1206; AVX1-NEXT: vpextrb $7, %xmm1, %eax
1207; AVX1-NEXT: andb $1, %al
1208; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1209; AVX1-NEXT: vpextrb $6, %xmm1, %eax
1210; AVX1-NEXT: andb $1, %al
1211; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1212; AVX1-NEXT: vpextrb $5, %xmm1, %eax
1213; AVX1-NEXT: andb $1, %al
1214; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1215; AVX1-NEXT: vpextrb $4, %xmm1, %eax
1216; AVX1-NEXT: andb $1, %al
1217; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1218; AVX1-NEXT: vpextrb $3, %xmm1, %eax
1219; AVX1-NEXT: andb $1, %al
1220; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1221; AVX1-NEXT: vpextrb $2, %xmm1, %eax
1222; AVX1-NEXT: andb $1, %al
1223; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1224; AVX1-NEXT: vpextrb $1, %xmm1, %eax
1225; AVX1-NEXT: andb $1, %al
1226; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1227; AVX1-NEXT: vpextrb $0, %xmm1, %eax
1228; AVX1-NEXT: andb $1, %al
1229; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1230; AVX1-NEXT: vpextrb $15, %xmm0, %eax
1231; AVX1-NEXT: andb $1, %al
1232; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1233; AVX1-NEXT: vpextrb $14, %xmm0, %eax
1234; AVX1-NEXT: andb $1, %al
1235; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1236; AVX1-NEXT: vpextrb $13, %xmm0, %eax
1237; AVX1-NEXT: andb $1, %al
1238; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1239; AVX1-NEXT: vpextrb $12, %xmm0, %eax
1240; AVX1-NEXT: andb $1, %al
1241; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1242; AVX1-NEXT: vpextrb $11, %xmm0, %eax
1243; AVX1-NEXT: andb $1, %al
1244; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1245; AVX1-NEXT: vpextrb $10, %xmm0, %eax
1246; AVX1-NEXT: andb $1, %al
1247; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1248; AVX1-NEXT: vpextrb $9, %xmm0, %eax
1249; AVX1-NEXT: andb $1, %al
1250; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1251; AVX1-NEXT: vpextrb $8, %xmm0, %eax
1252; AVX1-NEXT: andb $1, %al
1253; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1254; AVX1-NEXT: vpextrb $7, %xmm0, %eax
1255; AVX1-NEXT: andb $1, %al
1256; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1257; AVX1-NEXT: vpextrb $6, %xmm0, %eax
1258; AVX1-NEXT: andb $1, %al
1259; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1260; AVX1-NEXT: vpextrb $5, %xmm0, %eax
1261; AVX1-NEXT: andb $1, %al
1262; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1263; AVX1-NEXT: vpextrb $4, %xmm0, %eax
1264; AVX1-NEXT: andb $1, %al
1265; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1266; AVX1-NEXT: vpextrb $3, %xmm0, %eax
1267; AVX1-NEXT: andb $1, %al
1268; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1269; AVX1-NEXT: vpextrb $2, %xmm0, %eax
1270; AVX1-NEXT: andb $1, %al
1271; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1272; AVX1-NEXT: vpextrb $1, %xmm0, %eax
1273; AVX1-NEXT: andb $1, %al
1274; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1275; AVX1-NEXT: vpextrb $0, %xmm0, %eax
1276; AVX1-NEXT: andb $1, %al
1277; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1278; AVX1-NEXT: movl (%rsp), %ecx
1279; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
1280; AVX1-NEXT: shlq $32, %rax
1281; AVX1-NEXT: orq %rcx, %rax
1282; AVX1-NEXT: movq %rbp, %rsp
1283; AVX1-NEXT: popq %rbp
1284; AVX1-NEXT: vzeroupper
1285; AVX1-NEXT: retq
1286;
1287; AVX2-LABEL: v64i8:
1288; AVX2: # BB#0:
1289; AVX2-NEXT: pushq %rbp
1290; AVX2-NEXT: .Lcfi0:
1291; AVX2-NEXT: .cfi_def_cfa_offset 16
1292; AVX2-NEXT: .Lcfi1:
1293; AVX2-NEXT: .cfi_offset %rbp, -16
1294; AVX2-NEXT: movq %rsp, %rbp
1295; AVX2-NEXT: .Lcfi2:
1296; AVX2-NEXT: .cfi_def_cfa_register %rbp
1297; AVX2-NEXT: andq $-32, %rsp
1298; AVX2-NEXT: subq $64, %rsp
1299; AVX2-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
1300; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm2
1301; AVX2-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm0
1302; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
1303; AVX2-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm1
1304; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
1305; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
1306; AVX2-NEXT: vpextrb $15, %xmm2, %eax
1307; AVX2-NEXT: andb $1, %al
1308; AVX2-NEXT: movb %al, (%rsp)
1309; AVX2-NEXT: vpextrb $14, %xmm2, %eax
1310; AVX2-NEXT: andb $1, %al
1311; AVX2-NEXT: movb %al, (%rsp)
1312; AVX2-NEXT: vpextrb $13, %xmm2, %eax
1313; AVX2-NEXT: andb $1, %al
1314; AVX2-NEXT: movb %al, (%rsp)
1315; AVX2-NEXT: vpextrb $12, %xmm2, %eax
1316; AVX2-NEXT: andb $1, %al
1317; AVX2-NEXT: movb %al, (%rsp)
1318; AVX2-NEXT: vpextrb $11, %xmm2, %eax
1319; AVX2-NEXT: andb $1, %al
1320; AVX2-NEXT: movb %al, (%rsp)
1321; AVX2-NEXT: vpextrb $10, %xmm2, %eax
1322; AVX2-NEXT: andb $1, %al
1323; AVX2-NEXT: movb %al, (%rsp)
1324; AVX2-NEXT: vpextrb $9, %xmm2, %eax
1325; AVX2-NEXT: andb $1, %al
1326; AVX2-NEXT: movb %al, (%rsp)
1327; AVX2-NEXT: vpextrb $8, %xmm2, %eax
1328; AVX2-NEXT: andb $1, %al
1329; AVX2-NEXT: movb %al, (%rsp)
1330; AVX2-NEXT: vpextrb $7, %xmm2, %eax
1331; AVX2-NEXT: andb $1, %al
1332; AVX2-NEXT: movb %al, (%rsp)
1333; AVX2-NEXT: vpextrb $6, %xmm2, %eax
1334; AVX2-NEXT: andb $1, %al
1335; AVX2-NEXT: movb %al, (%rsp)
1336; AVX2-NEXT: vpextrb $5, %xmm2, %eax
1337; AVX2-NEXT: andb $1, %al
1338; AVX2-NEXT: movb %al, (%rsp)
1339; AVX2-NEXT: vpextrb $4, %xmm2, %eax
1340; AVX2-NEXT: andb $1, %al
1341; AVX2-NEXT: movb %al, (%rsp)
1342; AVX2-NEXT: vpextrb $3, %xmm2, %eax
1343; AVX2-NEXT: andb $1, %al
1344; AVX2-NEXT: movb %al, (%rsp)
1345; AVX2-NEXT: vpextrb $2, %xmm2, %eax
1346; AVX2-NEXT: andb $1, %al
1347; AVX2-NEXT: movb %al, (%rsp)
1348; AVX2-NEXT: vpextrb $1, %xmm2, %eax
1349; AVX2-NEXT: andb $1, %al
1350; AVX2-NEXT: movb %al, (%rsp)
1351; AVX2-NEXT: vpextrb $0, %xmm2, %eax
1352; AVX2-NEXT: andb $1, %al
1353; AVX2-NEXT: movb %al, (%rsp)
1354; AVX2-NEXT: vpextrb $15, %xmm1, %eax
1355; AVX2-NEXT: andb $1, %al
1356; AVX2-NEXT: movb %al, (%rsp)
1357; AVX2-NEXT: vpextrb $14, %xmm1, %eax
1358; AVX2-NEXT: andb $1, %al
1359; AVX2-NEXT: movb %al, (%rsp)
1360; AVX2-NEXT: vpextrb $13, %xmm1, %eax
1361; AVX2-NEXT: andb $1, %al
1362; AVX2-NEXT: movb %al, (%rsp)
1363; AVX2-NEXT: vpextrb $12, %xmm1, %eax
1364; AVX2-NEXT: andb $1, %al
1365; AVX2-NEXT: movb %al, (%rsp)
1366; AVX2-NEXT: vpextrb $11, %xmm1, %eax
1367; AVX2-NEXT: andb $1, %al
1368; AVX2-NEXT: movb %al, (%rsp)
1369; AVX2-NEXT: vpextrb $10, %xmm1, %eax
1370; AVX2-NEXT: andb $1, %al
1371; AVX2-NEXT: movb %al, (%rsp)
1372; AVX2-NEXT: vpextrb $9, %xmm1, %eax
1373; AVX2-NEXT: andb $1, %al
1374; AVX2-NEXT: movb %al, (%rsp)
1375; AVX2-NEXT: vpextrb $8, %xmm1, %eax
1376; AVX2-NEXT: andb $1, %al
1377; AVX2-NEXT: movb %al, (%rsp)
1378; AVX2-NEXT: vpextrb $7, %xmm1, %eax
1379; AVX2-NEXT: andb $1, %al
1380; AVX2-NEXT: movb %al, (%rsp)
1381; AVX2-NEXT: vpextrb $6, %xmm1, %eax
1382; AVX2-NEXT: andb $1, %al
1383; AVX2-NEXT: movb %al, (%rsp)
1384; AVX2-NEXT: vpextrb $5, %xmm1, %eax
1385; AVX2-NEXT: andb $1, %al
1386; AVX2-NEXT: movb %al, (%rsp)
1387; AVX2-NEXT: vpextrb $4, %xmm1, %eax
1388; AVX2-NEXT: andb $1, %al
1389; AVX2-NEXT: movb %al, (%rsp)
1390; AVX2-NEXT: vpextrb $3, %xmm1, %eax
1391; AVX2-NEXT: andb $1, %al
1392; AVX2-NEXT: movb %al, (%rsp)
1393; AVX2-NEXT: vpextrb $2, %xmm1, %eax
1394; AVX2-NEXT: andb $1, %al
1395; AVX2-NEXT: movb %al, (%rsp)
1396; AVX2-NEXT: vpextrb $1, %xmm1, %eax
1397; AVX2-NEXT: andb $1, %al
1398; AVX2-NEXT: movb %al, (%rsp)
1399; AVX2-NEXT: vpextrb $0, %xmm1, %eax
1400; AVX2-NEXT: andb $1, %al
1401; AVX2-NEXT: movb %al, (%rsp)
1402; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1403; AVX2-NEXT: vpextrb $15, %xmm1, %eax
1404; AVX2-NEXT: andb $1, %al
1405; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1406; AVX2-NEXT: vpextrb $14, %xmm1, %eax
1407; AVX2-NEXT: andb $1, %al
1408; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1409; AVX2-NEXT: vpextrb $13, %xmm1, %eax
1410; AVX2-NEXT: andb $1, %al
1411; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1412; AVX2-NEXT: vpextrb $12, %xmm1, %eax
1413; AVX2-NEXT: andb $1, %al
1414; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1415; AVX2-NEXT: vpextrb $11, %xmm1, %eax
1416; AVX2-NEXT: andb $1, %al
1417; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1418; AVX2-NEXT: vpextrb $10, %xmm1, %eax
1419; AVX2-NEXT: andb $1, %al
1420; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1421; AVX2-NEXT: vpextrb $9, %xmm1, %eax
1422; AVX2-NEXT: andb $1, %al
1423; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1424; AVX2-NEXT: vpextrb $8, %xmm1, %eax
1425; AVX2-NEXT: andb $1, %al
1426; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1427; AVX2-NEXT: vpextrb $7, %xmm1, %eax
1428; AVX2-NEXT: andb $1, %al
1429; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1430; AVX2-NEXT: vpextrb $6, %xmm1, %eax
1431; AVX2-NEXT: andb $1, %al
1432; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1433; AVX2-NEXT: vpextrb $5, %xmm1, %eax
1434; AVX2-NEXT: andb $1, %al
1435; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1436; AVX2-NEXT: vpextrb $4, %xmm1, %eax
1437; AVX2-NEXT: andb $1, %al
1438; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1439; AVX2-NEXT: vpextrb $3, %xmm1, %eax
1440; AVX2-NEXT: andb $1, %al
1441; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1442; AVX2-NEXT: vpextrb $2, %xmm1, %eax
1443; AVX2-NEXT: andb $1, %al
1444; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1445; AVX2-NEXT: vpextrb $1, %xmm1, %eax
1446; AVX2-NEXT: andb $1, %al
1447; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1448; AVX2-NEXT: vpextrb $0, %xmm1, %eax
1449; AVX2-NEXT: andb $1, %al
1450; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1451; AVX2-NEXT: vpextrb $15, %xmm0, %eax
1452; AVX2-NEXT: andb $1, %al
1453; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1454; AVX2-NEXT: vpextrb $14, %xmm0, %eax
1455; AVX2-NEXT: andb $1, %al
1456; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1457; AVX2-NEXT: vpextrb $13, %xmm0, %eax
1458; AVX2-NEXT: andb $1, %al
1459; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1460; AVX2-NEXT: vpextrb $12, %xmm0, %eax
1461; AVX2-NEXT: andb $1, %al
1462; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1463; AVX2-NEXT: vpextrb $11, %xmm0, %eax
1464; AVX2-NEXT: andb $1, %al
1465; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1466; AVX2-NEXT: vpextrb $10, %xmm0, %eax
1467; AVX2-NEXT: andb $1, %al
1468; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1469; AVX2-NEXT: vpextrb $9, %xmm0, %eax
1470; AVX2-NEXT: andb $1, %al
1471; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1472; AVX2-NEXT: vpextrb $8, %xmm0, %eax
1473; AVX2-NEXT: andb $1, %al
1474; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1475; AVX2-NEXT: vpextrb $7, %xmm0, %eax
1476; AVX2-NEXT: andb $1, %al
1477; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1478; AVX2-NEXT: vpextrb $6, %xmm0, %eax
1479; AVX2-NEXT: andb $1, %al
1480; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1481; AVX2-NEXT: vpextrb $5, %xmm0, %eax
1482; AVX2-NEXT: andb $1, %al
1483; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1484; AVX2-NEXT: vpextrb $4, %xmm0, %eax
1485; AVX2-NEXT: andb $1, %al
1486; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1487; AVX2-NEXT: vpextrb $3, %xmm0, %eax
1488; AVX2-NEXT: andb $1, %al
1489; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1490; AVX2-NEXT: vpextrb $2, %xmm0, %eax
1491; AVX2-NEXT: andb $1, %al
1492; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1493; AVX2-NEXT: vpextrb $1, %xmm0, %eax
1494; AVX2-NEXT: andb $1, %al
1495; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1496; AVX2-NEXT: vpextrb $0, %xmm0, %eax
1497; AVX2-NEXT: andb $1, %al
1498; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1499; AVX2-NEXT: movl (%rsp), %ecx
1500; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
1501; AVX2-NEXT: shlq $32, %rax
1502; AVX2-NEXT: orq %rcx, %rax
1503; AVX2-NEXT: movq %rbp, %rsp
1504; AVX2-NEXT: popq %rbp
1505; AVX2-NEXT: vzeroupper
1506; AVX2-NEXT: retq
1507;
1508; AVX512F-LABEL: v64i8:
1509; AVX512F: # BB#0:
1510; AVX512F-NEXT: pushq %rbp
1511; AVX512F-NEXT: .Lcfi3:
1512; AVX512F-NEXT: .cfi_def_cfa_offset 16
1513; AVX512F-NEXT: .Lcfi4:
1514; AVX512F-NEXT: .cfi_offset %rbp, -16
1515; AVX512F-NEXT: movq %rsp, %rbp
1516; AVX512F-NEXT: .Lcfi5:
1517; AVX512F-NEXT: .cfi_def_cfa_register %rbp
1518; AVX512F-NEXT: andq $-32, %rsp
1519; AVX512F-NEXT: subq $64, %rsp
1520; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
1521; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
1522; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm2
1523; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
1524; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm2
1525; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
1526; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
1527; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
1528; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
1529; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
1530; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1531; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1532; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
1533; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1534; AVX512F-NEXT: kmovw %k0, (%rsp)
1535; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
1536; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1537; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
1538; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1539; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1540; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm0
1541; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
1542; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1543; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1544; AVX512F-NEXT: movl (%rsp), %ecx
1545; AVX512F-NEXT: movl {{[0-9]+}}(%rsp), %eax
1546; AVX512F-NEXT: shlq $32, %rax
1547; AVX512F-NEXT: orq %rcx, %rax
1548; AVX512F-NEXT: movq %rbp, %rsp
1549; AVX512F-NEXT: popq %rbp
1550; AVX512F-NEXT: vzeroupper
1551; AVX512F-NEXT: retq
1552;
1553; AVX512BW-LABEL: v64i8:
1554; AVX512BW: # BB#0:
1555; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k1
1556; AVX512BW-NEXT: vpcmpgtb %zmm3, %zmm2, %k0 {%k1}
1557; AVX512BW-NEXT: kmovq %k0, %rax
1558; AVX512BW-NEXT: vzeroupper
1559; AVX512BW-NEXT: retq
1560 %x0 = icmp sgt <64 x i8> %a, %b
1561 %x1 = icmp sgt <64 x i8> %c, %d
1562 %y = and <64 x i1> %x0, %x1
1563 %res = bitcast <64 x i1> %y to i64
1564 ret i64 %res
1565}