; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW

define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
; SSE-LABEL: v8i64:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtq %xmm7, %xmm3
; SSE-NEXT: pcmpgtq %xmm6, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT: pslld $31, %xmm2
; SSE-NEXT: psrad $31, %xmm2
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE-NEXT: pshufb %xmm3, %xmm2
; SSE-NEXT: pcmpgtq %xmm5, %xmm1
; SSE-NEXT: pcmpgtq %xmm4, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: pslld $31, %xmm0
; SSE-NEXT: psrad $31, %xmm0
; SSE-NEXT: pshufb %xmm3, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT: psllw $15, %xmm0
; SSE-NEXT: psraw $15, %xmm0
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: shufps {{.*#+}} xmm9 = xmm9[0,2],xmm11[0,2]
; SSE-NEXT: pslld $31, %xmm9
; SSE-NEXT: psrad $31, %xmm9
; SSE-NEXT: pshufb %xmm3, %xmm9
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,2],xmm10[0,2]
; SSE-NEXT: pslld $31, %xmm8
; SSE-NEXT: psrad $31, %xmm8
; SSE-NEXT: pshufb %xmm3, %xmm8
; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm9[0]
; SSE-NEXT: psllw $15, %xmm8
; SSE-NEXT: psraw $15, %xmm8
; SSE-NEXT: pand %xmm0, %xmm8
; SSE-NEXT: pshufb {{.*#+}} xmm8 = xmm8[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE-NEXT: pmovmskb %xmm8, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX1-LABEL: v8i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT: vpcmpgtq %xmm8, %xmm9, %xmm8
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm9
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm8, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm9[0]
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm2
; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm3
; AVX1-NEXT: vpacksswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpshufb %xmm8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
; AVX1-NEXT: vpsraw $15, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v8i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX2-NEXT: vpcmpgtq %ymm7, %ymm5, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
; AVX2-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vpsllw $15, %xmm1, %xmm1
; AVX2-NEXT: vpsraw $15, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v8i64:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
; AVX512F-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v8i64:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = icmp sgt <8 x i64> %a, %b
  %x1 = icmp sgt <8 x i64> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
; SSE-LABEL: v8f64:
; SSE: # BB#0:
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: cmpltpd %xmm3, %xmm7
; SSE-NEXT: cmpltpd %xmm2, %xmm6
; SSE-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
; SSE-NEXT: pslld $31, %xmm6
; SSE-NEXT: psrad $31, %xmm6
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE-NEXT: pshufb %xmm2, %xmm6
; SSE-NEXT: cmpltpd %xmm1, %xmm5
; SSE-NEXT: cmpltpd %xmm0, %xmm4
; SSE-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,2],xmm5[0,2]
; SSE-NEXT: pslld $31, %xmm4
; SSE-NEXT: psrad $31, %xmm4
; SSE-NEXT: pshufb %xmm2, %xmm4
; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
; SSE-NEXT: psllw $15, %xmm4
; SSE-NEXT: psraw $15, %xmm4
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: shufps {{.*#+}} xmm9 = xmm9[0,2],xmm11[0,2]
; SSE-NEXT: pslld $31, %xmm9
; SSE-NEXT: psrad $31, %xmm9
; SSE-NEXT: pshufb %xmm2, %xmm9
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,2],xmm10[0,2]
; SSE-NEXT: pslld $31, %xmm8
; SSE-NEXT: psrad $31, %xmm8
; SSE-NEXT: pshufb %xmm2, %xmm8
; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm9[0]
; SSE-NEXT: psllw $15, %xmm8
; SSE-NEXT: psraw $15, %xmm8
; SSE-NEXT: pand %xmm4, %xmm8
; SSE-NEXT: pshufb {{.*#+}} xmm8 = xmm8[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE-NEXT: pmovmskb %xmm8, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX12-LABEL: v8f64:
; AVX12: # BB#0:
; AVX12-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX12-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX12-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX12-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX12-NEXT: vcmpltpd %ymm5, %ymm7, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vcmpltpd %ymm4, %ymm6, %ymm2
; AVX12-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX12-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
; AVX12-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX12-NEXT: vpsllw $15, %xmm1, %xmm1
; AVX12-NEXT: vpsraw $15, %xmm1, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX12-NEXT: vzeroupper
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v8f64:
; AVX512F: # BB#0:
; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; AVX512F-NEXT: vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v8f64:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; AVX512BW-NEXT: vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = fcmp ogt <8 x double> %a, %b
  %x1 = fcmp ogt <8 x double> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

define i32 @v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i16> %d) {
; SSE-LABEL: v32i16:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtw %xmm5, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE-NEXT: pshufb %xmm5, %xmm1
; SSE-NEXT: pcmpgtw %xmm4, %xmm0
; SSE-NEXT: pshufb %xmm5, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: pcmpgtw %xmm7, %xmm3
; SSE-NEXT: pshufb %xmm5, %xmm3
; SSE-NEXT: pcmpgtw %xmm6, %xmm2
; SSE-NEXT: pshufb %xmm5, %xmm2
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pshufb %xmm5, %xmm11
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: pshufb %xmm5, %xmm8
; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm11[0]
; SSE-NEXT: pand %xmm0, %xmm8
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: pshufb %xmm5, %xmm10
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pshufb %xmm5, %xmm9
; SSE-NEXT: punpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm10[0]
; SSE-NEXT: pand %xmm2, %xmm9
; SSE-NEXT: pextrb $15, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $14, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $13, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $12, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $11, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $10, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $9, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $8, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $7, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $6, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $5, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $4, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $3, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $2, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $1, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $0, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $15, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $14, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $13, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $12, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $11, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $10, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $9, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $8, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $7, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $6, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $5, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $4, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $3, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $2, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $1, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $0, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
; SSE-NEXT: shll $16, %ecx
; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: orl %ecx, %eax
; SSE-NEXT: retq
;
; AVX1-LABEL: v32i16:
; AVX1: # BB#0:
; AVX1-NEXT: pushq %rbp
; AVX1-NEXT: .Lcfi0:
; AVX1-NEXT: .cfi_def_cfa_offset 16
; AVX1-NEXT: .Lcfi1:
; AVX1-NEXT: .cfi_offset %rbp, -16
; AVX1-NEXT: movq %rsp, %rbp
; AVX1-NEXT: .Lcfi2:
; AVX1-NEXT: .cfi_def_cfa_register %rbp
; AVX1-NEXT: andq $-32, %rsp
; AVX1-NEXT: subq $32, %rsp
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT: vpcmpgtw %xmm8, %xmm9, %xmm8
; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm8, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm7, %xmm5, %xmm2
; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm6, %xmm4, %xmm3
; AVX1-NEXT: vpacksswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpextrb $15, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $14, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $13, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $12, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $11, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $10, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $9, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $8, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $7, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $6, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $5, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $4, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $3, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $2, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $1, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $0, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $15, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $14, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $13, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $12, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $11, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $10, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $9, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $8, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $7, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $6, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $5, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $4, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $3, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $2, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $1, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: movl (%rsp), %eax
; AVX1-NEXT: movq %rbp, %rsp
; AVX1-NEXT: popq %rbp
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v32i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v32i16:
; AVX512F: # BB#0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: .Lcfi0:
; AVX512F-NEXT: .cfi_def_cfa_offset 16
; AVX512F-NEXT: .Lcfi1:
; AVX512F-NEXT: .cfi_offset %rbp, -16
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: .Lcfi2:
; AVX512F-NEXT: .cfi_def_cfa_register %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm1
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm0
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm1
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: kshiftlw $15, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %ecx
; AVX512F-NEXT: vmovd %ecx, %xmm2
; AVX512F-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $13, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $12, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $11, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $10, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $9, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $8, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $7, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $6, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $5, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $4, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $3, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $2, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftlw $1, %k0, %k1
; AVX512F-NEXT: kshiftrw $15, %k1, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, (%rsp)
; AVX512F-NEXT: movl (%rsp), %eax
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v32i16:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtw %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = icmp sgt <32 x i16> %a, %b
  %x1 = icmp sgt <32 x i16> %c, %d
  %y = and <32 x i1> %x0, %x1
  %res = bitcast <32 x i1> %y to i32
  ret i32 %res
}

define i16 @v16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i32> %d) {
; SSE-LABEL: v16i32:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtd %xmm7, %xmm3
; SSE-NEXT: movdqa {{.*#+}} xmm7 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE-NEXT: pshufb %xmm7, %xmm3
; SSE-NEXT: pcmpgtd %xmm6, %xmm2
; SSE-NEXT: pshufb %xmm7, %xmm2
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: psllw $15, %xmm2
; SSE-NEXT: psraw $15, %xmm2
; SSE-NEXT: movdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE-NEXT: pshufb %xmm3, %xmm2
; SSE-NEXT: pcmpgtd %xmm5, %xmm1
; SSE-NEXT: pshufb %xmm7, %xmm1
; SSE-NEXT: pcmpgtd %xmm4, %xmm0
; SSE-NEXT: pshufb %xmm7, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: psllw $15, %xmm0
; SSE-NEXT: psraw $15, %xmm0
; SSE-NEXT: pshufb %xmm3, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT: psllw $7, %xmm0
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pxor %xmm4, %xmm4
; SSE-NEXT: pcmpgtb %xmm0, %xmm4
; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pshufb %xmm7, %xmm11
; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pshufb %xmm7, %xmm9
; SSE-NEXT: punpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0]
; SSE-NEXT: psllw $15, %xmm9
; SSE-NEXT: psraw $15, %xmm9
; SSE-NEXT: pshufb %xmm3, %xmm9
; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: pshufb %xmm7, %xmm10
; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: pshufb %xmm7, %xmm8
; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm10[0]
; SSE-NEXT: psllw $15, %xmm8
; SSE-NEXT: psraw $15, %xmm8
; SSE-NEXT: pshufb %xmm3, %xmm8
; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm9[0]
; SSE-NEXT: psllw $7, %xmm8
; SSE-NEXT: pand %xmm2, %xmm8
; SSE-NEXT: pcmpgtb %xmm8, %xmm1
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: pmovmskb %xmm1, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX1-LABEL: v16i32:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT: vpcmpgtd %xmm8, %xmm9, %xmm8
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm9
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm8, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm9[0]
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm9 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT: vpand %xmm9, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm7, %xmm5, %xmm3
; AVX1-NEXT: vpacksswb %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm5
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm5, %xmm3
; AVX1-NEXT: vpcmpgtd %xmm6, %xmm4, %xmm4
; AVX1-NEXT: vpacksswb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpshufb %xmm8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm9, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v16i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX2-NEXT: vpcmpgtd %ymm7, %ymm5, %ymm5
; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm7
; AVX2-NEXT: vpacksswb %xmm7, %xmm5, %xmm5
; AVX2-NEXT: vpshufb %xmm3, %xmm5, %xmm5
; AVX2-NEXT: vpcmpgtd %ymm6, %ymm4, %ymm4
; AVX2-NEXT: vextracti128 $1, %ymm4, %xmm6
; AVX2-NEXT: vpacksswb %xmm6, %xmm4, %xmm4
; AVX2-NEXT: vpshufb %xmm3, %xmm4, %xmm3
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
; AVX2-NEXT: vpsllw $7, %xmm3, %xmm3
; AVX2-NEXT: vpand %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v16i32:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
; AVX512F-NEXT: vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16i32:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = icmp sgt <16 x i32> %a, %b
  %x1 = icmp sgt <16 x i32> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}

define i16 @v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) {
; SSE-LABEL: v16f32:
; SSE: # BB#0:
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: cmpltps %xmm3, %xmm7
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE-NEXT: pshufb %xmm3, %xmm7
; SSE-NEXT: cmpltps %xmm2, %xmm6
; SSE-NEXT: pshufb %xmm3, %xmm6
; SSE-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm7[0]
; SSE-NEXT: psllw $15, %xmm6
; SSE-NEXT: psraw $15, %xmm6
; SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE-NEXT: pshufb %xmm2, %xmm6
; SSE-NEXT: cmpltps %xmm1, %xmm5
; SSE-NEXT: pshufb %xmm3, %xmm5
; SSE-NEXT: cmpltps %xmm0, %xmm4
; SSE-NEXT: pshufb %xmm3, %xmm4
; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
; SSE-NEXT: psllw $15, %xmm4
; SSE-NEXT: psraw $15, %xmm4
; SSE-NEXT: pshufb %xmm2, %xmm4
; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
; SSE-NEXT: psllw $7, %xmm4
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE-NEXT: pand %xmm1, %xmm4
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: pxor %xmm5, %xmm5
; SSE-NEXT: pcmpgtb %xmm4, %xmm5
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pshufb %xmm3, %xmm11
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pshufb %xmm3, %xmm9
; SSE-NEXT: punpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0]
; SSE-NEXT: psllw $15, %xmm9
; SSE-NEXT: psraw $15, %xmm9
; SSE-NEXT: pshufb %xmm2, %xmm9
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: pshufb %xmm3, %xmm10
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: pshufb %xmm3, %xmm8
; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm10[0]
; SSE-NEXT: psllw $15, %xmm8
; SSE-NEXT: psraw $15, %xmm8
; SSE-NEXT: pshufb %xmm2, %xmm8
; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm9[0]
; SSE-NEXT: psllw $7, %xmm8
; SSE-NEXT: pand %xmm1, %xmm8
; SSE-NEXT: pcmpgtb %xmm8, %xmm0
; SSE-NEXT: pand %xmm5, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX12-LABEL: v16f32:
; AVX12: # BB#0:
; AVX12-NEXT: vcmpltps %ymm1, %ymm3, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX12-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX12-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX12-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX12-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX12-NEXT: vcmpltps %ymm5, %ymm7, %ymm5
; AVX12-NEXT: vextractf128 $1, %ymm5, %xmm7
; AVX12-NEXT: vpacksswb %xmm7, %xmm5, %xmm5
; AVX12-NEXT: vpshufb %xmm3, %xmm5, %xmm5
; AVX12-NEXT: vcmpltps %ymm4, %ymm6, %ymm4
; AVX12-NEXT: vextractf128 $1, %ymm4, %xmm6
; AVX12-NEXT: vpacksswb %xmm6, %xmm4, %xmm4
; AVX12-NEXT: vpshufb %xmm3, %xmm4, %xmm3
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
; AVX12-NEXT: vpsllw $7, %xmm3, %xmm3
; AVX12-NEXT: vpand %xmm1, %xmm3, %xmm1
; AVX12-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX12-NEXT: vzeroupper
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v16f32:
; AVX512F: # BB#0:
; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1
; AVX512F-NEXT: vcmpltps %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16f32:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vcmpltps %zmm0, %zmm1, %k1
; AVX512BW-NEXT: vcmpltps %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = fcmp ogt <16 x float> %a, %b
  %x1 = fcmp ogt <16 x float> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}

define i64 @v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) {
; SSE-LABEL: v64i8:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: pcmpgtb %xmm6, %xmm2
; SSE-NEXT: pcmpgtb %xmm7, %xmm3
; SSE-NEXT: pcmpgtb %xmm4, %xmm0
; SSE-NEXT: pcmpgtb %xmm5, %xmm1
; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: pand %xmm2, %xmm8
; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pand %xmm3, %xmm9
; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: pand %xmm0, %xmm10
; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pand %xmm1, %xmm11
; SSE-NEXT: pextrb $15, %xmm11, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $14, %xmm11, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $13, %xmm11, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $12, %xmm11, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $11, %xmm11, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $10, %xmm11, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $9, %xmm11, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $8, %xmm11, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $7, %xmm11, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $6, %xmm11, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $5, %xmm11, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $4, %xmm11, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $3, %xmm11, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $2, %xmm11, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $1, %xmm11, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $0, %xmm11, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $15, %xmm10, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $14, %xmm10, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $13, %xmm10, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $12, %xmm10, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $11, %xmm10, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $10, %xmm10, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $9, %xmm10, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $8, %xmm10, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $7, %xmm10, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $6, %xmm10, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $5, %xmm10, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $4, %xmm10, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $3, %xmm10, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $2, %xmm10, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $1, %xmm10, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $0, %xmm10, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $15, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $14, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $13, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $12, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $11, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $10, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $9, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $8, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $7, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $6, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $5, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $4, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $3, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $2, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $1, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $0, %xmm9, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $15, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $14, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $13, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $12, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $11, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $10, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $9, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $8, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $7, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $6, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $5, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $4, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $3, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $2, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $1, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: pextrb $0, %xmm8, %eax
; SSE-NEXT: andb $1, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: shll $16, %eax
; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
; SSE-NEXT: orl %eax, %ecx
; SSE-NEXT: movl -{{[0-9]+}}(%rsp), %edx
; SSE-NEXT: shll $16, %edx
; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: orl %edx, %eax
; SSE-NEXT: shlq $32, %rax
; SSE-NEXT: orq %rcx, %rax
; SSE-NEXT: retq
;
; AVX1-LABEL: v64i8:
; AVX1: # BB#0:
; AVX1-NEXT: pushq %rbp
; AVX1-NEXT: .Lcfi3:
; AVX1-NEXT: .cfi_def_cfa_offset 16
; AVX1-NEXT: .Lcfi4:
; AVX1-NEXT: .cfi_offset %rbp, -16
; AVX1-NEXT: movq %rsp, %rbp
; AVX1-NEXT: .Lcfi5:
; AVX1-NEXT: .cfi_def_cfa_register %rbp
; AVX1-NEXT: andq $-32, %rsp
; AVX1-NEXT: subq $64, %rsp
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT: vpcmpgtb %xmm8, %xmm9, %xmm8
; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm8, %ymm1, %ymm8
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vpcmpgtb %xmm7, %xmm5, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vandps %ymm0, %ymm8, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtb %xmm6, %xmm4, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpextrb $15, %xmm2, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $14, %xmm2, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $13, %xmm2, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $12, %xmm2, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $11, %xmm2, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $10, %xmm2, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $9, %xmm2, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $8, %xmm2, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $7, %xmm2, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $6, %xmm2, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $5, %xmm2, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $4, %xmm2, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $3, %xmm2, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $2, %xmm2, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $1, %xmm2, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $0, %xmm2, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $15, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $14, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $13, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $12, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $11, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $10, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $9, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $8, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $7, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $6, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $5, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $4, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $3, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $2, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $1, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $0, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpextrb $15, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX1-NEXT: vpextrb $14, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX1-NEXT: vpextrb $13, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX1-NEXT: vpextrb $12, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX1-NEXT: vpextrb $11, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX1-NEXT: vpextrb $10, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX1-NEXT: vpextrb $9, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX1-NEXT: vpextrb $8, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX1-NEXT: vpextrb $7, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX1-NEXT: vpextrb $6, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX1-NEXT: vpextrb $5, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX1-NEXT: vpextrb $4, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX1-NEXT: vpextrb $3, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX1-NEXT: vpextrb $2, %xmm1, %eax
1525; AVX1-NEXT: andb $1, %al
1526; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1527; AVX1-NEXT: vpextrb $1, %xmm1, %eax
1528; AVX1-NEXT: andb $1, %al
1529; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1530; AVX1-NEXT: vpextrb $0, %xmm1, %eax
1531; AVX1-NEXT: andb $1, %al
1532; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1533; AVX1-NEXT: vpextrb $15, %xmm0, %eax
1534; AVX1-NEXT: andb $1, %al
1535; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1536; AVX1-NEXT: vpextrb $14, %xmm0, %eax
1537; AVX1-NEXT: andb $1, %al
1538; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1539; AVX1-NEXT: vpextrb $13, %xmm0, %eax
1540; AVX1-NEXT: andb $1, %al
1541; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1542; AVX1-NEXT: vpextrb $12, %xmm0, %eax
1543; AVX1-NEXT: andb $1, %al
1544; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1545; AVX1-NEXT: vpextrb $11, %xmm0, %eax
1546; AVX1-NEXT: andb $1, %al
1547; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1548; AVX1-NEXT: vpextrb $10, %xmm0, %eax
1549; AVX1-NEXT: andb $1, %al
1550; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1551; AVX1-NEXT: vpextrb $9, %xmm0, %eax
1552; AVX1-NEXT: andb $1, %al
1553; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1554; AVX1-NEXT: vpextrb $8, %xmm0, %eax
1555; AVX1-NEXT: andb $1, %al
1556; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1557; AVX1-NEXT: vpextrb $7, %xmm0, %eax
1558; AVX1-NEXT: andb $1, %al
1559; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1560; AVX1-NEXT: vpextrb $6, %xmm0, %eax
1561; AVX1-NEXT: andb $1, %al
1562; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1563; AVX1-NEXT: vpextrb $5, %xmm0, %eax
1564; AVX1-NEXT: andb $1, %al
1565; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1566; AVX1-NEXT: vpextrb $4, %xmm0, %eax
1567; AVX1-NEXT: andb $1, %al
1568; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1569; AVX1-NEXT: vpextrb $3, %xmm0, %eax
1570; AVX1-NEXT: andb $1, %al
1571; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1572; AVX1-NEXT: vpextrb $2, %xmm0, %eax
1573; AVX1-NEXT: andb $1, %al
1574; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1575; AVX1-NEXT: vpextrb $1, %xmm0, %eax
1576; AVX1-NEXT: andb $1, %al
1577; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1578; AVX1-NEXT: vpextrb $0, %xmm0, %eax
1579; AVX1-NEXT: andb $1, %al
1580; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
1581; AVX1-NEXT: movl (%rsp), %ecx
1582; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
1583; AVX1-NEXT: shlq $32, %rax
1584; AVX1-NEXT: orq %rcx, %rax
1585; AVX1-NEXT: movq %rbp, %rsp
1586; AVX1-NEXT: popq %rbp
1587; AVX1-NEXT: vzeroupper
1588; AVX1-NEXT: retq
1589;
1590; AVX2-LABEL: v64i8:
1591; AVX2: # BB#0:
1592; AVX2-NEXT: pushq %rbp
1593; AVX2-NEXT: .Lcfi0:
1594; AVX2-NEXT: .cfi_def_cfa_offset 16
1595; AVX2-NEXT: .Lcfi1:
1596; AVX2-NEXT: .cfi_offset %rbp, -16
1597; AVX2-NEXT: movq %rsp, %rbp
1598; AVX2-NEXT: .Lcfi2:
1599; AVX2-NEXT: .cfi_def_cfa_register %rbp
1600; AVX2-NEXT: andq $-32, %rsp
1601; AVX2-NEXT: subq $64, %rsp
1602; AVX2-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
1603; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm2
1604; AVX2-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm0
1605; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
1606; AVX2-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm1
1607; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
1608; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
1609; AVX2-NEXT: vpextrb $15, %xmm2, %eax
1610; AVX2-NEXT: andb $1, %al
1611; AVX2-NEXT: movb %al, (%rsp)
1612; AVX2-NEXT: vpextrb $14, %xmm2, %eax
1613; AVX2-NEXT: andb $1, %al
1614; AVX2-NEXT: movb %al, (%rsp)
1615; AVX2-NEXT: vpextrb $13, %xmm2, %eax
1616; AVX2-NEXT: andb $1, %al
1617; AVX2-NEXT: movb %al, (%rsp)
1618; AVX2-NEXT: vpextrb $12, %xmm2, %eax
1619; AVX2-NEXT: andb $1, %al
1620; AVX2-NEXT: movb %al, (%rsp)
1621; AVX2-NEXT: vpextrb $11, %xmm2, %eax
1622; AVX2-NEXT: andb $1, %al
1623; AVX2-NEXT: movb %al, (%rsp)
1624; AVX2-NEXT: vpextrb $10, %xmm2, %eax
1625; AVX2-NEXT: andb $1, %al
1626; AVX2-NEXT: movb %al, (%rsp)
1627; AVX2-NEXT: vpextrb $9, %xmm2, %eax
1628; AVX2-NEXT: andb $1, %al
1629; AVX2-NEXT: movb %al, (%rsp)
1630; AVX2-NEXT: vpextrb $8, %xmm2, %eax
1631; AVX2-NEXT: andb $1, %al
1632; AVX2-NEXT: movb %al, (%rsp)
1633; AVX2-NEXT: vpextrb $7, %xmm2, %eax
1634; AVX2-NEXT: andb $1, %al
1635; AVX2-NEXT: movb %al, (%rsp)
1636; AVX2-NEXT: vpextrb $6, %xmm2, %eax
1637; AVX2-NEXT: andb $1, %al
1638; AVX2-NEXT: movb %al, (%rsp)
1639; AVX2-NEXT: vpextrb $5, %xmm2, %eax
1640; AVX2-NEXT: andb $1, %al
1641; AVX2-NEXT: movb %al, (%rsp)
1642; AVX2-NEXT: vpextrb $4, %xmm2, %eax
1643; AVX2-NEXT: andb $1, %al
1644; AVX2-NEXT: movb %al, (%rsp)
1645; AVX2-NEXT: vpextrb $3, %xmm2, %eax
1646; AVX2-NEXT: andb $1, %al
1647; AVX2-NEXT: movb %al, (%rsp)
1648; AVX2-NEXT: vpextrb $2, %xmm2, %eax
1649; AVX2-NEXT: andb $1, %al
1650; AVX2-NEXT: movb %al, (%rsp)
1651; AVX2-NEXT: vpextrb $1, %xmm2, %eax
1652; AVX2-NEXT: andb $1, %al
1653; AVX2-NEXT: movb %al, (%rsp)
1654; AVX2-NEXT: vpextrb $0, %xmm2, %eax
1655; AVX2-NEXT: andb $1, %al
1656; AVX2-NEXT: movb %al, (%rsp)
1657; AVX2-NEXT: vpextrb $15, %xmm1, %eax
1658; AVX2-NEXT: andb $1, %al
1659; AVX2-NEXT: movb %al, (%rsp)
1660; AVX2-NEXT: vpextrb $14, %xmm1, %eax
1661; AVX2-NEXT: andb $1, %al
1662; AVX2-NEXT: movb %al, (%rsp)
1663; AVX2-NEXT: vpextrb $13, %xmm1, %eax
1664; AVX2-NEXT: andb $1, %al
1665; AVX2-NEXT: movb %al, (%rsp)
1666; AVX2-NEXT: vpextrb $12, %xmm1, %eax
1667; AVX2-NEXT: andb $1, %al
1668; AVX2-NEXT: movb %al, (%rsp)
1669; AVX2-NEXT: vpextrb $11, %xmm1, %eax
1670; AVX2-NEXT: andb $1, %al
1671; AVX2-NEXT: movb %al, (%rsp)
1672; AVX2-NEXT: vpextrb $10, %xmm1, %eax
1673; AVX2-NEXT: andb $1, %al
1674; AVX2-NEXT: movb %al, (%rsp)
1675; AVX2-NEXT: vpextrb $9, %xmm1, %eax
1676; AVX2-NEXT: andb $1, %al
1677; AVX2-NEXT: movb %al, (%rsp)
1678; AVX2-NEXT: vpextrb $8, %xmm1, %eax
1679; AVX2-NEXT: andb $1, %al
1680; AVX2-NEXT: movb %al, (%rsp)
1681; AVX2-NEXT: vpextrb $7, %xmm1, %eax
1682; AVX2-NEXT: andb $1, %al
1683; AVX2-NEXT: movb %al, (%rsp)
1684; AVX2-NEXT: vpextrb $6, %xmm1, %eax
1685; AVX2-NEXT: andb $1, %al
1686; AVX2-NEXT: movb %al, (%rsp)
1687; AVX2-NEXT: vpextrb $5, %xmm1, %eax
1688; AVX2-NEXT: andb $1, %al
1689; AVX2-NEXT: movb %al, (%rsp)
1690; AVX2-NEXT: vpextrb $4, %xmm1, %eax
1691; AVX2-NEXT: andb $1, %al
1692; AVX2-NEXT: movb %al, (%rsp)
1693; AVX2-NEXT: vpextrb $3, %xmm1, %eax
1694; AVX2-NEXT: andb $1, %al
1695; AVX2-NEXT: movb %al, (%rsp)
1696; AVX2-NEXT: vpextrb $2, %xmm1, %eax
1697; AVX2-NEXT: andb $1, %al
1698; AVX2-NEXT: movb %al, (%rsp)
1699; AVX2-NEXT: vpextrb $1, %xmm1, %eax
1700; AVX2-NEXT: andb $1, %al
1701; AVX2-NEXT: movb %al, (%rsp)
1702; AVX2-NEXT: vpextrb $0, %xmm1, %eax
1703; AVX2-NEXT: andb $1, %al
1704; AVX2-NEXT: movb %al, (%rsp)
1705; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1706; AVX2-NEXT: vpextrb $15, %xmm1, %eax
1707; AVX2-NEXT: andb $1, %al
1708; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1709; AVX2-NEXT: vpextrb $14, %xmm1, %eax
1710; AVX2-NEXT: andb $1, %al
1711; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1712; AVX2-NEXT: vpextrb $13, %xmm1, %eax
1713; AVX2-NEXT: andb $1, %al
1714; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1715; AVX2-NEXT: vpextrb $12, %xmm1, %eax
1716; AVX2-NEXT: andb $1, %al
1717; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1718; AVX2-NEXT: vpextrb $11, %xmm1, %eax
1719; AVX2-NEXT: andb $1, %al
1720; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1721; AVX2-NEXT: vpextrb $10, %xmm1, %eax
1722; AVX2-NEXT: andb $1, %al
1723; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1724; AVX2-NEXT: vpextrb $9, %xmm1, %eax
1725; AVX2-NEXT: andb $1, %al
1726; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1727; AVX2-NEXT: vpextrb $8, %xmm1, %eax
1728; AVX2-NEXT: andb $1, %al
1729; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1730; AVX2-NEXT: vpextrb $7, %xmm1, %eax
1731; AVX2-NEXT: andb $1, %al
1732; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1733; AVX2-NEXT: vpextrb $6, %xmm1, %eax
1734; AVX2-NEXT: andb $1, %al
1735; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1736; AVX2-NEXT: vpextrb $5, %xmm1, %eax
1737; AVX2-NEXT: andb $1, %al
1738; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1739; AVX2-NEXT: vpextrb $4, %xmm1, %eax
1740; AVX2-NEXT: andb $1, %al
1741; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1742; AVX2-NEXT: vpextrb $3, %xmm1, %eax
1743; AVX2-NEXT: andb $1, %al
1744; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1745; AVX2-NEXT: vpextrb $2, %xmm1, %eax
1746; AVX2-NEXT: andb $1, %al
1747; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1748; AVX2-NEXT: vpextrb $1, %xmm1, %eax
1749; AVX2-NEXT: andb $1, %al
1750; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1751; AVX2-NEXT: vpextrb $0, %xmm1, %eax
1752; AVX2-NEXT: andb $1, %al
1753; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1754; AVX2-NEXT: vpextrb $15, %xmm0, %eax
1755; AVX2-NEXT: andb $1, %al
1756; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1757; AVX2-NEXT: vpextrb $14, %xmm0, %eax
1758; AVX2-NEXT: andb $1, %al
1759; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1760; AVX2-NEXT: vpextrb $13, %xmm0, %eax
1761; AVX2-NEXT: andb $1, %al
1762; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1763; AVX2-NEXT: vpextrb $12, %xmm0, %eax
1764; AVX2-NEXT: andb $1, %al
1765; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1766; AVX2-NEXT: vpextrb $11, %xmm0, %eax
1767; AVX2-NEXT: andb $1, %al
1768; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1769; AVX2-NEXT: vpextrb $10, %xmm0, %eax
1770; AVX2-NEXT: andb $1, %al
1771; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1772; AVX2-NEXT: vpextrb $9, %xmm0, %eax
1773; AVX2-NEXT: andb $1, %al
1774; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1775; AVX2-NEXT: vpextrb $8, %xmm0, %eax
1776; AVX2-NEXT: andb $1, %al
1777; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1778; AVX2-NEXT: vpextrb $7, %xmm0, %eax
1779; AVX2-NEXT: andb $1, %al
1780; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1781; AVX2-NEXT: vpextrb $6, %xmm0, %eax
1782; AVX2-NEXT: andb $1, %al
1783; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1784; AVX2-NEXT: vpextrb $5, %xmm0, %eax
1785; AVX2-NEXT: andb $1, %al
1786; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1787; AVX2-NEXT: vpextrb $4, %xmm0, %eax
1788; AVX2-NEXT: andb $1, %al
1789; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1790; AVX2-NEXT: vpextrb $3, %xmm0, %eax
1791; AVX2-NEXT: andb $1, %al
1792; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1793; AVX2-NEXT: vpextrb $2, %xmm0, %eax
1794; AVX2-NEXT: andb $1, %al
1795; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1796; AVX2-NEXT: vpextrb $1, %xmm0, %eax
1797; AVX2-NEXT: andb $1, %al
1798; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1799; AVX2-NEXT: vpextrb $0, %xmm0, %eax
1800; AVX2-NEXT: andb $1, %al
1801; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1802; AVX2-NEXT: movl (%rsp), %ecx
1803; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
1804; AVX2-NEXT: shlq $32, %rax
1805; AVX2-NEXT: orq %rcx, %rax
1806; AVX2-NEXT: movq %rbp, %rsp
1807; AVX2-NEXT: popq %rbp
1808; AVX2-NEXT: vzeroupper
1809; AVX2-NEXT: retq
1810;
1811; AVX512F-LABEL: v64i8:
1812; AVX512F: # BB#0:
1813; AVX512F-NEXT: pushq %rbp
1814; AVX512F-NEXT: .Lcfi3:
1815; AVX512F-NEXT: .cfi_def_cfa_offset 16
1816; AVX512F-NEXT: .Lcfi4:
1817; AVX512F-NEXT: .cfi_offset %rbp, -16
1818; AVX512F-NEXT: movq %rsp, %rbp
1819; AVX512F-NEXT: .Lcfi5:
1820; AVX512F-NEXT: .cfi_def_cfa_register %rbp
1821; AVX512F-NEXT: andq $-32, %rsp
1822; AVX512F-NEXT: subq $64, %rsp
1823; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
1824; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
1825; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm2
1826; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
1827; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm2
1828; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
1829; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
1830; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
1831; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
1832; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
1833; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1834; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1835; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
1836; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1837; AVX512F-NEXT: kmovw %k0, (%rsp)
1838; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
1839; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1840; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
1841; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1842; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1843; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm0
1844; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
1845; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1846; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1847; AVX512F-NEXT: movl (%rsp), %ecx
1848; AVX512F-NEXT: movl {{[0-9]+}}(%rsp), %eax
1849; AVX512F-NEXT: shlq $32, %rax
1850; AVX512F-NEXT: orq %rcx, %rax
1851; AVX512F-NEXT: movq %rbp, %rsp
1852; AVX512F-NEXT: popq %rbp
1853; AVX512F-NEXT: vzeroupper
1854; AVX512F-NEXT: retq
1855;
1856; AVX512BW-LABEL: v64i8:
1857; AVX512BW: # BB#0:
1858; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k1
1859; AVX512BW-NEXT: vpcmpgtb %zmm3, %zmm2, %k0 {%k1}
1860; AVX512BW-NEXT: kmovq %k0, %rax
1861; AVX512BW-NEXT: vzeroupper
1862; AVX512BW-NEXT: retq
1863 %x0 = icmp sgt <64 x i8> %a, %b
1864 %x1 = icmp sgt <64 x i8> %c, %d
1865 %y = and <64 x i1> %x0, %x1
1866 %res = bitcast <64 x i1> %y to i64
1867 ret i64 %res
1868}