; blob: a6d6ca155302eb37b8793df7664c35ab1bf17950
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+SSE2 < %s | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+SSSE3 < %s | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx < %s | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx2 < %s | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx512f,+avx512vl,+avx512bw < %s | FileCheck %s --check-prefix=AVX512

; v4i64 - bitcast of an AND of two <4 x i1> compare masks to i4.
; Both masks come from signed greater-than compares of <4 x i64> operands
; (%a > %b and %c > %d).
; Per the assertions below, SSE2/SSSE3 build the 64-bit compare out of
; pcmpgtd/pcmpeqd plus shuffles and extract the bits with movmskps, AVX1/AVX2
; use vpcmpgtq + vpacksswb + vmovmskps, and AVX-512 compares straight into
; mask registers, feeding the first compare in as the write-mask of the second.
define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
; SSE2-SSSE3-LABEL: v4i64:
; SSE2-SSSE3: ## BB#0:
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0]
; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm3
; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm1
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm9
; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm9
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT: pand %xmm10, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm9[1,1,3,3]
; SSE2-SSSE3-NEXT: por %xmm1, %xmm3
; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm2
; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm0
; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT: pand %xmm9, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
; SSE2-SSSE3-NEXT: pslld $31, %xmm0
; SSE2-SSSE3-NEXT: psrad $31, %xmm0
; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm7
; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm5
; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm7, %xmm5
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT: por %xmm3, %xmm1
; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm6
; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm4
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm4
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-SSSE3-NEXT: por %xmm4, %xmm2
; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
; SSE2-SSSE3-NEXT: pslld $31, %xmm2
; SSE2-SSSE3-NEXT: psrad $31, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
; SSE2-SSSE3-NEXT: movmskps %xmm2, %eax
; SSE2-SSSE3-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: v4i64:
; AVX1: ## BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovmskps %xmm0, %eax
; AVX1-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v4i64:
; AVX2: ## BB#0:
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovmskps %xmm0, %eax
; AVX2-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: v4i64:
; AVX512: ## BB#0:
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %x0 = icmp sgt <4 x i64> %a, %b
  %x1 = icmp sgt <4 x i64> %c, %d
  %y = and <4 x i1> %x0, %x1
  %res = bitcast <4 x i1> %y to i4
  ret i4 %res
}

; v4f64 - bitcast of an AND of two <4 x i1> compare masks to i4.
; Both masks come from ordered greater-than (fcmp ogt) compares of
; <4 x double> operands (%a > %b and %c > %d).
; Per the assertions below, the compares are emitted as cmpltpd/vcmpltpd with
; the operands swapped; SSE2/SSSE3 and AVX1/AVX2 read the bits with
; movmskps/vmovmskps, while AVX-512 compares into mask registers and uses the
; first result as the write-mask of the second compare.
define i4 @v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
; SSE2-SSSE3-LABEL: v4f64:
; SSE2-SSSE3: ## BB#0:
; SSE2-SSSE3-NEXT: cmpltpd %xmm1, %xmm3
; SSE2-SSSE3-NEXT: cmpltpd %xmm0, %xmm2
; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE2-SSSE3-NEXT: pslld $31, %xmm2
; SSE2-SSSE3-NEXT: psrad $31, %xmm2
; SSE2-SSSE3-NEXT: cmpltpd %xmm5, %xmm7
; SSE2-SSSE3-NEXT: cmpltpd %xmm4, %xmm6
; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
; SSE2-SSSE3-NEXT: pslld $31, %xmm6
; SSE2-SSSE3-NEXT: psrad $31, %xmm6
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm6
; SSE2-SSSE3-NEXT: movmskps %xmm6, %eax
; SSE2-SSSE3-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: v4f64:
; AVX12: ## BB#0:
; AVX12-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX12-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vcmpltpd %ymm2, %ymm3, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vmovmskps %xmm0, %eax
; AVX12-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX12-NEXT: vzeroupper
; AVX12-NEXT: retq
;
; AVX512-LABEL: v4f64:
; AVX512: ## BB#0:
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
; AVX512-NEXT: vcmpltpd %ymm2, %ymm3, %k0 {%k1}
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %x0 = fcmp ogt <4 x double> %a, %b
  %x1 = fcmp ogt <4 x double> %c, %d
  %y = and <4 x i1> %x0, %x1
  %res = bitcast <4 x i1> %y to i4
  ret i4 %res
}

; v16i16 - bitcast of an AND of two <16 x i1> compare masks to i16.
; Both masks come from signed greater-than compares of <16 x i16> operands
; (%a > %b and %c > %d).
; Per the assertions below, SSE2 narrows each compare result to bytes with
; pand + packuswb, SSSE3 narrows with pshufb + punpcklqdq, AVX1/AVX2 narrow
; with vpacksswb, and all of them read the bits with pmovmskb/vpmovmskb;
; AVX-512 compares into mask registers and moves the result out with kmovd.
define i16 @v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i16> %d) {
; SSE2-LABEL: v16i16:
; SSE2: ## BB#0:
; SSE2-NEXT: pcmpgtw %xmm3, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: pand %xmm3, %xmm1
; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: psllw $7, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE2-NEXT: pand %xmm8, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpgtb %xmm0, %xmm1
; SSE2-NEXT: pcmpgtw %xmm7, %xmm5
; SSE2-NEXT: pand %xmm3, %xmm5
; SSE2-NEXT: pcmpgtw %xmm6, %xmm4
; SSE2-NEXT: pand %xmm3, %xmm4
; SSE2-NEXT: packuswb %xmm5, %xmm4
; SSE2-NEXT: psllw $7, %xmm4
; SSE2-NEXT: pand %xmm8, %xmm4
; SSE2-NEXT: pcmpgtb %xmm4, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: pmovmskb %xmm2, %eax
; SSE2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SSE2-NEXT: retq
;
; SSSE3-LABEL: v16i16:
; SSSE3: ## BB#0:
; SSSE3-NEXT: pcmpgtw %xmm3, %xmm1
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSSE3-NEXT: pshufb %xmm3, %xmm1
; SSSE3-NEXT: pcmpgtw %xmm2, %xmm0
; SSSE3-NEXT: pshufb %xmm3, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: psllw $7, %xmm0
; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSSE3-NEXT: pand %xmm8, %xmm0
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: pcmpgtb %xmm0, %xmm1
; SSSE3-NEXT: pcmpgtw %xmm7, %xmm5
; SSSE3-NEXT: pshufb %xmm3, %xmm5
; SSSE3-NEXT: pcmpgtw %xmm6, %xmm4
; SSSE3-NEXT: pshufb %xmm3, %xmm4
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
; SSSE3-NEXT: psllw $7, %xmm4
; SSSE3-NEXT: pand %xmm8, %xmm4
; SSSE3-NEXT: pcmpgtb %xmm4, %xmm2
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: pmovmskb %xmm2, %eax
; SSSE3-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SSSE3-NEXT: retq
;
; AVX1-LABEL: v16i16:
; AVX1: ## BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT: vpcmpgtw %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v16i16:
; AVX2: ## BB#0:
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: v16i16:
; AVX512: ## BB#0:
; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k1
; AVX512-NEXT: vpcmpgtw %ymm3, %ymm2, %k0 {%k1}
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %x0 = icmp sgt <16 x i16> %a, %b
  %x1 = icmp sgt <16 x i16> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}

; v8i32 - bitcast of an AND of two <8 x i1> compare masks to i8.
; Both masks come from signed greater-than compares of <8 x i32> operands
; (%a > %b and %c > %d).
; Per the assertions below, SSE2 narrows the compare results with
; pshuflw/pshufhw/pshufd and a final packuswb, SSSE3 narrows with pshufb,
; AVX1/AVX2 use vpacksswb + vpshufb, and all of them read the bits with
; pmovmskb/vpmovmskb; AVX-512 compares into mask registers and moves the
; result out with kmovd.
define i8 @v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
; SSE2-LABEL: v8i32:
; SSE2: ## BB#0:
; SSE2-NEXT: pcmpgtd %xmm3, %xmm1
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: psllw $15, %xmm0
; SSE2-NEXT: psraw $15, %xmm0
; SSE2-NEXT: pcmpgtd %xmm7, %xmm5
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm5[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pcmpgtd %xmm6, %xmm4
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm4[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE2-NEXT: psllw $15, %xmm2
; SSE2-NEXT: psraw $15, %xmm2
; SSE2-NEXT: pand %xmm0, %xmm2
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
; SSE2-NEXT: packuswb %xmm2, %xmm2
; SSE2-NEXT: pmovmskb %xmm2, %eax
; SSE2-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SSE2-NEXT: retq
;
; SSSE3-LABEL: v8i32:
; SSSE3: ## BB#0:
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: pshufb %xmm3, %xmm1
; SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
; SSSE3-NEXT: pshufb %xmm3, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: psllw $15, %xmm0
; SSSE3-NEXT: psraw $15, %xmm0
; SSSE3-NEXT: pcmpgtd %xmm7, %xmm5
; SSSE3-NEXT: pshufb %xmm3, %xmm5
; SSSE3-NEXT: pcmpgtd %xmm6, %xmm4
; SSSE3-NEXT: pshufb %xmm3, %xmm4
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
; SSSE3-NEXT: psllw $15, %xmm4
; SSSE3-NEXT: psraw $15, %xmm4
; SSSE3-NEXT: pand %xmm0, %xmm4
; SSSE3-NEXT: pshufb {{.*#+}} xmm4 = xmm4[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: pmovmskb %xmm4, %eax
; SSSE3-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SSSE3-NEXT: retq
;
; AVX1-LABEL: v8i32:
; AVX1: ## BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v8i32:
; AVX2: ## BB#0:
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: v8i32:
; AVX512: ## BB#0:
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
; AVX512-NEXT: vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %x0 = icmp sgt <8 x i32> %a, %b
  %x1 = icmp sgt <8 x i32> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

; v8f32 - bitcast of an AND of two <8 x i1> compare masks to i8.
; Both masks come from ordered greater-than (fcmp ogt) compares of
; <8 x float> operands (%a > %b and %c > %d).
; Per the assertions below, the compares are emitted as cmpltps/vcmpltps with
; the operands swapped; SSE2 narrows the results with pshuflw/pshufhw/pshufd
; and packuswb, SSSE3 with pshufb, AVX1/AVX2 with vpacksswb + vpshufb, and all
; of them read the bits with pmovmskb/vpmovmskb; AVX-512 compares into mask
; registers and moves the result out with kmovd.
define i8 @v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
; SSE2-LABEL: v8f32:
; SSE2: ## BB#0:
; SSE2-NEXT: cmpltps %xmm1, %xmm3
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: cmpltps %xmm0, %xmm2
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: psllw $15, %xmm0
; SSE2-NEXT: psraw $15, %xmm0
; SSE2-NEXT: cmpltps %xmm5, %xmm7
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm7[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: cmpltps %xmm4, %xmm6
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm6[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE2-NEXT: psllw $15, %xmm2
; SSE2-NEXT: psraw $15, %xmm2
; SSE2-NEXT: pand %xmm0, %xmm2
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
; SSE2-NEXT: packuswb %xmm2, %xmm2
; SSE2-NEXT: pmovmskb %xmm2, %eax
; SSE2-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SSE2-NEXT: retq
;
; SSSE3-LABEL: v8f32:
; SSSE3: ## BB#0:
; SSSE3-NEXT: cmpltps %xmm1, %xmm3
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: pshufb %xmm1, %xmm3
; SSSE3-NEXT: cmpltps %xmm0, %xmm2
; SSSE3-NEXT: pshufb %xmm1, %xmm2
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSSE3-NEXT: psllw $15, %xmm2
; SSSE3-NEXT: psraw $15, %xmm2
; SSSE3-NEXT: cmpltps %xmm5, %xmm7
; SSSE3-NEXT: pshufb %xmm1, %xmm7
; SSSE3-NEXT: cmpltps %xmm4, %xmm6
; SSSE3-NEXT: pshufb %xmm1, %xmm6
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm7[0]
; SSSE3-NEXT: psllw $15, %xmm6
; SSSE3-NEXT: psraw $15, %xmm6
; SSSE3-NEXT: pand %xmm2, %xmm6
; SSSE3-NEXT: pshufb {{.*#+}} xmm6 = xmm6[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: pmovmskb %xmm6, %eax
; SSSE3-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SSSE3-NEXT: retq
;
; AVX12-LABEL: v8f32:
; AVX12: ## BB#0:
; AVX12-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX12-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vcmpltps %ymm2, %ymm3, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX12-NEXT: vzeroupper
; AVX12-NEXT: retq
;
; AVX512-LABEL: v8f32:
; AVX512: ## BB#0:
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k1
; AVX512-NEXT: vcmpltps %ymm2, %ymm3, %k0 {%k1}
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %x0 = fcmp ogt <8 x float> %a, %b
  %x1 = fcmp ogt <8 x float> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

; v32i8 - bitcast of an AND of two <32 x i1> compare masks to i32.
; Both masks come from signed greater-than compares of <32 x i8> operands
; (%a > %b and %c > %d).
; Per the assertions below, SSE2/SSSE3 and AVX1 currently assemble the result
; one element at a time through the stack (byte load or vpextrb, andb $1,
; byte store), AVX2 needs only a single vpmovmskb on the ymm result, and
; AVX-512 compares into mask registers and moves the result out with kmovd.
define i32 @v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
; SSE2-SSSE3-LABEL: v32i8:
; SSE2-SSSE3: ## BB#0:
; SSE2-SSSE3-NEXT: pcmpgtb %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm1
; SSE2-SSSE3-NEXT: pcmpgtb %xmm6, %xmm4
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
; SSE2-SSSE3-NEXT: pcmpgtb %xmm7, %xmm5
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5
; SSE2-SSSE3-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %cl
; SSE2-SSSE3-NEXT: andb $1, %cl
; SSE2-SSSE3-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %cl
; SSE2-SSSE3-NEXT: andb $1, %cl
; SSE2-SSSE3-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: andb $1, %al
; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
; SSE2-SSSE3-NEXT: shll $16, %ecx
; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: orl %ecx, %eax
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: v32i8:
; AVX1: ## BB#0:
; AVX1-NEXT: pushq %rbp
; AVX1-NEXT: Lcfi0:
; AVX1-NEXT: .cfi_def_cfa_offset 16
; AVX1-NEXT: Lcfi1:
; AVX1-NEXT: .cfi_offset %rbp, -16
; AVX1-NEXT: movq %rsp, %rbp
; AVX1-NEXT: Lcfi2:
; AVX1-NEXT: .cfi_def_cfa_register %rbp
; AVX1-NEXT: andq $-32, %rsp
; AVX1-NEXT: subq $32, %rsp
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT: vpcmpgtb %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpextrb $15, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $14, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $13, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $12, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $11, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $10, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $9, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $8, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $7, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $6, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $5, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $4, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $3, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $2, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $1, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $0, %xmm1, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $15, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $14, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $13, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $12, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $11, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $10, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $9, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $8, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $7, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $6, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $5, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $4, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $3, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $2, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $1, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: movb %al, (%rsp)
; AVX1-NEXT: movl (%rsp), %eax
; AVX1-NEXT: movq %rbp, %rsp
; AVX1-NEXT: popq %rbp
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v32i8:
; AVX2: ## BB#0:
; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: v32i8:
; AVX512: ## BB#0:
; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %k1
; AVX512-NEXT: vpcmpgtb %ymm3, %ymm2, %k0 {%k1}
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %x0 = icmp sgt <32 x i8> %a, %b
  %x1 = icmp sgt <32 x i8> %c, %d
  %y = and <32 x i1> %x0, %x1
  %res = bitcast <32 x i1> %y to i32
  ret i32 %res
}