; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSE2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSSE3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW

; v4i64: two signed-greater-than compares on <4 x i64> operands are ANDed
; together and the resulting <4 x i1> mask is bitcast to an i4 scalar.
define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
; SSE2-SSSE3-LABEL: v4i64:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0]
; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm3
; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm1
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm9
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm9
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm10, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm9[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm2
; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm9, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm7
; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm5
; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm7, %xmm5
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm6
; SSE2-SSSE3-NEXT:    pxor %xmm8, %xmm4
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm6, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-SSSE3-NEXT:    por %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
; SSE2-SSSE3-NEXT:    andps %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    movmskps %xmm2, %eax
; SSE2-SSSE3-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE2-SSSE3-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: v4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm4, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovmskps %xmm0, %eax
; AVX1-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovmskps %xmm0, %eax
; AVX2-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: v4i64:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
; AVX512F-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512BW-LABEL: v4i64:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
; AVX512BW-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    ret{{[l|q]}}
  %x0 = icmp sgt <4 x i64> %a, %b
  %x1 = icmp sgt <4 x i64> %c, %d
  %y = and <4 x i1> %x0, %x1
  %res = bitcast <4 x i1> %y to i4
  ret i4 %res
}

; v4f64: two ordered-greater-than compares on <4 x double> operands are ANDed
; together and the resulting <4 x i1> mask is bitcast to an i4 scalar.
define i4 @v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
; SSE2-SSSE3-LABEL: v4f64:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    cmpltpd %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    cmpltpd %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE2-SSSE3-NEXT:    cmpltpd %xmm5, %xmm7
; SSE2-SSSE3-NEXT:    cmpltpd %xmm4, %xmm6
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
; SSE2-SSSE3-NEXT:    andps %xmm2, %xmm6
; SSE2-SSSE3-NEXT:    movmskps %xmm6, %eax
; SSE2-SSSE3-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE2-SSSE3-NEXT:    ret{{[l|q]}}
;
; AVX12-LABEL: v4f64:
; AVX12:       # BB#0:
; AVX12-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX12-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vcmpltpd %ymm2, %ymm3, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX12-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vmovmskps %xmm0, %eax
; AVX12-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: v4f64:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
; AVX512F-NEXT:    vcmpltpd %ymm2, %ymm3, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512BW-LABEL: v4f64:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
; AVX512BW-NEXT:    vcmpltpd %ymm2, %ymm3, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT:    movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    ret{{[l|q]}}
  %x0 = fcmp ogt <4 x double> %a, %b
  %x1 = fcmp ogt <4 x double> %c, %d
  %y = and <4 x i1> %x0, %x1
  %res = bitcast <4 x i1> %y to i4
  ret i4 %res
}

; v16i16: two signed-greater-than compares on <16 x i16> operands are ANDed
; together and the resulting <16 x i1> mask is bitcast to an i16 scalar.
define i16 @v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i16> %d) {
; SSE2-SSSE3-LABEL: v16i16:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    pcmpgtw %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    packsswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpgtw %xmm7, %xmm5
; SSE2-SSSE3-NEXT:    pcmpgtw %xmm6, %xmm4
; SSE2-SSSE3-NEXT:    packsswb %xmm5, %xmm4
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm4
; SSE2-SSSE3-NEXT:    pmovmskb %xmm4, %eax
; SSE2-SSSE3-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE2-SSSE3-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: v16i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT:    vpcmpgtw %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm4, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: v16i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtw %ymm3, %ymm2, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: v16i16:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    vpcmpgtw %ymm3, %ymm2, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512BW-LABEL: v16i16:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vpcmpgtw %ymm1, %ymm0, %k1
; AVX512BW-NEXT:    vpcmpgtw %ymm3, %ymm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    ret{{[l|q]}}
  %x0 = icmp sgt <16 x i16> %a, %b
  %x1 = icmp sgt <16 x i16> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}

; v8i32: two signed-greater-than compares on <8 x i32> operands are ANDed
; together and the resulting <8 x i1> mask is bitcast to an i8 scalar.
define i8 @v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
; SSE2-SSSE3-LABEL: v8i32:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm5
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm6, %xmm4
; SSE2-SSSE3-NEXT:    packssdw %xmm5, %xmm4
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm4
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm4
; SSE2-SSSE3-NEXT:    pmovmskb %xmm4, %eax
; SSE2-SSSE3-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE2-SSSE3-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: v8i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm4, %xmm1
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: v8i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm2, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: v8i32:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
; AVX512F-NEXT:    vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512BW-LABEL: v8i32:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
; AVX512BW-NEXT:    vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    ret{{[l|q]}}
  %x0 = icmp sgt <8 x i32> %a, %b
  %x1 = icmp sgt <8 x i32> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

; v8f32: two ordered-greater-than compares on <8 x float> operands are ANDed
; together and the resulting <8 x i1> mask is bitcast to an i8 scalar.
define i8 @v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
; SSE2-SSSE3-LABEL: v8f32:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    cmpltps %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    cmpltps %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    cmpltps %xmm5, %xmm7
; SSE2-SSSE3-NEXT:    cmpltps %xmm4, %xmm6
; SSE2-SSSE3-NEXT:    packssdw %xmm7, %xmm6
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm6
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm6
; SSE2-SSSE3-NEXT:    pmovmskb %xmm6, %eax
; SSE2-SSSE3-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE2-SSSE3-NEXT:    ret{{[l|q]}}
;
; AVX12-LABEL: v8f32:
; AVX12:       # BB#0:
; AVX12-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX12-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vcmpltps %ymm2, %ymm3, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX12-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: v8f32:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vcmpltps %ymm0, %ymm1, %k1
; AVX512F-NEXT:    vcmpltps %ymm2, %ymm3, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512BW-LABEL: v8f32:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vcmpltps %ymm0, %ymm1, %k1
; AVX512BW-NEXT:    vcmpltps %ymm2, %ymm3, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    ret{{[l|q]}}
  %x0 = fcmp ogt <8 x float> %a, %b
  %x1 = fcmp ogt <8 x float> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

; v32i8: two signed-greater-than compares on <32 x i8> operands are ANDed
; together and the resulting <32 x i1> mask is bitcast to an i32 scalar.
define i32 @v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
; SSE2-SSSE3-LABEL: v32i8:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm6, %xmm4
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm4
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm7, %xmm5
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm5
; SSE2-SSSE3-NEXT:    pmovmskb %xmm4, %ecx
; SSE2-SSSE3-NEXT:    pmovmskb %xmm5, %eax
; SSE2-SSSE3-NEXT:    shll $16, %eax
; SSE2-SSSE3-NEXT:    orl %ecx, %eax
; SSE2-SSSE3-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: v32i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT:    vpcmpgtb %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm5, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm4, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    vpmovmskb %xmm1, %eax
; AVX1-NEXT:    shll $16, %eax
; AVX1-NEXT:    orl %ecx, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: v32i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpgtb %ymm3, %ymm2, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: v32i8:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    .cfi_def_cfa_offset 16
; AVX512F-NEXT:    .cfi_offset %rbp, -16
; AVX512F-NEXT:    movq %rsp, %rbp
; AVX512F-NEXT:    .cfi_def_cfa_register %rbp
; AVX512F-NEXT:    andq $-32, %rsp
; AVX512F-NEXT:    subq $32, %rsp
; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpcmpgtb %ymm3, %ymm2, %ymm1
; AVX512F-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT:    vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, (%rsp)
; AVX512F-NEXT:    movl (%rsp), %eax
; AVX512F-NEXT:    movq %rbp, %rsp
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512BW-LABEL: v32i8:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vpcmpgtb %ymm1, %ymm0, %k1
; AVX512BW-NEXT:    vpcmpgtb %ymm3, %ymm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    ret{{[l|q]}}
  %x0 = icmp sgt <32 x i8> %a, %b
  %x1 = icmp sgt <32 x i8> %c, %d
  %y = and <32 x i1> %x0, %x1
  %res = bitcast <32 x i1> %y to i32
  ret i32 %res
}