; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW

; Lane-wise signed greater-than compares (%a > %b) and (%c > %d) on <8 x i64>.
; The two <8 x i1> results are ANDed and the combined mask is bitcast to i8.
define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
; SSE-LABEL: v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtq %xmm7, %xmm3
; SSE-NEXT:    pcmpgtq %xmm6, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT:    pcmpgtq %xmm5, %xmm1
; SSE-NEXT:    pcmpgtq %xmm4, %xmm0
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    packssdw %xmm2, %xmm0
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    shufps {{.*#+}} xmm10 = xmm10[0,2],xmm11[0,2]
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    shufps {{.*#+}} xmm8 = xmm8[0,2],xmm9[0,2]
; SSE-NEXT:    packssdw %xmm10, %xmm8
; SSE-NEXT:    pand %xmm0, %xmm8
; SSE-NEXT:    packsswb %xmm0, %xmm8
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT:    vpcmpgtq %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw %xmm8, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm8 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT:    vpshufb %xmm8, %xmm1, %xmm9
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm8, %xmm0, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm9[0]
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm5, %xmm2
; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpshufb %xmm8, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm4, %xmm3
; AVX1-NEXT:    vpackssdw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpshufb %xmm8, %xmm2, %xmm2
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vpcmpgtq %ymm7, %ymm5, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtq %ymm6, %ymm4, %ymm2
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm4
; AVX2-NEXT:    vpackssdw %xmm4, %xmm2, %xmm2
; AVX2-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v8i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1
; AVX512F-NEXT:    vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <8 x i64> %a, %b
  %x1 = icmp sgt <8 x i64> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}
;
; Lane-wise ordered greater-than compares (%a > %b) and (%c > %d) on
; <8 x double>. The two <8 x i1> results are ANDed and the combined mask is
; bitcast to i8.
define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
; SSE-LABEL: v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    cmpltpd %xmm3, %xmm7
; SSE-NEXT:    cmpltpd %xmm2, %xmm6
; SSE-NEXT:    shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
; SSE-NEXT:    cmpltpd %xmm1, %xmm5
; SSE-NEXT:    cmpltpd %xmm0, %xmm4
; SSE-NEXT:    shufps {{.*#+}} xmm4 = xmm4[0,2],xmm5[0,2]
; SSE-NEXT:    packssdw %xmm6, %xmm4
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    shufps {{.*#+}} xmm10 = xmm10[0,2],xmm11[0,2]
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    shufps {{.*#+}} xmm8 = xmm8[0,2],xmm9[0,2]
; SSE-NEXT:    packssdw %xmm10, %xmm8
; SSE-NEXT:    pand %xmm4, %xmm8
; SSE-NEXT:    packsswb %xmm0, %xmm8
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX12-LABEL: v8f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltpd %ymm1, %ymm3, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX12-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX12-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT:    vcmpltpd %ymm0, %ymm2, %ymm0
; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX12-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
; AVX12-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX12-NEXT:    vcmpltpd %ymm5, %ymm7, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX12-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT:    vcmpltpd %ymm4, %ymm6, %ymm2
; AVX12-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX12-NEXT:    vpackssdw %xmm4, %xmm2, %xmm2
; AVX12-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
; AVX12-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX12-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v8f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
; AVX512F-NEXT:    vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8f64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
; AVX512BW-NEXT:    vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = fcmp ogt <8 x double> %a, %b
  %x1 = fcmp ogt <8 x double> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}
;
; Lane-wise signed greater-than compares (%a > %b) and (%c > %d) on <32 x i16>.
; The two <32 x i1> results are ANDed and the combined mask is bitcast to i32.
define i32 @v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i16> %d) {
; SSE-LABEL: v32i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtw %xmm5, %xmm1
; SSE-NEXT:    pcmpgtw %xmm4, %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    pcmpgtw %xmm7, %xmm3
; SSE-NEXT:    pcmpgtw %xmm6, %xmm2
; SSE-NEXT:    packsswb %xmm3, %xmm2
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    packsswb %xmm11, %xmm10
; SSE-NEXT:    pand %xmm0, %xmm10
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    packsswb %xmm9, %xmm8
; SSE-NEXT:    pand %xmm2, %xmm8
; SSE-NEXT:    pmovmskb %xmm10, %ecx
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    shll $16, %eax
; SSE-NEXT:    orl %ecx, %eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT:    vpcmpgtw %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpacksswb %xmm8, %xmm1, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm7, %xmm5, %xmm2
; AVX1-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm8, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtw %xmm6, %xmm4, %xmm3
; AVX1-NEXT:    vpacksswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    vpmovmskb %xmm1, %eax
; AVX1-NEXT:    shll $16, %eax
; AVX1-NEXT:    orl %ecx, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpacksswb %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpgtw %ymm7, %ymm5, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v32i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k1
; AVX512F-NEXT:    vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k2
; AVX512F-NEXT:    vpcmpgtw %ymm7, %ymm5, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vpcmpgtw %ymm6, %ymm4, %ymm1
; AVX512F-NEXT:    vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k0 {%k2}
; AVX512F-NEXT:    kmovw %k0, %ecx
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    shll $16, %eax
; AVX512F-NEXT:    orl %ecx, %eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtw %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtw %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <32 x i16> %a, %b
  %x1 = icmp sgt <32 x i16> %c, %d
  %y = and <32 x i1> %x0, %x1
  %res = bitcast <32 x i1> %y to i32
  ret i32 %res
}
;
; Lane-wise signed greater-than compares (%a > %b) and (%c > %d) on <16 x i32>.
; The two <16 x i1> results are ANDed and the combined mask is bitcast to i16.
define i16 @v16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i32> %d) {
; SSE-LABEL: v16i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtd %xmm7, %xmm3
; SSE-NEXT:    pcmpgtd %xmm6, %xmm2
; SSE-NEXT:    packssdw %xmm3, %xmm2
; SSE-NEXT:    pcmpgtd %xmm5, %xmm1
; SSE-NEXT:    pcmpgtd %xmm4, %xmm0
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packsswb %xmm2, %xmm0
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    packssdw %xmm11, %xmm10
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    packssdw %xmm9, %xmm8
; SSE-NEXT:    packsswb %xmm10, %xmm8
; SSE-NEXT:    pand %xmm0, %xmm8
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT:    vpcmpgtd %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw %xmm8, %xmm1, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm8, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm5, %xmm2
; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm6, %xmm4, %xmm3
; AVX1-NEXT:    vpackssdw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtd %ymm7, %ymm5, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtd %ymm6, %ymm4, %ymm2
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v16i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
; AVX512F-NEXT:    vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <16 x i32> %a, %b
  %x1 = icmp sgt <16 x i32> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}
;
; Lane-wise ordered greater-than compares (%a > %b) and (%c > %d) on
; <16 x float>. The two <16 x i1> results are ANDed and the combined mask is
; bitcast to i16.
define i16 @v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) {
; SSE-LABEL: v16f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    cmpltps %xmm3, %xmm7
; SSE-NEXT:    cmpltps %xmm2, %xmm6
; SSE-NEXT:    packssdw %xmm7, %xmm6
; SSE-NEXT:    cmpltps %xmm1, %xmm5
; SSE-NEXT:    cmpltps %xmm0, %xmm4
; SSE-NEXT:    packssdw %xmm5, %xmm4
; SSE-NEXT:    packsswb %xmm6, %xmm4
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    packssdw %xmm11, %xmm10
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    packssdw %xmm9, %xmm8
; SSE-NEXT:    packsswb %xmm10, %xmm8
; SSE-NEXT:    pand %xmm4, %xmm8
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX12-LABEL: v16f32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltps %ymm1, %ymm3, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX12-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX12-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vcmpltps %ymm5, %ymm7, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX12-NEXT:    vcmpltps %ymm4, %ymm6, %ymm2
; AVX12-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX12-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
; AVX12-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX12-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v16f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltps %zmm0, %zmm1, %k1
; AVX512F-NEXT:    vcmpltps %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16f32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltps %zmm0, %zmm1, %k1
; AVX512BW-NEXT:    vcmpltps %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = fcmp ogt <16 x float> %a, %b
  %x1 = fcmp ogt <16 x float> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}
;
490define i64 @v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) {
491; SSE-LABEL: v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000492; SSE: # %bb.0:
Simon Pilgrim11e29692017-09-14 10:30:22 +0000493; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
494; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
495; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
496; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
497; SSE-NEXT: pcmpgtb %xmm6, %xmm2
498; SSE-NEXT: pcmpgtb %xmm7, %xmm3
499; SSE-NEXT: pcmpgtb %xmm4, %xmm0
500; SSE-NEXT: pcmpgtb %xmm5, %xmm1
501; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm8
502; SSE-NEXT: pand %xmm2, %xmm8
503; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm9
504; SSE-NEXT: pand %xmm3, %xmm9
505; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm10
506; SSE-NEXT: pand %xmm0, %xmm10
507; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm11
508; SSE-NEXT: pand %xmm1, %xmm11
Simon Pilgrim11e29692017-09-14 10:30:22 +0000509; SSE-NEXT: pextrb $1, %xmm11, %eax
Jonas Paulsson7ad28862018-01-20 16:05:10 +0000510; SSE-NEXT: andl $1, %eax
511; SSE-NEXT: pextrb $0, %xmm11, %ecx
512; SSE-NEXT: andl $1, %ecx
513; SSE-NEXT: leal (%rcx,%rax,2), %eax
514; SSE-NEXT: pextrb $2, %xmm11, %ecx
515; SSE-NEXT: andl $1, %ecx
516; SSE-NEXT: leal (%rax,%rcx,4), %eax
517; SSE-NEXT: pextrb $3, %xmm11, %ecx
518; SSE-NEXT: andl $1, %ecx
519; SSE-NEXT: leal (%rax,%rcx,8), %eax
520; SSE-NEXT: pextrb $4, %xmm11, %ecx
521; SSE-NEXT: andl $1, %ecx
522; SSE-NEXT: shll $4, %ecx
523; SSE-NEXT: orl %eax, %ecx
524; SSE-NEXT: pextrb $5, %xmm11, %eax
525; SSE-NEXT: andl $1, %eax
526; SSE-NEXT: shll $5, %eax
527; SSE-NEXT: orl %ecx, %eax
528; SSE-NEXT: pextrb $6, %xmm11, %ecx
529; SSE-NEXT: andl $1, %ecx
530; SSE-NEXT: shll $6, %ecx
531; SSE-NEXT: pextrb $7, %xmm11, %edx
532; SSE-NEXT: andl $1, %edx
533; SSE-NEXT: shll $7, %edx
534; SSE-NEXT: orl %ecx, %edx
535; SSE-NEXT: pextrb $8, %xmm11, %ecx
536; SSE-NEXT: andl $1, %ecx
537; SSE-NEXT: shll $8, %ecx
538; SSE-NEXT: orl %edx, %ecx
539; SSE-NEXT: pextrb $9, %xmm11, %edx
540; SSE-NEXT: andl $1, %edx
541; SSE-NEXT: shll $9, %edx
542; SSE-NEXT: orl %ecx, %edx
543; SSE-NEXT: pextrb $10, %xmm11, %ecx
544; SSE-NEXT: andl $1, %ecx
545; SSE-NEXT: shll $10, %ecx
546; SSE-NEXT: orl %edx, %ecx
547; SSE-NEXT: pextrb $11, %xmm11, %edx
548; SSE-NEXT: andl $1, %edx
549; SSE-NEXT: shll $11, %edx
550; SSE-NEXT: orl %ecx, %edx
551; SSE-NEXT: pextrb $12, %xmm11, %ecx
552; SSE-NEXT: andl $1, %ecx
553; SSE-NEXT: shll $12, %ecx
554; SSE-NEXT: orl %edx, %ecx
555; SSE-NEXT: pextrb $13, %xmm11, %edx
556; SSE-NEXT: andl $1, %edx
557; SSE-NEXT: shll $13, %edx
558; SSE-NEXT: orl %ecx, %edx
559; SSE-NEXT: pextrb $14, %xmm11, %ecx
560; SSE-NEXT: andl $1, %ecx
561; SSE-NEXT: shll $14, %ecx
562; SSE-NEXT: orl %edx, %ecx
563; SSE-NEXT: pextrb $15, %xmm11, %edx
564; SSE-NEXT: shll $15, %edx
565; SSE-NEXT: orl %ecx, %edx
566; SSE-NEXT: orl %eax, %edx
567; SSE-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
Simon Pilgrim11e29692017-09-14 10:30:22 +0000568; SSE-NEXT: pextrb $1, %xmm10, %eax
Jonas Paulsson7ad28862018-01-20 16:05:10 +0000569; SSE-NEXT: andl $1, %eax
570; SSE-NEXT: pextrb $0, %xmm10, %ecx
571; SSE-NEXT: andl $1, %ecx
572; SSE-NEXT: leal (%rcx,%rax,2), %eax
573; SSE-NEXT: pextrb $2, %xmm10, %ecx
574; SSE-NEXT: andl $1, %ecx
575; SSE-NEXT: leal (%rax,%rcx,4), %eax
576; SSE-NEXT: pextrb $3, %xmm10, %ecx
577; SSE-NEXT: andl $1, %ecx
578; SSE-NEXT: leal (%rax,%rcx,8), %eax
579; SSE-NEXT: pextrb $4, %xmm10, %ecx
580; SSE-NEXT: andl $1, %ecx
581; SSE-NEXT: shll $4, %ecx
582; SSE-NEXT: orl %eax, %ecx
583; SSE-NEXT: pextrb $5, %xmm10, %eax
584; SSE-NEXT: andl $1, %eax
585; SSE-NEXT: shll $5, %eax
586; SSE-NEXT: orl %ecx, %eax
587; SSE-NEXT: pextrb $6, %xmm10, %ecx
588; SSE-NEXT: andl $1, %ecx
589; SSE-NEXT: shll $6, %ecx
590; SSE-NEXT: pextrb $7, %xmm10, %edx
591; SSE-NEXT: andl $1, %edx
592; SSE-NEXT: shll $7, %edx
593; SSE-NEXT: orl %ecx, %edx
594; SSE-NEXT: pextrb $8, %xmm10, %ecx
595; SSE-NEXT: andl $1, %ecx
596; SSE-NEXT: shll $8, %ecx
597; SSE-NEXT: orl %edx, %ecx
598; SSE-NEXT: pextrb $9, %xmm10, %edx
599; SSE-NEXT: andl $1, %edx
600; SSE-NEXT: shll $9, %edx
601; SSE-NEXT: orl %ecx, %edx
602; SSE-NEXT: pextrb $10, %xmm10, %ecx
603; SSE-NEXT: andl $1, %ecx
604; SSE-NEXT: shll $10, %ecx
605; SSE-NEXT: orl %edx, %ecx
606; SSE-NEXT: pextrb $11, %xmm10, %edx
607; SSE-NEXT: andl $1, %edx
608; SSE-NEXT: shll $11, %edx
609; SSE-NEXT: orl %ecx, %edx
610; SSE-NEXT: pextrb $12, %xmm10, %ecx
611; SSE-NEXT: andl $1, %ecx
612; SSE-NEXT: shll $12, %ecx
613; SSE-NEXT: orl %edx, %ecx
614; SSE-NEXT: pextrb $13, %xmm10, %edx
615; SSE-NEXT: andl $1, %edx
616; SSE-NEXT: shll $13, %edx
617; SSE-NEXT: orl %ecx, %edx
618; SSE-NEXT: pextrb $14, %xmm10, %ecx
619; SSE-NEXT: andl $1, %ecx
620; SSE-NEXT: shll $14, %ecx
621; SSE-NEXT: orl %edx, %ecx
622; SSE-NEXT: pextrb $15, %xmm10, %edx
623; SSE-NEXT: shll $15, %edx
624; SSE-NEXT: orl %ecx, %edx
625; SSE-NEXT: orl %eax, %edx
626; SSE-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
Simon Pilgrim11e29692017-09-14 10:30:22 +0000627; SSE-NEXT: pextrb $1, %xmm9, %eax
Jonas Paulsson7ad28862018-01-20 16:05:10 +0000628; SSE-NEXT: andl $1, %eax
629; SSE-NEXT: pextrb $0, %xmm9, %ecx
630; SSE-NEXT: andl $1, %ecx
631; SSE-NEXT: leal (%rcx,%rax,2), %eax
632; SSE-NEXT: pextrb $2, %xmm9, %ecx
633; SSE-NEXT: andl $1, %ecx
634; SSE-NEXT: leal (%rax,%rcx,4), %eax
635; SSE-NEXT: pextrb $3, %xmm9, %ecx
636; SSE-NEXT: andl $1, %ecx
637; SSE-NEXT: leal (%rax,%rcx,8), %eax
638; SSE-NEXT: pextrb $4, %xmm9, %ecx
639; SSE-NEXT: andl $1, %ecx
640; SSE-NEXT: shll $4, %ecx
641; SSE-NEXT: orl %eax, %ecx
642; SSE-NEXT: pextrb $5, %xmm9, %eax
643; SSE-NEXT: andl $1, %eax
644; SSE-NEXT: shll $5, %eax
645; SSE-NEXT: orl %ecx, %eax
646; SSE-NEXT: pextrb $6, %xmm9, %ecx
647; SSE-NEXT: andl $1, %ecx
648; SSE-NEXT: shll $6, %ecx
649; SSE-NEXT: pextrb $7, %xmm9, %edx
650; SSE-NEXT: andl $1, %edx
651; SSE-NEXT: shll $7, %edx
652; SSE-NEXT: orl %ecx, %edx
653; SSE-NEXT: pextrb $8, %xmm9, %ecx
654; SSE-NEXT: andl $1, %ecx
655; SSE-NEXT: shll $8, %ecx
656; SSE-NEXT: orl %edx, %ecx
657; SSE-NEXT: pextrb $9, %xmm9, %edx
658; SSE-NEXT: andl $1, %edx
659; SSE-NEXT: shll $9, %edx
660; SSE-NEXT: orl %ecx, %edx
661; SSE-NEXT: pextrb $10, %xmm9, %ecx
662; SSE-NEXT: andl $1, %ecx
663; SSE-NEXT: shll $10, %ecx
664; SSE-NEXT: orl %edx, %ecx
665; SSE-NEXT: pextrb $11, %xmm9, %edx
666; SSE-NEXT: andl $1, %edx
667; SSE-NEXT: shll $11, %edx
668; SSE-NEXT: orl %ecx, %edx
669; SSE-NEXT: pextrb $12, %xmm9, %ecx
670; SSE-NEXT: andl $1, %ecx
671; SSE-NEXT: shll $12, %ecx
672; SSE-NEXT: orl %edx, %ecx
673; SSE-NEXT: pextrb $13, %xmm9, %edx
674; SSE-NEXT: andl $1, %edx
675; SSE-NEXT: shll $13, %edx
676; SSE-NEXT: orl %ecx, %edx
677; SSE-NEXT: pextrb $14, %xmm9, %ecx
678; SSE-NEXT: andl $1, %ecx
679; SSE-NEXT: shll $14, %ecx
680; SSE-NEXT: orl %edx, %ecx
681; SSE-NEXT: pextrb $15, %xmm9, %edx
682; SSE-NEXT: shll $15, %edx
683; SSE-NEXT: orl %ecx, %edx
684; SSE-NEXT: orl %eax, %edx
685; SSE-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
Simon Pilgrim11e29692017-09-14 10:30:22 +0000686; SSE-NEXT: pextrb $1, %xmm8, %eax
Jonas Paulsson7ad28862018-01-20 16:05:10 +0000687; SSE-NEXT: andl $1, %eax
688; SSE-NEXT: pextrb $0, %xmm8, %ecx
689; SSE-NEXT: andl $1, %ecx
690; SSE-NEXT: leal (%rcx,%rax,2), %eax
691; SSE-NEXT: pextrb $2, %xmm8, %ecx
692; SSE-NEXT: andl $1, %ecx
693; SSE-NEXT: leal (%rax,%rcx,4), %eax
694; SSE-NEXT: pextrb $3, %xmm8, %ecx
695; SSE-NEXT: andl $1, %ecx
696; SSE-NEXT: leal (%rax,%rcx,8), %eax
697; SSE-NEXT: pextrb $4, %xmm8, %ecx
698; SSE-NEXT: andl $1, %ecx
699; SSE-NEXT: shll $4, %ecx
700; SSE-NEXT: orl %eax, %ecx
701; SSE-NEXT: pextrb $5, %xmm8, %eax
702; SSE-NEXT: andl $1, %eax
703; SSE-NEXT: shll $5, %eax
704; SSE-NEXT: orl %ecx, %eax
705; SSE-NEXT: pextrb $6, %xmm8, %ecx
706; SSE-NEXT: andl $1, %ecx
707; SSE-NEXT: shll $6, %ecx
708; SSE-NEXT: pextrb $7, %xmm8, %edx
709; SSE-NEXT: andl $1, %edx
710; SSE-NEXT: shll $7, %edx
711; SSE-NEXT: orl %ecx, %edx
712; SSE-NEXT: pextrb $8, %xmm8, %ecx
713; SSE-NEXT: andl $1, %ecx
714; SSE-NEXT: shll $8, %ecx
715; SSE-NEXT: orl %edx, %ecx
716; SSE-NEXT: pextrb $9, %xmm8, %edx
717; SSE-NEXT: andl $1, %edx
718; SSE-NEXT: shll $9, %edx
719; SSE-NEXT: orl %ecx, %edx
720; SSE-NEXT: pextrb $10, %xmm8, %ecx
721; SSE-NEXT: andl $1, %ecx
722; SSE-NEXT: shll $10, %ecx
723; SSE-NEXT: orl %edx, %ecx
724; SSE-NEXT: pextrb $11, %xmm8, %edx
725; SSE-NEXT: andl $1, %edx
726; SSE-NEXT: shll $11, %edx
727; SSE-NEXT: orl %ecx, %edx
728; SSE-NEXT: pextrb $12, %xmm8, %ecx
729; SSE-NEXT: andl $1, %ecx
730; SSE-NEXT: shll $12, %ecx
731; SSE-NEXT: orl %edx, %ecx
732; SSE-NEXT: pextrb $13, %xmm8, %edx
733; SSE-NEXT: andl $1, %edx
734; SSE-NEXT: shll $13, %edx
735; SSE-NEXT: orl %ecx, %edx
736; SSE-NEXT: pextrb $14, %xmm8, %ecx
737; SSE-NEXT: andl $1, %ecx
738; SSE-NEXT: shll $14, %ecx
739; SSE-NEXT: orl %edx, %ecx
740; SSE-NEXT: pextrb $15, %xmm8, %edx
741; SSE-NEXT: shll $15, %edx
742; SSE-NEXT: orl %ecx, %edx
743; SSE-NEXT: orl %eax, %edx
744; SSE-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
Simon Pilgrim11e29692017-09-14 10:30:22 +0000745; SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
746; SSE-NEXT: shll $16, %eax
747; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
748; SSE-NEXT: orl %eax, %ecx
749; SSE-NEXT: movl -{{[0-9]+}}(%rsp), %edx
750; SSE-NEXT: shll $16, %edx
751; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
752; SSE-NEXT: orl %edx, %eax
753; SSE-NEXT: shlq $32, %rax
754; SSE-NEXT: orq %rcx, %rax
Simon Pilgrima5793822017-10-31 18:43:24 +0000755; SSE-NEXT: retq
Simon Pilgrim11e29692017-09-14 10:30:22 +0000756;
757; AVX1-LABEL: v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000758; AVX1: # %bb.0:
Simon Pilgrim11e29692017-09-14 10:30:22 +0000759; AVX1-NEXT: pushq %rbp
Simon Pilgrim11e29692017-09-14 10:30:22 +0000760; AVX1-NEXT: .cfi_def_cfa_offset 16
Simon Pilgrim11e29692017-09-14 10:30:22 +0000761; AVX1-NEXT: .cfi_offset %rbp, -16
762; AVX1-NEXT: movq %rsp, %rbp
Simon Pilgrim11e29692017-09-14 10:30:22 +0000763; AVX1-NEXT: .cfi_def_cfa_register %rbp
764; AVX1-NEXT: andq $-32, %rsp
765; AVX1-NEXT: subq $64, %rsp
766; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
767; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
768; AVX1-NEXT: vpcmpgtb %xmm8, %xmm9, %xmm8
769; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1
770; AVX1-NEXT: vinsertf128 $1, %xmm8, %ymm1, %ymm8
771; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
772; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
773; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1
774; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0
775; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
776; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm0
777; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
778; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
779; AVX1-NEXT: vpcmpgtb %xmm7, %xmm5, %xmm2
780; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
781; AVX1-NEXT: vandps %ymm0, %ymm8, %ymm0
782; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
783; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
784; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
785; AVX1-NEXT: vpcmpgtb %xmm6, %xmm4, %xmm3
786; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
787; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
Simon Pilgrim11e29692017-09-14 10:30:22 +0000788; AVX1-NEXT: vpextrb $1, %xmm1, %eax
Jonas Paulsson7ad28862018-01-20 16:05:10 +0000789; AVX1-NEXT: andl $1, %eax
790; AVX1-NEXT: vpextrb $0, %xmm1, %ecx
791; AVX1-NEXT: andl $1, %ecx
792; AVX1-NEXT: leal (%rcx,%rax,2), %eax
793; AVX1-NEXT: vpextrb $2, %xmm1, %ecx
794; AVX1-NEXT: andl $1, %ecx
795; AVX1-NEXT: leal (%rax,%rcx,4), %eax
796; AVX1-NEXT: vpextrb $3, %xmm1, %ecx
797; AVX1-NEXT: andl $1, %ecx
798; AVX1-NEXT: leal (%rax,%rcx,8), %eax
799; AVX1-NEXT: vpextrb $4, %xmm1, %ecx
800; AVX1-NEXT: andl $1, %ecx
801; AVX1-NEXT: shll $4, %ecx
802; AVX1-NEXT: orl %eax, %ecx
Simon Pilgrim11e29692017-09-14 10:30:22 +0000803; AVX1-NEXT: vpextrb $5, %xmm1, %eax
Jonas Paulsson7ad28862018-01-20 16:05:10 +0000804; AVX1-NEXT: andl $1, %eax
805; AVX1-NEXT: shll $5, %eax
806; AVX1-NEXT: orl %ecx, %eax
807; AVX1-NEXT: vpextrb $6, %xmm1, %ecx
808; AVX1-NEXT: andl $1, %ecx
809; AVX1-NEXT: shll $6, %ecx
810; AVX1-NEXT: vpextrb $7, %xmm1, %edx
811; AVX1-NEXT: andl $1, %edx
812; AVX1-NEXT: shll $7, %edx
813; AVX1-NEXT: orl %ecx, %edx
814; AVX1-NEXT: vpextrb $8, %xmm1, %ecx
815; AVX1-NEXT: andl $1, %ecx
816; AVX1-NEXT: shll $8, %ecx
817; AVX1-NEXT: orl %edx, %ecx
818; AVX1-NEXT: vpextrb $9, %xmm1, %edx
819; AVX1-NEXT: andl $1, %edx
820; AVX1-NEXT: shll $9, %edx
821; AVX1-NEXT: orl %ecx, %edx
822; AVX1-NEXT: vpextrb $10, %xmm1, %ecx
823; AVX1-NEXT: andl $1, %ecx
824; AVX1-NEXT: shll $10, %ecx
825; AVX1-NEXT: orl %edx, %ecx
826; AVX1-NEXT: vpextrb $11, %xmm1, %edx
827; AVX1-NEXT: andl $1, %edx
828; AVX1-NEXT: shll $11, %edx
829; AVX1-NEXT: orl %ecx, %edx
830; AVX1-NEXT: vpextrb $12, %xmm1, %ecx
831; AVX1-NEXT: andl $1, %ecx
832; AVX1-NEXT: shll $12, %ecx
833; AVX1-NEXT: orl %edx, %ecx
834; AVX1-NEXT: vpextrb $13, %xmm1, %edx
835; AVX1-NEXT: andl $1, %edx
836; AVX1-NEXT: shll $13, %edx
837; AVX1-NEXT: orl %ecx, %edx
838; AVX1-NEXT: vpextrb $14, %xmm1, %ecx
839; AVX1-NEXT: andl $1, %ecx
840; AVX1-NEXT: shll $14, %ecx
841; AVX1-NEXT: orl %edx, %ecx
842; AVX1-NEXT: vpextrb $15, %xmm1, %edx
843; AVX1-NEXT: andl $1, %edx
844; AVX1-NEXT: shll $15, %edx
845; AVX1-NEXT: orl %ecx, %edx
846; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
847; AVX1-NEXT: vpextrb $0, %xmm1, %ecx
848; AVX1-NEXT: andl $1, %ecx
849; AVX1-NEXT: shll $16, %ecx
850; AVX1-NEXT: orl %edx, %ecx
851; AVX1-NEXT: vpextrb $1, %xmm1, %edx
852; AVX1-NEXT: andl $1, %edx
853; AVX1-NEXT: shll $17, %edx
854; AVX1-NEXT: orl %ecx, %edx
855; AVX1-NEXT: vpextrb $2, %xmm1, %ecx
856; AVX1-NEXT: andl $1, %ecx
857; AVX1-NEXT: shll $18, %ecx
858; AVX1-NEXT: orl %edx, %ecx
859; AVX1-NEXT: vpextrb $3, %xmm1, %edx
860; AVX1-NEXT: andl $1, %edx
861; AVX1-NEXT: shll $19, %edx
862; AVX1-NEXT: orl %ecx, %edx
863; AVX1-NEXT: vpextrb $4, %xmm1, %ecx
864; AVX1-NEXT: andl $1, %ecx
865; AVX1-NEXT: shll $20, %ecx
866; AVX1-NEXT: orl %edx, %ecx
867; AVX1-NEXT: vpextrb $5, %xmm1, %edx
868; AVX1-NEXT: andl $1, %edx
869; AVX1-NEXT: shll $21, %edx
870; AVX1-NEXT: orl %ecx, %edx
871; AVX1-NEXT: vpextrb $6, %xmm1, %ecx
872; AVX1-NEXT: andl $1, %ecx
873; AVX1-NEXT: shll $22, %ecx
874; AVX1-NEXT: orl %edx, %ecx
875; AVX1-NEXT: vpextrb $7, %xmm1, %edx
876; AVX1-NEXT: andl $1, %edx
877; AVX1-NEXT: shll $23, %edx
878; AVX1-NEXT: orl %ecx, %edx
879; AVX1-NEXT: vpextrb $8, %xmm1, %ecx
880; AVX1-NEXT: andl $1, %ecx
881; AVX1-NEXT: shll $24, %ecx
882; AVX1-NEXT: orl %edx, %ecx
883; AVX1-NEXT: vpextrb $9, %xmm1, %edx
884; AVX1-NEXT: andl $1, %edx
885; AVX1-NEXT: shll $25, %edx
886; AVX1-NEXT: orl %ecx, %edx
887; AVX1-NEXT: vpextrb $10, %xmm1, %ecx
888; AVX1-NEXT: andl $1, %ecx
889; AVX1-NEXT: shll $26, %ecx
890; AVX1-NEXT: orl %edx, %ecx
891; AVX1-NEXT: vpextrb $11, %xmm1, %edx
892; AVX1-NEXT: andl $1, %edx
893; AVX1-NEXT: shll $27, %edx
894; AVX1-NEXT: orl %ecx, %edx
895; AVX1-NEXT: vpextrb $12, %xmm1, %ecx
896; AVX1-NEXT: andl $1, %ecx
897; AVX1-NEXT: shll $28, %ecx
898; AVX1-NEXT: orl %edx, %ecx
899; AVX1-NEXT: vpextrb $13, %xmm1, %edx
900; AVX1-NEXT: andl $1, %edx
901; AVX1-NEXT: shll $29, %edx
902; AVX1-NEXT: orl %ecx, %edx
903; AVX1-NEXT: vpextrb $14, %xmm1, %ecx
904; AVX1-NEXT: andl $1, %ecx
905; AVX1-NEXT: shll $30, %ecx
906; AVX1-NEXT: orl %edx, %ecx
907; AVX1-NEXT: vpextrb $15, %xmm1, %edx
908; AVX1-NEXT: shll $31, %edx
909; AVX1-NEXT: orl %ecx, %edx
910; AVX1-NEXT: orl %eax, %edx
911; AVX1-NEXT: movl %edx, (%rsp)
Simon Pilgrim11e29692017-09-14 10:30:22 +0000912; AVX1-NEXT: vpextrb $1, %xmm0, %eax
Jonas Paulsson7ad28862018-01-20 16:05:10 +0000913; AVX1-NEXT: andl $1, %eax
914; AVX1-NEXT: vpextrb $0, %xmm0, %ecx
915; AVX1-NEXT: andl $1, %ecx
916; AVX1-NEXT: leal (%rcx,%rax,2), %eax
917; AVX1-NEXT: vpextrb $2, %xmm0, %ecx
918; AVX1-NEXT: andl $1, %ecx
919; AVX1-NEXT: leal (%rax,%rcx,4), %eax
920; AVX1-NEXT: vpextrb $3, %xmm0, %ecx
921; AVX1-NEXT: andl $1, %ecx
922; AVX1-NEXT: leal (%rax,%rcx,8), %eax
923; AVX1-NEXT: vpextrb $4, %xmm0, %ecx
924; AVX1-NEXT: andl $1, %ecx
925; AVX1-NEXT: shll $4, %ecx
926; AVX1-NEXT: orl %eax, %ecx
927; AVX1-NEXT: vpextrb $5, %xmm0, %eax
928; AVX1-NEXT: andl $1, %eax
929; AVX1-NEXT: shll $5, %eax
930; AVX1-NEXT: orl %ecx, %eax
931; AVX1-NEXT: vpextrb $6, %xmm0, %ecx
932; AVX1-NEXT: andl $1, %ecx
933; AVX1-NEXT: shll $6, %ecx
934; AVX1-NEXT: vpextrb $7, %xmm0, %edx
935; AVX1-NEXT: andl $1, %edx
936; AVX1-NEXT: shll $7, %edx
937; AVX1-NEXT: orl %ecx, %edx
938; AVX1-NEXT: vpextrb $8, %xmm0, %ecx
939; AVX1-NEXT: andl $1, %ecx
940; AVX1-NEXT: shll $8, %ecx
941; AVX1-NEXT: orl %edx, %ecx
942; AVX1-NEXT: vpextrb $9, %xmm0, %edx
943; AVX1-NEXT: andl $1, %edx
944; AVX1-NEXT: shll $9, %edx
945; AVX1-NEXT: orl %ecx, %edx
946; AVX1-NEXT: vpextrb $10, %xmm0, %ecx
947; AVX1-NEXT: andl $1, %ecx
948; AVX1-NEXT: shll $10, %ecx
949; AVX1-NEXT: orl %edx, %ecx
950; AVX1-NEXT: vpextrb $11, %xmm0, %edx
951; AVX1-NEXT: andl $1, %edx
952; AVX1-NEXT: shll $11, %edx
953; AVX1-NEXT: orl %ecx, %edx
954; AVX1-NEXT: vpextrb $12, %xmm0, %ecx
955; AVX1-NEXT: andl $1, %ecx
956; AVX1-NEXT: shll $12, %ecx
957; AVX1-NEXT: orl %edx, %ecx
958; AVX1-NEXT: vpextrb $13, %xmm0, %edx
959; AVX1-NEXT: andl $1, %edx
960; AVX1-NEXT: shll $13, %edx
961; AVX1-NEXT: orl %ecx, %edx
962; AVX1-NEXT: vpextrb $14, %xmm0, %ecx
963; AVX1-NEXT: andl $1, %ecx
964; AVX1-NEXT: shll $14, %ecx
965; AVX1-NEXT: orl %edx, %ecx
966; AVX1-NEXT: vpextrb $15, %xmm0, %edx
967; AVX1-NEXT: andl $1, %edx
968; AVX1-NEXT: shll $15, %edx
969; AVX1-NEXT: orl %ecx, %edx
970; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
971; AVX1-NEXT: vpextrb $0, %xmm0, %ecx
972; AVX1-NEXT: andl $1, %ecx
973; AVX1-NEXT: shll $16, %ecx
974; AVX1-NEXT: orl %edx, %ecx
975; AVX1-NEXT: vpextrb $1, %xmm0, %edx
976; AVX1-NEXT: andl $1, %edx
977; AVX1-NEXT: shll $17, %edx
978; AVX1-NEXT: orl %ecx, %edx
979; AVX1-NEXT: vpextrb $2, %xmm0, %ecx
980; AVX1-NEXT: andl $1, %ecx
981; AVX1-NEXT: shll $18, %ecx
982; AVX1-NEXT: orl %edx, %ecx
983; AVX1-NEXT: vpextrb $3, %xmm0, %edx
984; AVX1-NEXT: andl $1, %edx
985; AVX1-NEXT: shll $19, %edx
986; AVX1-NEXT: orl %ecx, %edx
987; AVX1-NEXT: vpextrb $4, %xmm0, %ecx
988; AVX1-NEXT: andl $1, %ecx
989; AVX1-NEXT: shll $20, %ecx
990; AVX1-NEXT: orl %edx, %ecx
991; AVX1-NEXT: vpextrb $5, %xmm0, %edx
992; AVX1-NEXT: andl $1, %edx
993; AVX1-NEXT: shll $21, %edx
994; AVX1-NEXT: orl %ecx, %edx
995; AVX1-NEXT: vpextrb $6, %xmm0, %ecx
996; AVX1-NEXT: andl $1, %ecx
997; AVX1-NEXT: shll $22, %ecx
998; AVX1-NEXT: orl %edx, %ecx
999; AVX1-NEXT: vpextrb $7, %xmm0, %edx
1000; AVX1-NEXT: andl $1, %edx
1001; AVX1-NEXT: shll $23, %edx
1002; AVX1-NEXT: orl %ecx, %edx
1003; AVX1-NEXT: vpextrb $8, %xmm0, %ecx
1004; AVX1-NEXT: andl $1, %ecx
1005; AVX1-NEXT: shll $24, %ecx
1006; AVX1-NEXT: orl %edx, %ecx
1007; AVX1-NEXT: vpextrb $9, %xmm0, %edx
1008; AVX1-NEXT: andl $1, %edx
1009; AVX1-NEXT: shll $25, %edx
1010; AVX1-NEXT: orl %ecx, %edx
1011; AVX1-NEXT: vpextrb $10, %xmm0, %ecx
1012; AVX1-NEXT: andl $1, %ecx
1013; AVX1-NEXT: shll $26, %ecx
1014; AVX1-NEXT: orl %edx, %ecx
1015; AVX1-NEXT: vpextrb $11, %xmm0, %edx
1016; AVX1-NEXT: andl $1, %edx
1017; AVX1-NEXT: shll $27, %edx
1018; AVX1-NEXT: orl %ecx, %edx
1019; AVX1-NEXT: vpextrb $12, %xmm0, %ecx
1020; AVX1-NEXT: andl $1, %ecx
1021; AVX1-NEXT: shll $28, %ecx
1022; AVX1-NEXT: orl %edx, %ecx
1023; AVX1-NEXT: vpextrb $13, %xmm0, %edx
1024; AVX1-NEXT: andl $1, %edx
1025; AVX1-NEXT: shll $29, %edx
1026; AVX1-NEXT: orl %ecx, %edx
1027; AVX1-NEXT: vpextrb $14, %xmm0, %ecx
1028; AVX1-NEXT: andl $1, %ecx
1029; AVX1-NEXT: shll $30, %ecx
1030; AVX1-NEXT: orl %edx, %ecx
1031; AVX1-NEXT: vpextrb $15, %xmm0, %edx
1032; AVX1-NEXT: shll $31, %edx
1033; AVX1-NEXT: orl %ecx, %edx
1034; AVX1-NEXT: orl %eax, %edx
1035; AVX1-NEXT: movl %edx, {{[0-9]+}}(%rsp)
Simon Pilgrim11e29692017-09-14 10:30:22 +00001036; AVX1-NEXT: movl (%rsp), %ecx
1037; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
1038; AVX1-NEXT: shlq $32, %rax
1039; AVX1-NEXT: orq %rcx, %rax
1040; AVX1-NEXT: movq %rbp, %rsp
1041; AVX1-NEXT: popq %rbp
1042; AVX1-NEXT: vzeroupper
Simon Pilgrima5793822017-10-31 18:43:24 +00001043; AVX1-NEXT: retq
Simon Pilgrim11e29692017-09-14 10:30:22 +00001044;
1045; AVX2-LABEL: v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001046; AVX2: # %bb.0:
Simon Pilgrim11e29692017-09-14 10:30:22 +00001047; AVX2-NEXT: pushq %rbp
Simon Pilgrim11e29692017-09-14 10:30:22 +00001048; AVX2-NEXT: .cfi_def_cfa_offset 16
Simon Pilgrim11e29692017-09-14 10:30:22 +00001049; AVX2-NEXT: .cfi_offset %rbp, -16
1050; AVX2-NEXT: movq %rsp, %rbp
Simon Pilgrim11e29692017-09-14 10:30:22 +00001051; AVX2-NEXT: .cfi_def_cfa_register %rbp
1052; AVX2-NEXT: andq $-32, %rsp
1053; AVX2-NEXT: subq $64, %rsp
1054; AVX2-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
1055; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm2
1056; AVX2-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm0
1057; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
1058; AVX2-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm1
1059; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
Simon Pilgrim11e29692017-09-14 10:30:22 +00001060; AVX2-NEXT: vpextrb $1, %xmm1, %eax
Jonas Paulsson7ad28862018-01-20 16:05:10 +00001061; AVX2-NEXT: andl $1, %eax
1062; AVX2-NEXT: vpextrb $0, %xmm1, %ecx
1063; AVX2-NEXT: andl $1, %ecx
1064; AVX2-NEXT: leal (%rcx,%rax,2), %eax
1065; AVX2-NEXT: vpextrb $2, %xmm1, %ecx
1066; AVX2-NEXT: andl $1, %ecx
1067; AVX2-NEXT: leal (%rax,%rcx,4), %eax
1068; AVX2-NEXT: vpextrb $3, %xmm1, %ecx
1069; AVX2-NEXT: andl $1, %ecx
1070; AVX2-NEXT: leal (%rax,%rcx,8), %eax
1071; AVX2-NEXT: vpextrb $4, %xmm1, %ecx
1072; AVX2-NEXT: andl $1, %ecx
1073; AVX2-NEXT: shll $4, %ecx
1074; AVX2-NEXT: orl %eax, %ecx
Simon Pilgrim11e29692017-09-14 10:30:22 +00001075; AVX2-NEXT: vpextrb $5, %xmm1, %eax
Jonas Paulsson7ad28862018-01-20 16:05:10 +00001076; AVX2-NEXT: andl $1, %eax
1077; AVX2-NEXT: shll $5, %eax
1078; AVX2-NEXT: orl %ecx, %eax
1079; AVX2-NEXT: vpextrb $6, %xmm1, %ecx
1080; AVX2-NEXT: andl $1, %ecx
1081; AVX2-NEXT: shll $6, %ecx
1082; AVX2-NEXT: vpextrb $7, %xmm1, %edx
1083; AVX2-NEXT: andl $1, %edx
1084; AVX2-NEXT: shll $7, %edx
1085; AVX2-NEXT: orl %ecx, %edx
1086; AVX2-NEXT: vpextrb $8, %xmm1, %ecx
1087; AVX2-NEXT: andl $1, %ecx
1088; AVX2-NEXT: shll $8, %ecx
1089; AVX2-NEXT: orl %edx, %ecx
1090; AVX2-NEXT: vpextrb $9, %xmm1, %edx
1091; AVX2-NEXT: andl $1, %edx
1092; AVX2-NEXT: shll $9, %edx
1093; AVX2-NEXT: orl %ecx, %edx
1094; AVX2-NEXT: vpextrb $10, %xmm1, %ecx
1095; AVX2-NEXT: andl $1, %ecx
1096; AVX2-NEXT: shll $10, %ecx
1097; AVX2-NEXT: orl %edx, %ecx
1098; AVX2-NEXT: vpextrb $11, %xmm1, %edx
1099; AVX2-NEXT: andl $1, %edx
1100; AVX2-NEXT: shll $11, %edx
1101; AVX2-NEXT: orl %ecx, %edx
1102; AVX2-NEXT: vpextrb $12, %xmm1, %ecx
1103; AVX2-NEXT: andl $1, %ecx
1104; AVX2-NEXT: shll $12, %ecx
1105; AVX2-NEXT: orl %edx, %ecx
1106; AVX2-NEXT: vpextrb $13, %xmm1, %edx
1107; AVX2-NEXT: andl $1, %edx
1108; AVX2-NEXT: shll $13, %edx
1109; AVX2-NEXT: orl %ecx, %edx
1110; AVX2-NEXT: vpextrb $14, %xmm1, %ecx
1111; AVX2-NEXT: andl $1, %ecx
1112; AVX2-NEXT: shll $14, %ecx
1113; AVX2-NEXT: orl %edx, %ecx
1114; AVX2-NEXT: vpextrb $15, %xmm1, %edx
1115; AVX2-NEXT: andl $1, %edx
1116; AVX2-NEXT: shll $15, %edx
1117; AVX2-NEXT: orl %ecx, %edx
1118; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
1119; AVX2-NEXT: vpextrb $0, %xmm1, %ecx
1120; AVX2-NEXT: andl $1, %ecx
1121; AVX2-NEXT: shll $16, %ecx
1122; AVX2-NEXT: orl %edx, %ecx
1123; AVX2-NEXT: vpextrb $1, %xmm1, %edx
1124; AVX2-NEXT: andl $1, %edx
1125; AVX2-NEXT: shll $17, %edx
1126; AVX2-NEXT: orl %ecx, %edx
1127; AVX2-NEXT: vpextrb $2, %xmm1, %ecx
1128; AVX2-NEXT: andl $1, %ecx
1129; AVX2-NEXT: shll $18, %ecx
1130; AVX2-NEXT: orl %edx, %ecx
1131; AVX2-NEXT: vpextrb $3, %xmm1, %edx
1132; AVX2-NEXT: andl $1, %edx
1133; AVX2-NEXT: shll $19, %edx
1134; AVX2-NEXT: orl %ecx, %edx
1135; AVX2-NEXT: vpextrb $4, %xmm1, %ecx
1136; AVX2-NEXT: andl $1, %ecx
1137; AVX2-NEXT: shll $20, %ecx
1138; AVX2-NEXT: orl %edx, %ecx
1139; AVX2-NEXT: vpextrb $5, %xmm1, %edx
1140; AVX2-NEXT: andl $1, %edx
1141; AVX2-NEXT: shll $21, %edx
1142; AVX2-NEXT: orl %ecx, %edx
1143; AVX2-NEXT: vpextrb $6, %xmm1, %ecx
1144; AVX2-NEXT: andl $1, %ecx
1145; AVX2-NEXT: shll $22, %ecx
1146; AVX2-NEXT: orl %edx, %ecx
1147; AVX2-NEXT: vpextrb $7, %xmm1, %edx
1148; AVX2-NEXT: andl $1, %edx
1149; AVX2-NEXT: shll $23, %edx
1150; AVX2-NEXT: orl %ecx, %edx
1151; AVX2-NEXT: vpextrb $8, %xmm1, %ecx
1152; AVX2-NEXT: andl $1, %ecx
1153; AVX2-NEXT: shll $24, %ecx
1154; AVX2-NEXT: orl %edx, %ecx
1155; AVX2-NEXT: vpextrb $9, %xmm1, %edx
1156; AVX2-NEXT: andl $1, %edx
1157; AVX2-NEXT: shll $25, %edx
1158; AVX2-NEXT: orl %ecx, %edx
1159; AVX2-NEXT: vpextrb $10, %xmm1, %ecx
1160; AVX2-NEXT: andl $1, %ecx
1161; AVX2-NEXT: shll $26, %ecx
1162; AVX2-NEXT: orl %edx, %ecx
1163; AVX2-NEXT: vpextrb $11, %xmm1, %edx
1164; AVX2-NEXT: andl $1, %edx
1165; AVX2-NEXT: shll $27, %edx
1166; AVX2-NEXT: orl %ecx, %edx
1167; AVX2-NEXT: vpextrb $12, %xmm1, %ecx
1168; AVX2-NEXT: andl $1, %ecx
1169; AVX2-NEXT: shll $28, %ecx
1170; AVX2-NEXT: orl %edx, %ecx
1171; AVX2-NEXT: vpextrb $13, %xmm1, %edx
1172; AVX2-NEXT: andl $1, %edx
1173; AVX2-NEXT: shll $29, %edx
1174; AVX2-NEXT: orl %ecx, %edx
1175; AVX2-NEXT: vpextrb $14, %xmm1, %ecx
1176; AVX2-NEXT: andl $1, %ecx
1177; AVX2-NEXT: shll $30, %ecx
1178; AVX2-NEXT: orl %edx, %ecx
1179; AVX2-NEXT: vpextrb $15, %xmm1, %edx
1180; AVX2-NEXT: shll $31, %edx
1181; AVX2-NEXT: orl %ecx, %edx
1182; AVX2-NEXT: orl %eax, %edx
1183; AVX2-NEXT: movl %edx, (%rsp)
Simon Pilgrim11e29692017-09-14 10:30:22 +00001184; AVX2-NEXT: vpextrb $1, %xmm0, %eax
Jonas Paulsson7ad28862018-01-20 16:05:10 +00001185; AVX2-NEXT: andl $1, %eax
1186; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
1187; AVX2-NEXT: andl $1, %ecx
1188; AVX2-NEXT: leal (%rcx,%rax,2), %eax
1189; AVX2-NEXT: vpextrb $2, %xmm0, %ecx
1190; AVX2-NEXT: andl $1, %ecx
1191; AVX2-NEXT: leal (%rax,%rcx,4), %eax
1192; AVX2-NEXT: vpextrb $3, %xmm0, %ecx
1193; AVX2-NEXT: andl $1, %ecx
1194; AVX2-NEXT: leal (%rax,%rcx,8), %eax
1195; AVX2-NEXT: vpextrb $4, %xmm0, %ecx
1196; AVX2-NEXT: andl $1, %ecx
1197; AVX2-NEXT: shll $4, %ecx
1198; AVX2-NEXT: orl %eax, %ecx
1199; AVX2-NEXT: vpextrb $5, %xmm0, %eax
1200; AVX2-NEXT: andl $1, %eax
1201; AVX2-NEXT: shll $5, %eax
1202; AVX2-NEXT: orl %ecx, %eax
1203; AVX2-NEXT: vpextrb $6, %xmm0, %ecx
1204; AVX2-NEXT: andl $1, %ecx
1205; AVX2-NEXT: shll $6, %ecx
1206; AVX2-NEXT: vpextrb $7, %xmm0, %edx
1207; AVX2-NEXT: andl $1, %edx
1208; AVX2-NEXT: shll $7, %edx
1209; AVX2-NEXT: orl %ecx, %edx
1210; AVX2-NEXT: vpextrb $8, %xmm0, %ecx
1211; AVX2-NEXT: andl $1, %ecx
1212; AVX2-NEXT: shll $8, %ecx
1213; AVX2-NEXT: orl %edx, %ecx
1214; AVX2-NEXT: vpextrb $9, %xmm0, %edx
1215; AVX2-NEXT: andl $1, %edx
1216; AVX2-NEXT: shll $9, %edx
1217; AVX2-NEXT: orl %ecx, %edx
1218; AVX2-NEXT: vpextrb $10, %xmm0, %ecx
1219; AVX2-NEXT: andl $1, %ecx
1220; AVX2-NEXT: shll $10, %ecx
1221; AVX2-NEXT: orl %edx, %ecx
1222; AVX2-NEXT: vpextrb $11, %xmm0, %edx
1223; AVX2-NEXT: andl $1, %edx
1224; AVX2-NEXT: shll $11, %edx
1225; AVX2-NEXT: orl %ecx, %edx
1226; AVX2-NEXT: vpextrb $12, %xmm0, %ecx
1227; AVX2-NEXT: andl $1, %ecx
1228; AVX2-NEXT: shll $12, %ecx
1229; AVX2-NEXT: orl %edx, %ecx
1230; AVX2-NEXT: vpextrb $13, %xmm0, %edx
1231; AVX2-NEXT: andl $1, %edx
1232; AVX2-NEXT: shll $13, %edx
1233; AVX2-NEXT: orl %ecx, %edx
1234; AVX2-NEXT: vpextrb $14, %xmm0, %ecx
1235; AVX2-NEXT: andl $1, %ecx
1236; AVX2-NEXT: shll $14, %ecx
1237; AVX2-NEXT: orl %edx, %ecx
1238; AVX2-NEXT: vpextrb $15, %xmm0, %edx
1239; AVX2-NEXT: andl $1, %edx
1240; AVX2-NEXT: shll $15, %edx
1241; AVX2-NEXT: orl %ecx, %edx
1242; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1243; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
1244; AVX2-NEXT: andl $1, %ecx
1245; AVX2-NEXT: shll $16, %ecx
1246; AVX2-NEXT: orl %edx, %ecx
1247; AVX2-NEXT: vpextrb $1, %xmm0, %edx
1248; AVX2-NEXT: andl $1, %edx
1249; AVX2-NEXT: shll $17, %edx
1250; AVX2-NEXT: orl %ecx, %edx
1251; AVX2-NEXT: vpextrb $2, %xmm0, %ecx
1252; AVX2-NEXT: andl $1, %ecx
1253; AVX2-NEXT: shll $18, %ecx
1254; AVX2-NEXT: orl %edx, %ecx
1255; AVX2-NEXT: vpextrb $3, %xmm0, %edx
1256; AVX2-NEXT: andl $1, %edx
1257; AVX2-NEXT: shll $19, %edx
1258; AVX2-NEXT: orl %ecx, %edx
1259; AVX2-NEXT: vpextrb $4, %xmm0, %ecx
1260; AVX2-NEXT: andl $1, %ecx
1261; AVX2-NEXT: shll $20, %ecx
1262; AVX2-NEXT: orl %edx, %ecx
1263; AVX2-NEXT: vpextrb $5, %xmm0, %edx
1264; AVX2-NEXT: andl $1, %edx
1265; AVX2-NEXT: shll $21, %edx
1266; AVX2-NEXT: orl %ecx, %edx
1267; AVX2-NEXT: vpextrb $6, %xmm0, %ecx
1268; AVX2-NEXT: andl $1, %ecx
1269; AVX2-NEXT: shll $22, %ecx
1270; AVX2-NEXT: orl %edx, %ecx
1271; AVX2-NEXT: vpextrb $7, %xmm0, %edx
1272; AVX2-NEXT: andl $1, %edx
1273; AVX2-NEXT: shll $23, %edx
1274; AVX2-NEXT: orl %ecx, %edx
1275; AVX2-NEXT: vpextrb $8, %xmm0, %ecx
1276; AVX2-NEXT: andl $1, %ecx
1277; AVX2-NEXT: shll $24, %ecx
1278; AVX2-NEXT: orl %edx, %ecx
1279; AVX2-NEXT: vpextrb $9, %xmm0, %edx
1280; AVX2-NEXT: andl $1, %edx
1281; AVX2-NEXT: shll $25, %edx
1282; AVX2-NEXT: orl %ecx, %edx
1283; AVX2-NEXT: vpextrb $10, %xmm0, %ecx
1284; AVX2-NEXT: andl $1, %ecx
1285; AVX2-NEXT: shll $26, %ecx
1286; AVX2-NEXT: orl %edx, %ecx
1287; AVX2-NEXT: vpextrb $11, %xmm0, %edx
1288; AVX2-NEXT: andl $1, %edx
1289; AVX2-NEXT: shll $27, %edx
1290; AVX2-NEXT: orl %ecx, %edx
1291; AVX2-NEXT: vpextrb $12, %xmm0, %ecx
1292; AVX2-NEXT: andl $1, %ecx
1293; AVX2-NEXT: shll $28, %ecx
1294; AVX2-NEXT: orl %edx, %ecx
1295; AVX2-NEXT: vpextrb $13, %xmm0, %edx
1296; AVX2-NEXT: andl $1, %edx
1297; AVX2-NEXT: shll $29, %edx
1298; AVX2-NEXT: orl %ecx, %edx
1299; AVX2-NEXT: vpextrb $14, %xmm0, %ecx
1300; AVX2-NEXT: andl $1, %ecx
1301; AVX2-NEXT: shll $30, %ecx
1302; AVX2-NEXT: orl %edx, %ecx
1303; AVX2-NEXT: vpextrb $15, %xmm0, %edx
1304; AVX2-NEXT: shll $31, %edx
1305; AVX2-NEXT: orl %ecx, %edx
1306; AVX2-NEXT: orl %eax, %edx
1307; AVX2-NEXT: movl %edx, {{[0-9]+}}(%rsp)
Simon Pilgrim11e29692017-09-14 10:30:22 +00001308; AVX2-NEXT: movl (%rsp), %ecx
1309; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
1310; AVX2-NEXT: shlq $32, %rax
1311; AVX2-NEXT: orq %rcx, %rax
1312; AVX2-NEXT: movq %rbp, %rsp
1313; AVX2-NEXT: popq %rbp
1314; AVX2-NEXT: vzeroupper
Simon Pilgrima5793822017-10-31 18:43:24 +00001315; AVX2-NEXT: retq
Simon Pilgrim11e29692017-09-14 10:30:22 +00001316;
1317; AVX512F-LABEL: v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001318; AVX512F: # %bb.0:
Simon Pilgrim11e29692017-09-14 10:30:22 +00001319; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
Craig Topper76adcc82018-01-23 14:25:39 +00001320; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
1321; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1322; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k1
1323; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
1324; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k2
Simon Pilgrim11e29692017-09-14 10:30:22 +00001325; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
Craig Topper76adcc82018-01-23 14:25:39 +00001326; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
1327; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
1328; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k3
1329; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1330; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k4
1331; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm0
1332; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
1333; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
1334; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
Simon Pilgrim11e29692017-09-14 10:30:22 +00001335; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm2
Craig Topper76adcc82018-01-23 14:25:39 +00001336; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3
1337; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
Simon Pilgrim11e29692017-09-14 10:30:22 +00001338; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
Craig Topper76adcc82018-01-23 14:25:39 +00001339; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 {%k4}
1340; AVX512F-NEXT: kmovw %k0, %eax
1341; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0 {%k3}
1342; AVX512F-NEXT: kmovw %k0, %ecx
1343; AVX512F-NEXT: shll $16, %ecx
1344; AVX512F-NEXT: orl %eax, %ecx
1345; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k2}
1346; AVX512F-NEXT: kmovw %k0, %edx
1347; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k1}
1348; AVX512F-NEXT: kmovw %k0, %eax
1349; AVX512F-NEXT: shll $16, %eax
1350; AVX512F-NEXT: orl %edx, %eax
Simon Pilgrim11e29692017-09-14 10:30:22 +00001351; AVX512F-NEXT: shlq $32, %rax
1352; AVX512F-NEXT: orq %rcx, %rax
Simon Pilgrim11e29692017-09-14 10:30:22 +00001353; AVX512F-NEXT: vzeroupper
Simon Pilgrima5793822017-10-31 18:43:24 +00001354; AVX512F-NEXT: retq
Simon Pilgrim11e29692017-09-14 10:30:22 +00001355;
1356; AVX512BW-LABEL: v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001357; AVX512BW: # %bb.0:
Simon Pilgrim11e29692017-09-14 10:30:22 +00001358; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k1
1359; AVX512BW-NEXT: vpcmpgtb %zmm3, %zmm2, %k0 {%k1}
1360; AVX512BW-NEXT: kmovq %k0, %rax
1361; AVX512BW-NEXT: vzeroupper
Simon Pilgrima5793822017-10-31 18:43:24 +00001362; AVX512BW-NEXT: retq
Simon Pilgrim11e29692017-09-14 10:30:22 +00001363 %x0 = icmp sgt <64 x i8> %a, %b
1364 %x1 = icmp sgt <64 x i8> %c, %d
1365 %y = and <64 x i1> %x0, %x1
1366 %res = bitcast <64 x i1> %y to i64
1367 ret i64 %res
1368}