; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW

; Codegen check for a bitmask built from two ANDed vector compares.
; Evaluates (a > b) & (c > d) using signed compares on 8 lanes of i64 and
; bitcasts the resulting <8 x i1> to an i8.
define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
; SSE-LABEL: v8i64:
; SSE: # %bb.0:
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtq %xmm7, %xmm3
; SSE-NEXT: pcmpgtq %xmm6, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT: pcmpgtq %xmm5, %xmm1
; SSE-NEXT: pcmpgtq %xmm4, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: packssdw %xmm2, %xmm0
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: shufps {{.*#+}} xmm10 = xmm10[0,2],xmm11[0,2]
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,2],xmm9[0,2]
; SSE-NEXT: packssdw %xmm10, %xmm8
; SSE-NEXT: pand %xmm0, %xmm8
; SSE-NEXT: packsswb %xmm0, %xmm8
; SSE-NEXT: pmovmskb %xmm8, %eax
; SSE-NEXT: # kill: def %al killed %al killed %eax
; SSE-NEXT: retq
;
; AVX1-LABEL: v8i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT: vpcmpgtq %xmm8, %xmm9, %xmm8
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm8, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm9
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm8, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm9[0]
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm2
; AVX1-NEXT: vpackssdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpshufb %xmm8, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm3
; AVX1-NEXT: vpackssdw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpshufb %xmm8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: # kill: def %al killed %al killed %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v8i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpcmpgtq %ymm7, %ymm5, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
; AVX2-NEXT: vpackssdw %xmm4, %xmm2, %xmm2
; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: # kill: def %al killed %al killed %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v8i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
; AVX512F-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v8i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpcmpgtq %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = icmp sgt <8 x i64> %a, %b
  %x1 = icmp sgt <8 x i64> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

; Codegen check for a bitmask built from two ANDed vector compares.
; Evaluates (a > b) & (c > d) using ordered fcmp on 8 lanes of double and
; bitcasts the resulting <8 x i1> to an i8.
define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
; SSE-LABEL: v8f64:
; SSE: # %bb.0:
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: cmpltpd %xmm3, %xmm7
; SSE-NEXT: cmpltpd %xmm2, %xmm6
; SSE-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
; SSE-NEXT: cmpltpd %xmm1, %xmm5
; SSE-NEXT: cmpltpd %xmm0, %xmm4
; SSE-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,2],xmm5[0,2]
; SSE-NEXT: packssdw %xmm6, %xmm4
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: shufps {{.*#+}} xmm10 = xmm10[0,2],xmm11[0,2]
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,2],xmm9[0,2]
; SSE-NEXT: packssdw %xmm10, %xmm8
; SSE-NEXT: pand %xmm4, %xmm8
; SSE-NEXT: packsswb %xmm0, %xmm8
; SSE-NEXT: pmovmskb %xmm8, %eax
; SSE-NEXT: # kill: def %al killed %al killed %eax
; SSE-NEXT: retq
;
; AVX12-LABEL: v8f64:
; AVX12: # %bb.0:
; AVX12-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX12-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX12-NEXT: vcmpltpd %ymm5, %ymm7, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vcmpltpd %ymm4, %ymm6, %ymm2
; AVX12-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX12-NEXT: vpackssdw %xmm4, %xmm2, %xmm2
; AVX12-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: def %al killed %al killed %eax
; AVX12-NEXT: vzeroupper
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v8f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; AVX512F-NEXT: vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v8f64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; AVX512BW-NEXT: vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = fcmp ogt <8 x double> %a, %b
  %x1 = fcmp ogt <8 x double> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

; Codegen check for a bitmask built from two ANDed vector compares.
; Evaluates (a > b) & (c > d) using signed compares on 32 lanes of i16 and
; bitcasts the resulting <32 x i1> to an i32.
define i32 @v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i16> %d) {
; SSE-LABEL: v32i16:
; SSE: # %bb.0:
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtw %xmm5, %xmm1
; SSE-NEXT: pcmpgtw %xmm4, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: pcmpgtw %xmm7, %xmm3
; SSE-NEXT: pcmpgtw %xmm6, %xmm2
; SSE-NEXT: packsswb %xmm3, %xmm2
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: packsswb %xmm11, %xmm10
; SSE-NEXT: pand %xmm0, %xmm10
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: packsswb %xmm9, %xmm8
; SSE-NEXT: pand %xmm2, %xmm8
; SSE-NEXT: pmovmskb %xmm10, %ecx
; SSE-NEXT: pmovmskb %xmm8, %eax
; SSE-NEXT: shll $16, %eax
; SSE-NEXT: orl %ecx, %eax
; SSE-NEXT: retq
;
; AVX1-LABEL: v32i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT: vpcmpgtw %xmm8, %xmm9, %xmm8
; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm8, %xmm1, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm7, %xmm5, %xmm2
; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm8, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm6, %xmm4, %xmm3
; AVX1-NEXT: vpacksswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: vpmovmskb %xmm1, %eax
; AVX1-NEXT: shll $16, %eax
; AVX1-NEXT: orl %ecx, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v32i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: .cfi_def_cfa_offset 16
; AVX512F-NEXT: .cfi_offset %rbp, -16
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: .cfi_def_cfa_register %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, (%rsp)
; AVX512F-NEXT: movl (%rsp), %eax
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtw %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = icmp sgt <32 x i16> %a, %b
  %x1 = icmp sgt <32 x i16> %c, %d
  %y = and <32 x i1> %x0, %x1
  %res = bitcast <32 x i1> %y to i32
  ret i32 %res
}

; Codegen check for a bitmask built from two ANDed vector compares.
; Evaluates (a > b) & (c > d) using signed compares on 16 lanes of i32 and
; bitcasts the resulting <16 x i1> to an i16.
define i16 @v16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i32> %d) {
; SSE-LABEL: v16i32:
; SSE: # %bb.0:
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtd %xmm7, %xmm3
; SSE-NEXT: pcmpgtd %xmm6, %xmm2
; SSE-NEXT: packssdw %xmm3, %xmm2
; SSE-NEXT: pcmpgtd %xmm5, %xmm1
; SSE-NEXT: pcmpgtd %xmm4, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: packsswb %xmm2, %xmm0
; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: packssdw %xmm11, %xmm10
; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: packssdw %xmm9, %xmm8
; SSE-NEXT: packsswb %xmm10, %xmm8
; SSE-NEXT: pand %xmm0, %xmm8
; SSE-NEXT: pmovmskb %xmm8, %eax
; SSE-NEXT: # kill: def %ax killed %ax killed %eax
; SSE-NEXT: retq
;
; AVX1-LABEL: v16i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT: vpcmpgtd %xmm8, %xmm9, %xmm8
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm8, %xmm1, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm8, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm7, %xmm5, %xmm2
; AVX1-NEXT: vpackssdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm6, %xmm4, %xmm3
; AVX1-NEXT: vpackssdw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: # kill: def %ax killed %ax killed %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: v16i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpgtd %ymm7, %ymm5, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtd %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: # kill: def %ax killed %ax killed %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: v16i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
; AVX512F-NEXT: vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def %ax killed %ax killed %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: def %ax killed %ax killed %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = icmp sgt <16 x i32> %a, %b
  %x1 = icmp sgt <16 x i32> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}

; Codegen check for a bitmask built from two ANDed vector compares.
; Evaluates (a > b) & (c > d) using ordered fcmp on 16 lanes of float and
; bitcasts the resulting <16 x i1> to an i16.
define i16 @v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) {
; SSE-LABEL: v16f32:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: cmpltps %xmm3, %xmm7
; SSE-NEXT: cmpltps %xmm2, %xmm6
; SSE-NEXT: packssdw %xmm7, %xmm6
; SSE-NEXT: cmpltps %xmm1, %xmm5
; SSE-NEXT: cmpltps %xmm0, %xmm4
; SSE-NEXT: packssdw %xmm5, %xmm4
; SSE-NEXT: packsswb %xmm6, %xmm4
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT: packssdw %xmm11, %xmm10
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT: packssdw %xmm9, %xmm8
; SSE-NEXT: packsswb %xmm10, %xmm8
; SSE-NEXT: pand %xmm4, %xmm8
; SSE-NEXT: pmovmskb %xmm8, %eax
; SSE-NEXT: # kill: def %ax killed %ax killed %eax
; SSE-NEXT: retq
;
; AVX12-LABEL: v16f32:
; AVX12: # %bb.0:
; AVX12-NEXT: vcmpltps %ymm1, %ymm3, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
; AVX12-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX12-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vcmpltps %ymm5, %ymm7, %ymm1
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX12-NEXT: vcmpltps %ymm4, %ymm6, %ymm2
; AVX12-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX12-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
; AVX12-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: def %ax killed %ax killed %eax
; AVX12-NEXT: vzeroupper
; AVX12-NEXT: retq
;
; AVX512F-LABEL: v16f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1
; AVX512F-NEXT: vcmpltps %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def %ax killed %ax killed %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16f32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vcmpltps %zmm0, %zmm1, %k1
; AVX512BW-NEXT: vcmpltps %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: # kill: def %ax killed %ax killed %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %x0 = fcmp ogt <16 x float> %a, %b
  %x1 = fcmp ogt <16 x float> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}

; v64i8 test: signed greater-than compares of <64 x i8> %a against %b and of
; %c against %d, an AND of the two <64 x i1> results, and a bitcast of the
; combined mask to the i64 return value.
; The assertion lines below give the expected llc output for each RUN
; configuration. They are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
; With +avx512bw the expectation is two vpcmpgtb compares into mask registers
; (the second one masked by %k1) followed by a kmovq into %rax. The other
; configurations expect the mask to be assembled through stores to the stack.
506define i64 @v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) {
507; SSE-LABEL: v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000508; SSE: # %bb.0:
Simon Pilgrim11e29692017-09-14 10:30:22 +0000509; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11
510; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10
511; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
512; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
513; SSE-NEXT: pcmpgtb %xmm6, %xmm2
514; SSE-NEXT: pcmpgtb %xmm7, %xmm3
515; SSE-NEXT: pcmpgtb %xmm4, %xmm0
516; SSE-NEXT: pcmpgtb %xmm5, %xmm1
517; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm8
518; SSE-NEXT: pand %xmm2, %xmm8
519; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm9
520; SSE-NEXT: pand %xmm3, %xmm9
521; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm10
522; SSE-NEXT: pand %xmm0, %xmm10
523; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm11
524; SSE-NEXT: pand %xmm1, %xmm11
525; SSE-NEXT: pextrb $15, %xmm11, %eax
526; SSE-NEXT: andb $1, %al
527; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
528; SSE-NEXT: pextrb $14, %xmm11, %eax
529; SSE-NEXT: andb $1, %al
530; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
531; SSE-NEXT: pextrb $13, %xmm11, %eax
532; SSE-NEXT: andb $1, %al
533; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
534; SSE-NEXT: pextrb $12, %xmm11, %eax
535; SSE-NEXT: andb $1, %al
536; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
537; SSE-NEXT: pextrb $11, %xmm11, %eax
538; SSE-NEXT: andb $1, %al
539; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
540; SSE-NEXT: pextrb $10, %xmm11, %eax
541; SSE-NEXT: andb $1, %al
542; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
543; SSE-NEXT: pextrb $9, %xmm11, %eax
544; SSE-NEXT: andb $1, %al
545; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
546; SSE-NEXT: pextrb $8, %xmm11, %eax
547; SSE-NEXT: andb $1, %al
548; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
549; SSE-NEXT: pextrb $7, %xmm11, %eax
550; SSE-NEXT: andb $1, %al
551; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
552; SSE-NEXT: pextrb $6, %xmm11, %eax
553; SSE-NEXT: andb $1, %al
554; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
555; SSE-NEXT: pextrb $5, %xmm11, %eax
556; SSE-NEXT: andb $1, %al
557; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
558; SSE-NEXT: pextrb $4, %xmm11, %eax
559; SSE-NEXT: andb $1, %al
560; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
561; SSE-NEXT: pextrb $3, %xmm11, %eax
562; SSE-NEXT: andb $1, %al
563; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
564; SSE-NEXT: pextrb $2, %xmm11, %eax
565; SSE-NEXT: andb $1, %al
566; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
567; SSE-NEXT: pextrb $1, %xmm11, %eax
568; SSE-NEXT: andb $1, %al
569; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
570; SSE-NEXT: pextrb $0, %xmm11, %eax
571; SSE-NEXT: andb $1, %al
572; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
573; SSE-NEXT: pextrb $15, %xmm10, %eax
574; SSE-NEXT: andb $1, %al
575; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
576; SSE-NEXT: pextrb $14, %xmm10, %eax
577; SSE-NEXT: andb $1, %al
578; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
579; SSE-NEXT: pextrb $13, %xmm10, %eax
580; SSE-NEXT: andb $1, %al
581; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
582; SSE-NEXT: pextrb $12, %xmm10, %eax
583; SSE-NEXT: andb $1, %al
584; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
585; SSE-NEXT: pextrb $11, %xmm10, %eax
586; SSE-NEXT: andb $1, %al
587; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
588; SSE-NEXT: pextrb $10, %xmm10, %eax
589; SSE-NEXT: andb $1, %al
590; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
591; SSE-NEXT: pextrb $9, %xmm10, %eax
592; SSE-NEXT: andb $1, %al
593; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
594; SSE-NEXT: pextrb $8, %xmm10, %eax
595; SSE-NEXT: andb $1, %al
596; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
597; SSE-NEXT: pextrb $7, %xmm10, %eax
598; SSE-NEXT: andb $1, %al
599; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
600; SSE-NEXT: pextrb $6, %xmm10, %eax
601; SSE-NEXT: andb $1, %al
602; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
603; SSE-NEXT: pextrb $5, %xmm10, %eax
604; SSE-NEXT: andb $1, %al
605; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
606; SSE-NEXT: pextrb $4, %xmm10, %eax
607; SSE-NEXT: andb $1, %al
608; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
609; SSE-NEXT: pextrb $3, %xmm10, %eax
610; SSE-NEXT: andb $1, %al
611; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
612; SSE-NEXT: pextrb $2, %xmm10, %eax
613; SSE-NEXT: andb $1, %al
614; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
615; SSE-NEXT: pextrb $1, %xmm10, %eax
616; SSE-NEXT: andb $1, %al
617; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
618; SSE-NEXT: pextrb $0, %xmm10, %eax
619; SSE-NEXT: andb $1, %al
620; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
621; SSE-NEXT: pextrb $15, %xmm9, %eax
622; SSE-NEXT: andb $1, %al
623; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
624; SSE-NEXT: pextrb $14, %xmm9, %eax
625; SSE-NEXT: andb $1, %al
626; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
627; SSE-NEXT: pextrb $13, %xmm9, %eax
628; SSE-NEXT: andb $1, %al
629; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
630; SSE-NEXT: pextrb $12, %xmm9, %eax
631; SSE-NEXT: andb $1, %al
632; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
633; SSE-NEXT: pextrb $11, %xmm9, %eax
634; SSE-NEXT: andb $1, %al
635; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
636; SSE-NEXT: pextrb $10, %xmm9, %eax
637; SSE-NEXT: andb $1, %al
638; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
639; SSE-NEXT: pextrb $9, %xmm9, %eax
640; SSE-NEXT: andb $1, %al
641; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
642; SSE-NEXT: pextrb $8, %xmm9, %eax
643; SSE-NEXT: andb $1, %al
644; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
645; SSE-NEXT: pextrb $7, %xmm9, %eax
646; SSE-NEXT: andb $1, %al
647; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
648; SSE-NEXT: pextrb $6, %xmm9, %eax
649; SSE-NEXT: andb $1, %al
650; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
651; SSE-NEXT: pextrb $5, %xmm9, %eax
652; SSE-NEXT: andb $1, %al
653; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
654; SSE-NEXT: pextrb $4, %xmm9, %eax
655; SSE-NEXT: andb $1, %al
656; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
657; SSE-NEXT: pextrb $3, %xmm9, %eax
658; SSE-NEXT: andb $1, %al
659; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
660; SSE-NEXT: pextrb $2, %xmm9, %eax
661; SSE-NEXT: andb $1, %al
662; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
663; SSE-NEXT: pextrb $1, %xmm9, %eax
664; SSE-NEXT: andb $1, %al
665; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
666; SSE-NEXT: pextrb $0, %xmm9, %eax
667; SSE-NEXT: andb $1, %al
668; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
669; SSE-NEXT: pextrb $15, %xmm8, %eax
670; SSE-NEXT: andb $1, %al
671; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
672; SSE-NEXT: pextrb $14, %xmm8, %eax
673; SSE-NEXT: andb $1, %al
674; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
675; SSE-NEXT: pextrb $13, %xmm8, %eax
676; SSE-NEXT: andb $1, %al
677; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
678; SSE-NEXT: pextrb $12, %xmm8, %eax
679; SSE-NEXT: andb $1, %al
680; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
681; SSE-NEXT: pextrb $11, %xmm8, %eax
682; SSE-NEXT: andb $1, %al
683; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
684; SSE-NEXT: pextrb $10, %xmm8, %eax
685; SSE-NEXT: andb $1, %al
686; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
687; SSE-NEXT: pextrb $9, %xmm8, %eax
688; SSE-NEXT: andb $1, %al
689; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
690; SSE-NEXT: pextrb $8, %xmm8, %eax
691; SSE-NEXT: andb $1, %al
692; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
693; SSE-NEXT: pextrb $7, %xmm8, %eax
694; SSE-NEXT: andb $1, %al
695; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
696; SSE-NEXT: pextrb $6, %xmm8, %eax
697; SSE-NEXT: andb $1, %al
698; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
699; SSE-NEXT: pextrb $5, %xmm8, %eax
700; SSE-NEXT: andb $1, %al
701; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
702; SSE-NEXT: pextrb $4, %xmm8, %eax
703; SSE-NEXT: andb $1, %al
704; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
705; SSE-NEXT: pextrb $3, %xmm8, %eax
706; SSE-NEXT: andb $1, %al
707; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
708; SSE-NEXT: pextrb $2, %xmm8, %eax
709; SSE-NEXT: andb $1, %al
710; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
711; SSE-NEXT: pextrb $1, %xmm8, %eax
712; SSE-NEXT: andb $1, %al
713; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
714; SSE-NEXT: pextrb $0, %xmm8, %eax
715; SSE-NEXT: andb $1, %al
716; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
717; SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
718; SSE-NEXT: shll $16, %eax
719; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
720; SSE-NEXT: orl %eax, %ecx
721; SSE-NEXT: movl -{{[0-9]+}}(%rsp), %edx
722; SSE-NEXT: shll $16, %edx
723; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
724; SSE-NEXT: orl %edx, %eax
725; SSE-NEXT: shlq $32, %rax
726; SSE-NEXT: orq %rcx, %rax
Simon Pilgrima5793822017-10-31 18:43:24 +0000727; SSE-NEXT: retq
Simon Pilgrim11e29692017-09-14 10:30:22 +0000728;
729; AVX1-LABEL: v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000730; AVX1: # %bb.0:
Simon Pilgrim11e29692017-09-14 10:30:22 +0000731; AVX1-NEXT: pushq %rbp
Simon Pilgrim11e29692017-09-14 10:30:22 +0000732; AVX1-NEXT: .cfi_def_cfa_offset 16
Simon Pilgrim11e29692017-09-14 10:30:22 +0000733; AVX1-NEXT: .cfi_offset %rbp, -16
734; AVX1-NEXT: movq %rsp, %rbp
Simon Pilgrim11e29692017-09-14 10:30:22 +0000735; AVX1-NEXT: .cfi_def_cfa_register %rbp
736; AVX1-NEXT: andq $-32, %rsp
737; AVX1-NEXT: subq $64, %rsp
738; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
739; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
740; AVX1-NEXT: vpcmpgtb %xmm8, %xmm9, %xmm8
741; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1
742; AVX1-NEXT: vinsertf128 $1, %xmm8, %ymm1, %ymm8
743; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
744; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
745; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm1
746; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0
747; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
748; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm0
749; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
750; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
751; AVX1-NEXT: vpcmpgtb %xmm7, %xmm5, %xmm2
752; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
753; AVX1-NEXT: vandps %ymm0, %ymm8, %ymm0
754; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
755; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
756; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
757; AVX1-NEXT: vpcmpgtb %xmm6, %xmm4, %xmm3
758; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
759; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
760; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
761; AVX1-NEXT: vpextrb $15, %xmm2, %eax
762; AVX1-NEXT: andb $1, %al
763; AVX1-NEXT: movb %al, (%rsp)
764; AVX1-NEXT: vpextrb $14, %xmm2, %eax
765; AVX1-NEXT: andb $1, %al
766; AVX1-NEXT: movb %al, (%rsp)
767; AVX1-NEXT: vpextrb $13, %xmm2, %eax
768; AVX1-NEXT: andb $1, %al
769; AVX1-NEXT: movb %al, (%rsp)
770; AVX1-NEXT: vpextrb $12, %xmm2, %eax
771; AVX1-NEXT: andb $1, %al
772; AVX1-NEXT: movb %al, (%rsp)
773; AVX1-NEXT: vpextrb $11, %xmm2, %eax
774; AVX1-NEXT: andb $1, %al
775; AVX1-NEXT: movb %al, (%rsp)
776; AVX1-NEXT: vpextrb $10, %xmm2, %eax
777; AVX1-NEXT: andb $1, %al
778; AVX1-NEXT: movb %al, (%rsp)
779; AVX1-NEXT: vpextrb $9, %xmm2, %eax
780; AVX1-NEXT: andb $1, %al
781; AVX1-NEXT: movb %al, (%rsp)
782; AVX1-NEXT: vpextrb $8, %xmm2, %eax
783; AVX1-NEXT: andb $1, %al
784; AVX1-NEXT: movb %al, (%rsp)
785; AVX1-NEXT: vpextrb $7, %xmm2, %eax
786; AVX1-NEXT: andb $1, %al
787; AVX1-NEXT: movb %al, (%rsp)
788; AVX1-NEXT: vpextrb $6, %xmm2, %eax
789; AVX1-NEXT: andb $1, %al
790; AVX1-NEXT: movb %al, (%rsp)
791; AVX1-NEXT: vpextrb $5, %xmm2, %eax
792; AVX1-NEXT: andb $1, %al
793; AVX1-NEXT: movb %al, (%rsp)
794; AVX1-NEXT: vpextrb $4, %xmm2, %eax
795; AVX1-NEXT: andb $1, %al
796; AVX1-NEXT: movb %al, (%rsp)
797; AVX1-NEXT: vpextrb $3, %xmm2, %eax
798; AVX1-NEXT: andb $1, %al
799; AVX1-NEXT: movb %al, (%rsp)
800; AVX1-NEXT: vpextrb $2, %xmm2, %eax
801; AVX1-NEXT: andb $1, %al
802; AVX1-NEXT: movb %al, (%rsp)
803; AVX1-NEXT: vpextrb $1, %xmm2, %eax
804; AVX1-NEXT: andb $1, %al
805; AVX1-NEXT: movb %al, (%rsp)
806; AVX1-NEXT: vpextrb $0, %xmm2, %eax
807; AVX1-NEXT: andb $1, %al
808; AVX1-NEXT: movb %al, (%rsp)
809; AVX1-NEXT: vpextrb $15, %xmm1, %eax
810; AVX1-NEXT: andb $1, %al
811; AVX1-NEXT: movb %al, (%rsp)
812; AVX1-NEXT: vpextrb $14, %xmm1, %eax
813; AVX1-NEXT: andb $1, %al
814; AVX1-NEXT: movb %al, (%rsp)
815; AVX1-NEXT: vpextrb $13, %xmm1, %eax
816; AVX1-NEXT: andb $1, %al
817; AVX1-NEXT: movb %al, (%rsp)
818; AVX1-NEXT: vpextrb $12, %xmm1, %eax
819; AVX1-NEXT: andb $1, %al
820; AVX1-NEXT: movb %al, (%rsp)
821; AVX1-NEXT: vpextrb $11, %xmm1, %eax
822; AVX1-NEXT: andb $1, %al
823; AVX1-NEXT: movb %al, (%rsp)
824; AVX1-NEXT: vpextrb $10, %xmm1, %eax
825; AVX1-NEXT: andb $1, %al
826; AVX1-NEXT: movb %al, (%rsp)
827; AVX1-NEXT: vpextrb $9, %xmm1, %eax
828; AVX1-NEXT: andb $1, %al
829; AVX1-NEXT: movb %al, (%rsp)
830; AVX1-NEXT: vpextrb $8, %xmm1, %eax
831; AVX1-NEXT: andb $1, %al
832; AVX1-NEXT: movb %al, (%rsp)
833; AVX1-NEXT: vpextrb $7, %xmm1, %eax
834; AVX1-NEXT: andb $1, %al
835; AVX1-NEXT: movb %al, (%rsp)
836; AVX1-NEXT: vpextrb $6, %xmm1, %eax
837; AVX1-NEXT: andb $1, %al
838; AVX1-NEXT: movb %al, (%rsp)
839; AVX1-NEXT: vpextrb $5, %xmm1, %eax
840; AVX1-NEXT: andb $1, %al
841; AVX1-NEXT: movb %al, (%rsp)
842; AVX1-NEXT: vpextrb $4, %xmm1, %eax
843; AVX1-NEXT: andb $1, %al
844; AVX1-NEXT: movb %al, (%rsp)
845; AVX1-NEXT: vpextrb $3, %xmm1, %eax
846; AVX1-NEXT: andb $1, %al
847; AVX1-NEXT: movb %al, (%rsp)
848; AVX1-NEXT: vpextrb $2, %xmm1, %eax
849; AVX1-NEXT: andb $1, %al
850; AVX1-NEXT: movb %al, (%rsp)
851; AVX1-NEXT: vpextrb $1, %xmm1, %eax
852; AVX1-NEXT: andb $1, %al
853; AVX1-NEXT: movb %al, (%rsp)
854; AVX1-NEXT: vpextrb $0, %xmm1, %eax
855; AVX1-NEXT: andb $1, %al
856; AVX1-NEXT: movb %al, (%rsp)
857; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
858; AVX1-NEXT: vpextrb $15, %xmm1, %eax
859; AVX1-NEXT: andb $1, %al
860; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
861; AVX1-NEXT: vpextrb $14, %xmm1, %eax
862; AVX1-NEXT: andb $1, %al
863; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
864; AVX1-NEXT: vpextrb $13, %xmm1, %eax
865; AVX1-NEXT: andb $1, %al
866; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
867; AVX1-NEXT: vpextrb $12, %xmm1, %eax
868; AVX1-NEXT: andb $1, %al
869; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
870; AVX1-NEXT: vpextrb $11, %xmm1, %eax
871; AVX1-NEXT: andb $1, %al
872; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
873; AVX1-NEXT: vpextrb $10, %xmm1, %eax
874; AVX1-NEXT: andb $1, %al
875; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
876; AVX1-NEXT: vpextrb $9, %xmm1, %eax
877; AVX1-NEXT: andb $1, %al
878; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
879; AVX1-NEXT: vpextrb $8, %xmm1, %eax
880; AVX1-NEXT: andb $1, %al
881; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
882; AVX1-NEXT: vpextrb $7, %xmm1, %eax
883; AVX1-NEXT: andb $1, %al
884; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
885; AVX1-NEXT: vpextrb $6, %xmm1, %eax
886; AVX1-NEXT: andb $1, %al
887; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
888; AVX1-NEXT: vpextrb $5, %xmm1, %eax
889; AVX1-NEXT: andb $1, %al
890; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
891; AVX1-NEXT: vpextrb $4, %xmm1, %eax
892; AVX1-NEXT: andb $1, %al
893; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
894; AVX1-NEXT: vpextrb $3, %xmm1, %eax
895; AVX1-NEXT: andb $1, %al
896; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
897; AVX1-NEXT: vpextrb $2, %xmm1, %eax
898; AVX1-NEXT: andb $1, %al
899; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
900; AVX1-NEXT: vpextrb $1, %xmm1, %eax
901; AVX1-NEXT: andb $1, %al
902; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
903; AVX1-NEXT: vpextrb $0, %xmm1, %eax
904; AVX1-NEXT: andb $1, %al
905; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
906; AVX1-NEXT: vpextrb $15, %xmm0, %eax
907; AVX1-NEXT: andb $1, %al
908; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
909; AVX1-NEXT: vpextrb $14, %xmm0, %eax
910; AVX1-NEXT: andb $1, %al
911; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
912; AVX1-NEXT: vpextrb $13, %xmm0, %eax
913; AVX1-NEXT: andb $1, %al
914; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
915; AVX1-NEXT: vpextrb $12, %xmm0, %eax
916; AVX1-NEXT: andb $1, %al
917; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
918; AVX1-NEXT: vpextrb $11, %xmm0, %eax
919; AVX1-NEXT: andb $1, %al
920; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
921; AVX1-NEXT: vpextrb $10, %xmm0, %eax
922; AVX1-NEXT: andb $1, %al
923; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
924; AVX1-NEXT: vpextrb $9, %xmm0, %eax
925; AVX1-NEXT: andb $1, %al
926; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
927; AVX1-NEXT: vpextrb $8, %xmm0, %eax
928; AVX1-NEXT: andb $1, %al
929; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
930; AVX1-NEXT: vpextrb $7, %xmm0, %eax
931; AVX1-NEXT: andb $1, %al
932; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
933; AVX1-NEXT: vpextrb $6, %xmm0, %eax
934; AVX1-NEXT: andb $1, %al
935; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
936; AVX1-NEXT: vpextrb $5, %xmm0, %eax
937; AVX1-NEXT: andb $1, %al
938; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
939; AVX1-NEXT: vpextrb $4, %xmm0, %eax
940; AVX1-NEXT: andb $1, %al
941; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
942; AVX1-NEXT: vpextrb $3, %xmm0, %eax
943; AVX1-NEXT: andb $1, %al
944; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
945; AVX1-NEXT: vpextrb $2, %xmm0, %eax
946; AVX1-NEXT: andb $1, %al
947; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
948; AVX1-NEXT: vpextrb $1, %xmm0, %eax
949; AVX1-NEXT: andb $1, %al
950; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
951; AVX1-NEXT: vpextrb $0, %xmm0, %eax
952; AVX1-NEXT: andb $1, %al
953; AVX1-NEXT: movb %al, {{[0-9]+}}(%rsp)
954; AVX1-NEXT: movl (%rsp), %ecx
955; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
956; AVX1-NEXT: shlq $32, %rax
957; AVX1-NEXT: orq %rcx, %rax
958; AVX1-NEXT: movq %rbp, %rsp
959; AVX1-NEXT: popq %rbp
960; AVX1-NEXT: vzeroupper
Simon Pilgrima5793822017-10-31 18:43:24 +0000961; AVX1-NEXT: retq
Simon Pilgrim11e29692017-09-14 10:30:22 +0000962;
963; AVX2-LABEL: v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000964; AVX2: # %bb.0:
Simon Pilgrim11e29692017-09-14 10:30:22 +0000965; AVX2-NEXT: pushq %rbp
Simon Pilgrim11e29692017-09-14 10:30:22 +0000966; AVX2-NEXT: .cfi_def_cfa_offset 16
Simon Pilgrim11e29692017-09-14 10:30:22 +0000967; AVX2-NEXT: .cfi_offset %rbp, -16
968; AVX2-NEXT: movq %rsp, %rbp
Simon Pilgrim11e29692017-09-14 10:30:22 +0000969; AVX2-NEXT: .cfi_def_cfa_register %rbp
970; AVX2-NEXT: andq $-32, %rsp
971; AVX2-NEXT: subq $64, %rsp
972; AVX2-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
973; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm2
974; AVX2-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm0
975; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
976; AVX2-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm1
977; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
978; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
979; AVX2-NEXT: vpextrb $15, %xmm2, %eax
980; AVX2-NEXT: andb $1, %al
981; AVX2-NEXT: movb %al, (%rsp)
982; AVX2-NEXT: vpextrb $14, %xmm2, %eax
983; AVX2-NEXT: andb $1, %al
984; AVX2-NEXT: movb %al, (%rsp)
985; AVX2-NEXT: vpextrb $13, %xmm2, %eax
986; AVX2-NEXT: andb $1, %al
987; AVX2-NEXT: movb %al, (%rsp)
988; AVX2-NEXT: vpextrb $12, %xmm2, %eax
989; AVX2-NEXT: andb $1, %al
990; AVX2-NEXT: movb %al, (%rsp)
991; AVX2-NEXT: vpextrb $11, %xmm2, %eax
992; AVX2-NEXT: andb $1, %al
993; AVX2-NEXT: movb %al, (%rsp)
994; AVX2-NEXT: vpextrb $10, %xmm2, %eax
995; AVX2-NEXT: andb $1, %al
996; AVX2-NEXT: movb %al, (%rsp)
997; AVX2-NEXT: vpextrb $9, %xmm2, %eax
998; AVX2-NEXT: andb $1, %al
999; AVX2-NEXT: movb %al, (%rsp)
1000; AVX2-NEXT: vpextrb $8, %xmm2, %eax
1001; AVX2-NEXT: andb $1, %al
1002; AVX2-NEXT: movb %al, (%rsp)
1003; AVX2-NEXT: vpextrb $7, %xmm2, %eax
1004; AVX2-NEXT: andb $1, %al
1005; AVX2-NEXT: movb %al, (%rsp)
1006; AVX2-NEXT: vpextrb $6, %xmm2, %eax
1007; AVX2-NEXT: andb $1, %al
1008; AVX2-NEXT: movb %al, (%rsp)
1009; AVX2-NEXT: vpextrb $5, %xmm2, %eax
1010; AVX2-NEXT: andb $1, %al
1011; AVX2-NEXT: movb %al, (%rsp)
1012; AVX2-NEXT: vpextrb $4, %xmm2, %eax
1013; AVX2-NEXT: andb $1, %al
1014; AVX2-NEXT: movb %al, (%rsp)
1015; AVX2-NEXT: vpextrb $3, %xmm2, %eax
1016; AVX2-NEXT: andb $1, %al
1017; AVX2-NEXT: movb %al, (%rsp)
1018; AVX2-NEXT: vpextrb $2, %xmm2, %eax
1019; AVX2-NEXT: andb $1, %al
1020; AVX2-NEXT: movb %al, (%rsp)
1021; AVX2-NEXT: vpextrb $1, %xmm2, %eax
1022; AVX2-NEXT: andb $1, %al
1023; AVX2-NEXT: movb %al, (%rsp)
1024; AVX2-NEXT: vpextrb $0, %xmm2, %eax
1025; AVX2-NEXT: andb $1, %al
1026; AVX2-NEXT: movb %al, (%rsp)
1027; AVX2-NEXT: vpextrb $15, %xmm1, %eax
1028; AVX2-NEXT: andb $1, %al
1029; AVX2-NEXT: movb %al, (%rsp)
1030; AVX2-NEXT: vpextrb $14, %xmm1, %eax
1031; AVX2-NEXT: andb $1, %al
1032; AVX2-NEXT: movb %al, (%rsp)
1033; AVX2-NEXT: vpextrb $13, %xmm1, %eax
1034; AVX2-NEXT: andb $1, %al
1035; AVX2-NEXT: movb %al, (%rsp)
1036; AVX2-NEXT: vpextrb $12, %xmm1, %eax
1037; AVX2-NEXT: andb $1, %al
1038; AVX2-NEXT: movb %al, (%rsp)
1039; AVX2-NEXT: vpextrb $11, %xmm1, %eax
1040; AVX2-NEXT: andb $1, %al
1041; AVX2-NEXT: movb %al, (%rsp)
1042; AVX2-NEXT: vpextrb $10, %xmm1, %eax
1043; AVX2-NEXT: andb $1, %al
1044; AVX2-NEXT: movb %al, (%rsp)
1045; AVX2-NEXT: vpextrb $9, %xmm1, %eax
1046; AVX2-NEXT: andb $1, %al
1047; AVX2-NEXT: movb %al, (%rsp)
1048; AVX2-NEXT: vpextrb $8, %xmm1, %eax
1049; AVX2-NEXT: andb $1, %al
1050; AVX2-NEXT: movb %al, (%rsp)
1051; AVX2-NEXT: vpextrb $7, %xmm1, %eax
1052; AVX2-NEXT: andb $1, %al
1053; AVX2-NEXT: movb %al, (%rsp)
1054; AVX2-NEXT: vpextrb $6, %xmm1, %eax
1055; AVX2-NEXT: andb $1, %al
1056; AVX2-NEXT: movb %al, (%rsp)
1057; AVX2-NEXT: vpextrb $5, %xmm1, %eax
1058; AVX2-NEXT: andb $1, %al
1059; AVX2-NEXT: movb %al, (%rsp)
1060; AVX2-NEXT: vpextrb $4, %xmm1, %eax
1061; AVX2-NEXT: andb $1, %al
1062; AVX2-NEXT: movb %al, (%rsp)
1063; AVX2-NEXT: vpextrb $3, %xmm1, %eax
1064; AVX2-NEXT: andb $1, %al
1065; AVX2-NEXT: movb %al, (%rsp)
1066; AVX2-NEXT: vpextrb $2, %xmm1, %eax
1067; AVX2-NEXT: andb $1, %al
1068; AVX2-NEXT: movb %al, (%rsp)
1069; AVX2-NEXT: vpextrb $1, %xmm1, %eax
1070; AVX2-NEXT: andb $1, %al
1071; AVX2-NEXT: movb %al, (%rsp)
1072; AVX2-NEXT: vpextrb $0, %xmm1, %eax
1073; AVX2-NEXT: andb $1, %al
1074; AVX2-NEXT: movb %al, (%rsp)
1075; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1076; AVX2-NEXT: vpextrb $15, %xmm1, %eax
1077; AVX2-NEXT: andb $1, %al
1078; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1079; AVX2-NEXT: vpextrb $14, %xmm1, %eax
1080; AVX2-NEXT: andb $1, %al
1081; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1082; AVX2-NEXT: vpextrb $13, %xmm1, %eax
1083; AVX2-NEXT: andb $1, %al
1084; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1085; AVX2-NEXT: vpextrb $12, %xmm1, %eax
1086; AVX2-NEXT: andb $1, %al
1087; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1088; AVX2-NEXT: vpextrb $11, %xmm1, %eax
1089; AVX2-NEXT: andb $1, %al
1090; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1091; AVX2-NEXT: vpextrb $10, %xmm1, %eax
1092; AVX2-NEXT: andb $1, %al
1093; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1094; AVX2-NEXT: vpextrb $9, %xmm1, %eax
1095; AVX2-NEXT: andb $1, %al
1096; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1097; AVX2-NEXT: vpextrb $8, %xmm1, %eax
1098; AVX2-NEXT: andb $1, %al
1099; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1100; AVX2-NEXT: vpextrb $7, %xmm1, %eax
1101; AVX2-NEXT: andb $1, %al
1102; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1103; AVX2-NEXT: vpextrb $6, %xmm1, %eax
1104; AVX2-NEXT: andb $1, %al
1105; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1106; AVX2-NEXT: vpextrb $5, %xmm1, %eax
1107; AVX2-NEXT: andb $1, %al
1108; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1109; AVX2-NEXT: vpextrb $4, %xmm1, %eax
1110; AVX2-NEXT: andb $1, %al
1111; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1112; AVX2-NEXT: vpextrb $3, %xmm1, %eax
1113; AVX2-NEXT: andb $1, %al
1114; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1115; AVX2-NEXT: vpextrb $2, %xmm1, %eax
1116; AVX2-NEXT: andb $1, %al
1117; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1118; AVX2-NEXT: vpextrb $1, %xmm1, %eax
1119; AVX2-NEXT: andb $1, %al
1120; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1121; AVX2-NEXT: vpextrb $0, %xmm1, %eax
1122; AVX2-NEXT: andb $1, %al
1123; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1124; AVX2-NEXT: vpextrb $15, %xmm0, %eax
1125; AVX2-NEXT: andb $1, %al
1126; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1127; AVX2-NEXT: vpextrb $14, %xmm0, %eax
1128; AVX2-NEXT: andb $1, %al
1129; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1130; AVX2-NEXT: vpextrb $13, %xmm0, %eax
1131; AVX2-NEXT: andb $1, %al
1132; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1133; AVX2-NEXT: vpextrb $12, %xmm0, %eax
1134; AVX2-NEXT: andb $1, %al
1135; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1136; AVX2-NEXT: vpextrb $11, %xmm0, %eax
1137; AVX2-NEXT: andb $1, %al
1138; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1139; AVX2-NEXT: vpextrb $10, %xmm0, %eax
1140; AVX2-NEXT: andb $1, %al
1141; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1142; AVX2-NEXT: vpextrb $9, %xmm0, %eax
1143; AVX2-NEXT: andb $1, %al
1144; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1145; AVX2-NEXT: vpextrb $8, %xmm0, %eax
1146; AVX2-NEXT: andb $1, %al
1147; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1148; AVX2-NEXT: vpextrb $7, %xmm0, %eax
1149; AVX2-NEXT: andb $1, %al
1150; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1151; AVX2-NEXT: vpextrb $6, %xmm0, %eax
1152; AVX2-NEXT: andb $1, %al
1153; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1154; AVX2-NEXT: vpextrb $5, %xmm0, %eax
1155; AVX2-NEXT: andb $1, %al
1156; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1157; AVX2-NEXT: vpextrb $4, %xmm0, %eax
1158; AVX2-NEXT: andb $1, %al
1159; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1160; AVX2-NEXT: vpextrb $3, %xmm0, %eax
1161; AVX2-NEXT: andb $1, %al
1162; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1163; AVX2-NEXT: vpextrb $2, %xmm0, %eax
1164; AVX2-NEXT: andb $1, %al
1165; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1166; AVX2-NEXT: vpextrb $1, %xmm0, %eax
1167; AVX2-NEXT: andb $1, %al
1168; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1169; AVX2-NEXT: vpextrb $0, %xmm0, %eax
1170; AVX2-NEXT: andb $1, %al
1171; AVX2-NEXT: movb %al, {{[0-9]+}}(%rsp)
1172; AVX2-NEXT: movl (%rsp), %ecx
1173; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
1174; AVX2-NEXT: shlq $32, %rax
1175; AVX2-NEXT: orq %rcx, %rax
1176; AVX2-NEXT: movq %rbp, %rsp
1177; AVX2-NEXT: popq %rbp
1178; AVX2-NEXT: vzeroupper
Simon Pilgrima5793822017-10-31 18:43:24 +00001179; AVX2-NEXT: retq
Simon Pilgrim11e29692017-09-14 10:30:22 +00001180;
1181; AVX512F-LABEL: v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001182; AVX512F: # %bb.0:
Simon Pilgrim11e29692017-09-14 10:30:22 +00001183; AVX512F-NEXT: pushq %rbp
Simon Pilgrim11e29692017-09-14 10:30:22 +00001184; AVX512F-NEXT: .cfi_def_cfa_offset 16
Simon Pilgrim11e29692017-09-14 10:30:22 +00001185; AVX512F-NEXT: .cfi_offset %rbp, -16
1186; AVX512F-NEXT: movq %rsp, %rbp
Simon Pilgrim11e29692017-09-14 10:30:22 +00001187; AVX512F-NEXT: .cfi_def_cfa_register %rbp
1188; AVX512F-NEXT: andq $-32, %rsp
1189; AVX512F-NEXT: subq $64, %rsp
1190; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
1191; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
1192; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm2
1193; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
1194; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm2
1195; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
1196; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
1197; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
1198; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
1199; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
1200; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1201; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1202; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
1203; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1204; AVX512F-NEXT: kmovw %k0, (%rsp)
1205; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
1206; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
1207; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
1208; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1209; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1210; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm0
1211; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
1212; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
1213; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1214; AVX512F-NEXT: movl (%rsp), %ecx
1215; AVX512F-NEXT: movl {{[0-9]+}}(%rsp), %eax
1216; AVX512F-NEXT: shlq $32, %rax
1217; AVX512F-NEXT: orq %rcx, %rax
1218; AVX512F-NEXT: movq %rbp, %rsp
1219; AVX512F-NEXT: popq %rbp
1220; AVX512F-NEXT: vzeroupper
Simon Pilgrima5793822017-10-31 18:43:24 +00001221; AVX512F-NEXT: retq
Simon Pilgrim11e29692017-09-14 10:30:22 +00001222;
1223; AVX512BW-LABEL: v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +00001224; AVX512BW: # %bb.0:
Simon Pilgrim11e29692017-09-14 10:30:22 +00001225; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k1
1226; AVX512BW-NEXT: vpcmpgtb %zmm3, %zmm2, %k0 {%k1}
1227; AVX512BW-NEXT: kmovq %k0, %rax
1228; AVX512BW-NEXT: vzeroupper
Simon Pilgrima5793822017-10-31 18:43:24 +00001229; AVX512BW-NEXT: retq
Simon Pilgrim11e29692017-09-14 10:30:22 +00001230 %x0 = icmp sgt <64 x i8> %a, %b
1231 %x1 = icmp sgt <64 x i8> %c, %d
1232 %y = and <64 x i1> %x0, %x1
1233 %res = bitcast <64 x i1> %y to i64
1234 ret i64 %res
1235}