blob: d66486572f81cb5817b285eff6755ffa02a67870 [file] [log] [blame]
Simon Pilgrimf6fa1d02017-09-11 10:50:03 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
Craig Topper51f28862017-11-27 18:00:49 +00006; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512NOBW --check-prefix=AVX512F
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512NOBW --check-prefix=AVX512VL
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW --check-prefix=AVX512BWNOVL
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW --check-prefix=AVX512BWVL
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +000010
11;
12; General cases - packing of vector comparison to legal vector result types
13;
14
; Test case: lane-wise equality compare of two <16 x i16> vectors, with the
; <16 x i1> result sign-extended to a <16 x i8> mask that drives a bitwise
; select between %a2 (mask lanes set) and %a3 (mask lanes clear).
; The assertion lines inside the function record the expected llc output for
; each RUN configuration. They are autogenerated (see the note at the top of
; the file), so regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
15define <16 x i8> @vselect_packss_v16i16(<16 x i16> %a0, <16 x i16> %a1, <16 x i8> %a2, <16 x i8> %a3) {
Simon Pilgrimb092bd32017-09-11 14:03:47 +000016; SSE2-LABEL: vselect_packss_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000017; SSE2: # %bb.0:
Simon Pilgrimb092bd32017-09-11 14:03:47 +000018; SSE2-NEXT: pcmpeqw %xmm3, %xmm1
19; SSE2-NEXT: pcmpeqw %xmm2, %xmm0
20; SSE2-NEXT: packsswb %xmm1, %xmm0
21; SSE2-NEXT: pand %xmm0, %xmm4
22; SSE2-NEXT: pandn %xmm5, %xmm0
23; SSE2-NEXT: por %xmm4, %xmm0
24; SSE2-NEXT: retq
25;
26; SSE42-LABEL: vselect_packss_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000027; SSE42: # %bb.0:
Simon Pilgrimb092bd32017-09-11 14:03:47 +000028; SSE42-NEXT: pcmpeqw %xmm3, %xmm1
29; SSE42-NEXT: pcmpeqw %xmm2, %xmm0
30; SSE42-NEXT: packsswb %xmm1, %xmm0
31; SSE42-NEXT: pblendvb %xmm0, %xmm4, %xmm5
32; SSE42-NEXT: movdqa %xmm5, %xmm0
33; SSE42-NEXT: retq
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +000034;
35; AVX1-LABEL: vselect_packss_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000036; AVX1: # %bb.0:
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +000037; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
38; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
39; AVX1-NEXT: vpcmpeqw %xmm4, %xmm5, %xmm4
40; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
41; AVX1-NEXT: vpacksswb %xmm4, %xmm0, %xmm0
Simon Pilgrimb092bd32017-09-11 14:03:47 +000042; AVX1-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +000043; AVX1-NEXT: vzeroupper
44; AVX1-NEXT: retq
45;
46; AVX2-LABEL: vselect_packss_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000047; AVX2: # %bb.0:
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +000048; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
49; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
50; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
Simon Pilgrimb092bd32017-09-11 14:03:47 +000051; AVX2-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +000052; AVX2-NEXT: vzeroupper
53; AVX2-NEXT: retq
54;
Craig Topper51f28862017-11-27 18:00:49 +000055; AVX512NOBW-LABEL: vselect_packss_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000056; AVX512NOBW: # %bb.0:
Craig Topper51f28862017-11-27 18:00:49 +000057; AVX512NOBW-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
58; AVX512NOBW-NEXT: vpmovsxwd %ymm0, %zmm0
59; AVX512NOBW-NEXT: vpmovdb %zmm0, %xmm0
60; AVX512NOBW-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
61; AVX512NOBW-NEXT: vzeroupper
62; AVX512NOBW-NEXT: retq
63;
64; AVX512BWNOVL-LABEL: vselect_packss_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000065; AVX512BWNOVL: # %bb.0:
Craig Topper51f28862017-11-27 18:00:49 +000066; AVX512BWNOVL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
67; AVX512BWNOVL-NEXT: vpmovwb %zmm0, %ymm0
68; AVX512BWNOVL-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
69; AVX512BWNOVL-NEXT: vzeroupper
70; AVX512BWNOVL-NEXT: retq
71;
72; AVX512BWVL-LABEL: vselect_packss_v16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000073; AVX512BWVL: # %bb.0:
Craig Topper51f28862017-11-27 18:00:49 +000074; AVX512BWVL-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
75; AVX512BWVL-NEXT: vpmovm2b %k0, %xmm0
76; AVX512BWVL-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
77; AVX512BWVL-NEXT: vzeroupper
78; AVX512BWVL-NEXT: retq
; IR under test. %2 is all-ones in lanes where %a0 equals %a1 and zero
; elsewhere (sext of an i1). The and/xor/and/or sequence then computes
; (%2 & %a2) | (~%2 & %a3); the xor with an all-ones vector is the bitwise NOT.
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +000079 %1 = icmp eq <16 x i16> %a0, %a1
80 %2 = sext <16 x i1> %1 to <16 x i8>
81 %3 = and <16 x i8> %2, %a2
82 %4 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
83 %5 = and <16 x i8> %4, %a3
84 %6 = or <16 x i8> %3, %5
85 ret <16 x i8> %6
86}
87
; Test case: same pattern as the i16 variant but with <16 x i32> inputs. The
; lane-wise equality result (<16 x i1>) is sign-extended to a <16 x i8> mask
; that drives a bitwise select between %a2 (mask lanes set) and %a3 (mask
; lanes clear).
; In the expected output for the two SSE configurations the select operands
; are read from the stack rather than from registers.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
88define <16 x i8> @vselect_packss_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i8> %a2, <16 x i8> %a3) {
Simon Pilgrim0a12c232017-10-24 15:38:16 +000089; SSE2-LABEL: vselect_packss_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000090; SSE2: # %bb.0:
Simon Pilgrim0a12c232017-10-24 15:38:16 +000091; SSE2-NEXT: pcmpeqd %xmm7, %xmm3
92; SSE2-NEXT: pcmpeqd %xmm6, %xmm2
93; SSE2-NEXT: packssdw %xmm3, %xmm2
94; SSE2-NEXT: pcmpeqd %xmm5, %xmm1
95; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
96; SSE2-NEXT: packssdw %xmm1, %xmm0
97; SSE2-NEXT: packsswb %xmm2, %xmm0
98; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm1
99; SSE2-NEXT: pand %xmm0, %xmm1
100; SSE2-NEXT: pandn {{[0-9]+}}(%rsp), %xmm0
101; SSE2-NEXT: por %xmm1, %xmm0
102; SSE2-NEXT: retq
103;
104; SSE42-LABEL: vselect_packss_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000105; SSE42: # %bb.0:
Simon Pilgrim0a12c232017-10-24 15:38:16 +0000106; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
107; SSE42-NEXT: pcmpeqd %xmm7, %xmm3
108; SSE42-NEXT: pcmpeqd %xmm6, %xmm2
109; SSE42-NEXT: packssdw %xmm3, %xmm2
110; SSE42-NEXT: pcmpeqd %xmm5, %xmm1
111; SSE42-NEXT: pcmpeqd %xmm4, %xmm0
112; SSE42-NEXT: packssdw %xmm1, %xmm0
113; SSE42-NEXT: packsswb %xmm2, %xmm0
114; SSE42-NEXT: pblendvb %xmm0, {{[0-9]+}}(%rsp), %xmm8
115; SSE42-NEXT: movdqa %xmm8, %xmm0
116; SSE42-NEXT: retq
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000117;
118; AVX1-LABEL: vselect_packss_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000119; AVX1: # %bb.0:
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000120; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm6
121; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7
122; AVX1-NEXT: vpcmpeqd %xmm6, %xmm7, %xmm6
123; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
Simon Pilgrim0a12c232017-10-24 15:38:16 +0000124; AVX1-NEXT: vpackssdw %xmm6, %xmm1, %xmm1
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000125; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
126; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
127; AVX1-NEXT: vpcmpeqd %xmm3, %xmm6, %xmm3
128; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
Simon Pilgrim0a12c232017-10-24 15:38:16 +0000129; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000130; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
Simon Pilgrim0a12c232017-10-24 15:38:16 +0000131; AVX1-NEXT: vpblendvb %xmm0, %xmm4, %xmm5, %xmm0
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000132; AVX1-NEXT: vzeroupper
133; AVX1-NEXT: retq
134;
135; AVX2-LABEL: vselect_packss_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000136; AVX2: # %bb.0:
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000137; AVX2-NEXT: vpcmpeqd %ymm3, %ymm1, %ymm1
138; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
139; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
140; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
141; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
142; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
143; AVX2-NEXT: vpand %xmm4, %xmm0, %xmm1
144; AVX2-NEXT: vpandn %xmm5, %xmm0, %xmm0
145; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
146; AVX2-NEXT: vzeroupper
147; AVX2-NEXT: retq
148;
Craig Topper51f28862017-11-27 18:00:49 +0000149; AVX512NOBW-LABEL: vselect_packss_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000150; AVX512NOBW: # %bb.0:
Craig Topper51f28862017-11-27 18:00:49 +0000151; AVX512NOBW-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
152; AVX512NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
153; AVX512NOBW-NEXT: vpmovdb %zmm0, %xmm0
154; AVX512NOBW-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
155; AVX512NOBW-NEXT: vzeroupper
156; AVX512NOBW-NEXT: retq
157;
158; AVX512BWNOVL-LABEL: vselect_packss_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000159; AVX512BWNOVL: # %bb.0:
Craig Topper51f28862017-11-27 18:00:49 +0000160; AVX512BWNOVL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
161; AVX512BWNOVL-NEXT: vpmovm2b %k0, %zmm0
162; AVX512BWNOVL-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
163; AVX512BWNOVL-NEXT: vzeroupper
164; AVX512BWNOVL-NEXT: retq
165;
166; AVX512BWVL-LABEL: vselect_packss_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000167; AVX512BWVL: # %bb.0:
Craig Topper51f28862017-11-27 18:00:49 +0000168; AVX512BWVL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
169; AVX512BWVL-NEXT: vpmovm2b %k0, %xmm0
170; AVX512BWVL-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
171; AVX512BWVL-NEXT: vzeroupper
172; AVX512BWVL-NEXT: retq
; IR under test. %2 is all-ones in lanes where %a0 equals %a1 and zero
; elsewhere (sext of an i1). The and/xor/and/or sequence then computes
; (%2 & %a2) | (~%2 & %a3); the xor with an all-ones vector is the bitwise NOT.
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000173 %1 = icmp eq <16 x i32> %a0, %a1
174 %2 = sext <16 x i1> %1 to <16 x i8>
175 %3 = and <16 x i8> %2, %a2
176 %4 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
177 %5 = and <16 x i8> %4, %a3
178 %6 = or <16 x i8> %3, %5
179 ret <16 x i8> %6
180}
181
; Test case: same pattern as the i16 and i32 variants but with <16 x i64>
; inputs. The lane-wise equality result (<16 x i1>) is sign-extended to a
; <16 x i8> mask that drives a bitwise select between %a2 (mask lanes set)
; and %a3 (mask lanes clear).
; In the expected output for the SSE, AVX1 and AVX2 configurations, operands
; are read from the stack ({{[0-9]+}}(%rsp) patterns below).
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
182define <16 x i8> @vselect_packss_v16i64(<16 x i64> %a0, <16 x i64> %a1, <16 x i8> %a2, <16 x i8> %a3) {
183; SSE2-LABEL: vselect_packss_v16i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000184; SSE2: # %bb.0:
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000185; SSE2-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm7
186; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[1,0,3,2]
187; SSE2-NEXT: pand %xmm7, %xmm8
188; SSE2-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm6
189; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[1,0,3,2]
190; SSE2-NEXT: pand %xmm6, %xmm7
Simon Pilgrim0a12c232017-10-24 15:38:16 +0000191; SSE2-NEXT: packssdw %xmm8, %xmm7
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000192; SSE2-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm5
193; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,0,3,2]
194; SSE2-NEXT: pand %xmm5, %xmm6
195; SSE2-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm4
196; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,0,3,2]
197; SSE2-NEXT: pand %xmm4, %xmm5
Simon Pilgrim0a12c232017-10-24 15:38:16 +0000198; SSE2-NEXT: packssdw %xmm6, %xmm5
199; SSE2-NEXT: packssdw %xmm7, %xmm5
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000200; SSE2-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm3
201; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,0,3,2]
202; SSE2-NEXT: pand %xmm3, %xmm4
203; SSE2-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm2
204; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
205; SSE2-NEXT: pand %xmm2, %xmm3
Simon Pilgrim0a12c232017-10-24 15:38:16 +0000206; SSE2-NEXT: packssdw %xmm4, %xmm3
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000207; SSE2-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm1
208; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
209; SSE2-NEXT: pand %xmm1, %xmm2
210; SSE2-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm0
211; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
212; SSE2-NEXT: pand %xmm0, %xmm1
Simon Pilgrim0a12c232017-10-24 15:38:16 +0000213; SSE2-NEXT: packssdw %xmm2, %xmm1
214; SSE2-NEXT: packssdw %xmm3, %xmm1
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000215; SSE2-NEXT: packsswb %xmm5, %xmm1
216; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm0
217; SSE2-NEXT: pand %xmm1, %xmm0
218; SSE2-NEXT: pandn {{[0-9]+}}(%rsp), %xmm1
219; SSE2-NEXT: por %xmm0, %xmm1
220; SSE2-NEXT: movdqa %xmm1, %xmm0
221; SSE2-NEXT: retq
222;
223; SSE42-LABEL: vselect_packss_v16i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000224; SSE42: # %bb.0:
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000225; SSE42-NEXT: pcmpeqq {{[0-9]+}}(%rsp), %xmm7
226; SSE42-NEXT: pcmpeqq {{[0-9]+}}(%rsp), %xmm6
Simon Pilgrim0a12c232017-10-24 15:38:16 +0000227; SSE42-NEXT: packssdw %xmm7, %xmm6
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000228; SSE42-NEXT: pcmpeqq {{[0-9]+}}(%rsp), %xmm5
229; SSE42-NEXT: pcmpeqq {{[0-9]+}}(%rsp), %xmm4
Simon Pilgrim0a12c232017-10-24 15:38:16 +0000230; SSE42-NEXT: packssdw %xmm5, %xmm4
231; SSE42-NEXT: packssdw %xmm6, %xmm4
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000232; SSE42-NEXT: pcmpeqq {{[0-9]+}}(%rsp), %xmm3
233; SSE42-NEXT: pcmpeqq {{[0-9]+}}(%rsp), %xmm2
Simon Pilgrim0a12c232017-10-24 15:38:16 +0000234; SSE42-NEXT: packssdw %xmm3, %xmm2
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000235; SSE42-NEXT: pcmpeqq {{[0-9]+}}(%rsp), %xmm1
236; SSE42-NEXT: pcmpeqq {{[0-9]+}}(%rsp), %xmm0
Simon Pilgrim0a12c232017-10-24 15:38:16 +0000237; SSE42-NEXT: packssdw %xmm1, %xmm0
238; SSE42-NEXT: packssdw %xmm2, %xmm0
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000239; SSE42-NEXT: packsswb %xmm4, %xmm0
240; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm1
241; SSE42-NEXT: pand %xmm0, %xmm1
242; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm0
243; SSE42-NEXT: por %xmm1, %xmm0
244; SSE42-NEXT: retq
245;
246; AVX1-LABEL: vselect_packss_v16i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000247; AVX1: # %bb.0:
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000248; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm8
249; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm9
250; AVX1-NEXT: vpcmpeqq %xmm8, %xmm9, %xmm8
251; AVX1-NEXT: vpcmpeqq %xmm7, %xmm3, %xmm3
Simon Pilgrim0a12c232017-10-24 15:38:16 +0000252; AVX1-NEXT: vpackssdw %xmm8, %xmm3, %xmm8
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000253; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm7
254; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
255; AVX1-NEXT: vpcmpeqq %xmm7, %xmm3, %xmm3
256; AVX1-NEXT: vpcmpeqq %xmm6, %xmm2, %xmm2
Simon Pilgrim0a12c232017-10-24 15:38:16 +0000257; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
258; AVX1-NEXT: vpackssdw %xmm8, %xmm2, %xmm2
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000259; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm3
260; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
261; AVX1-NEXT: vpcmpeqq %xmm3, %xmm6, %xmm3
262; AVX1-NEXT: vpcmpeqq %xmm5, %xmm1, %xmm1
Simon Pilgrim0a12c232017-10-24 15:38:16 +0000263; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000264; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
265; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
266; AVX1-NEXT: vpcmpeqq %xmm3, %xmm5, %xmm3
267; AVX1-NEXT: vpcmpeqq %xmm4, %xmm0, %xmm0
Simon Pilgrim0a12c232017-10-24 15:38:16 +0000268; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
269; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000270; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
271; AVX1-NEXT: vpand {{[0-9]+}}(%rsp), %xmm0, %xmm1
272; AVX1-NEXT: vpandn {{[0-9]+}}(%rsp), %xmm0, %xmm0
273; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
274; AVX1-NEXT: vzeroupper
275; AVX1-NEXT: retq
276;
277; AVX2-LABEL: vselect_packss_v16i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000278; AVX2: # %bb.0:
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000279; AVX2-NEXT: vpcmpeqq %ymm7, %ymm3, %ymm3
280; AVX2-NEXT: vpcmpeqq %ymm6, %ymm2, %ymm2
Simon Pilgrim0a12c232017-10-24 15:38:16 +0000281; AVX2-NEXT: vpackssdw %ymm3, %ymm2, %ymm2
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000282; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
283; AVX2-NEXT: vpcmpeqq %ymm5, %ymm1, %ymm1
284; AVX2-NEXT: vpcmpeqq %ymm4, %ymm0, %ymm0
Simon Pilgrim0a12c232017-10-24 15:38:16 +0000285; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000286; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
287; AVX2-NEXT: vpacksswb %ymm2, %ymm0, %ymm0
288; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
289; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
290; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
291; AVX2-NEXT: vpand {{[0-9]+}}(%rsp), %xmm0, %xmm1
292; AVX2-NEXT: vpandn {{[0-9]+}}(%rsp), %xmm0, %xmm0
293; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
294; AVX2-NEXT: vzeroupper
295; AVX2-NEXT: retq
296;
Craig Topper51f28862017-11-27 18:00:49 +0000297; AVX512NOBW-LABEL: vselect_packss_v16i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000298; AVX512NOBW: # %bb.0:
Craig Topper51f28862017-11-27 18:00:49 +0000299; AVX512NOBW-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
300; AVX512NOBW-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
301; AVX512NOBW-NEXT: kunpckbw %k0, %k1, %k1
302; AVX512NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
303; AVX512NOBW-NEXT: vpmovdb %zmm0, %xmm0
304; AVX512NOBW-NEXT: vpblendvb %xmm0, %xmm4, %xmm5, %xmm0
305; AVX512NOBW-NEXT: vzeroupper
306; AVX512NOBW-NEXT: retq
307;
308; AVX512BWNOVL-LABEL: vselect_packss_v16i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000309; AVX512BWNOVL: # %bb.0:
Craig Topper51f28862017-11-27 18:00:49 +0000310; AVX512BWNOVL-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
311; AVX512BWNOVL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
312; AVX512BWNOVL-NEXT: kunpckbw %k0, %k1, %k0
313; AVX512BWNOVL-NEXT: vpmovm2b %k0, %zmm0
314; AVX512BWNOVL-NEXT: vpblendvb %xmm0, %xmm4, %xmm5, %xmm0
315; AVX512BWNOVL-NEXT: vzeroupper
316; AVX512BWNOVL-NEXT: retq
317;
318; AVX512BWVL-LABEL: vselect_packss_v16i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000319; AVX512BWVL: # %bb.0:
Craig Topper51f28862017-11-27 18:00:49 +0000320; AVX512BWVL-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
321; AVX512BWVL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
322; AVX512BWVL-NEXT: kunpckbw %k0, %k1, %k0
323; AVX512BWVL-NEXT: vpmovm2b %k0, %xmm0
324; AVX512BWVL-NEXT: vpblendvb %xmm0, %xmm4, %xmm5, %xmm0
325; AVX512BWVL-NEXT: vzeroupper
326; AVX512BWVL-NEXT: retq
; IR under test. %2 is all-ones in lanes where %a0 equals %a1 and zero
; elsewhere (sext of an i1). The and/xor/and/or sequence then computes
; (%2 & %a2) | (~%2 & %a3); the xor with an all-ones vector is the bitwise NOT.
Simon Pilgrimd0ff65b2017-09-11 12:18:43 +0000327 %1 = icmp eq <16 x i64> %a0, %a1
328 %2 = sext <16 x i1> %1 to <16 x i8>
329 %3 = and <16 x i8> %2, %a2
330 %4 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
331 %5 = and <16 x i8> %4, %a3
332 %6 = or <16 x i8> %3, %5
333 ret <16 x i8> %6
334}
335
336;
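337; PACKSS case - the comparison result is narrowed by an explicit call to the llvm.x86.sse2.packsswb.128 intrinsic before the select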
338;
Simon Pilgrimf6fa1d02017-09-11 10:50:03 +0000339
; Test case: lane-wise equality compare of two <16 x i16> vectors where the
; narrowing to <16 x i8> is written out explicitly in the IR. The compare
; result is sign-extended to <16 x i16>, split into its low and high 8 lanes
; with two shufflevectors, and the halves are combined by a call to
; @llvm.x86.sse2.packsswb.128. The resulting <16 x i8> mask then drives a
; bitwise select between %a2 (mask lanes set) and %a3 (mask lanes clear).
; Unlike the other tests in this file, all AVX-512 RUN configurations share a
; single block of assertions here.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
340define <16 x i8> @vselect_packss(<16 x i16> %a0, <16 x i16> %a1, <16 x i8> %a2, <16 x i8> %a3) {
Simon Pilgrimb092bd32017-09-11 14:03:47 +0000341; SSE2-LABEL: vselect_packss:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000342; SSE2: # %bb.0:
Simon Pilgrimb092bd32017-09-11 14:03:47 +0000343; SSE2-NEXT: pcmpeqw %xmm3, %xmm1
344; SSE2-NEXT: pcmpeqw %xmm2, %xmm0
345; SSE2-NEXT: packsswb %xmm1, %xmm0
346; SSE2-NEXT: pand %xmm0, %xmm4
347; SSE2-NEXT: pandn %xmm5, %xmm0
348; SSE2-NEXT: por %xmm4, %xmm0
349; SSE2-NEXT: retq
350;
351; SSE42-LABEL: vselect_packss:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000352; SSE42: # %bb.0:
Simon Pilgrimb092bd32017-09-11 14:03:47 +0000353; SSE42-NEXT: pcmpeqw %xmm3, %xmm1
354; SSE42-NEXT: pcmpeqw %xmm2, %xmm0
355; SSE42-NEXT: packsswb %xmm1, %xmm0
356; SSE42-NEXT: pblendvb %xmm0, %xmm4, %xmm5
357; SSE42-NEXT: movdqa %xmm5, %xmm0
358; SSE42-NEXT: retq
Simon Pilgrimf6fa1d02017-09-11 10:50:03 +0000359;
360; AVX1-LABEL: vselect_packss:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000361; AVX1: # %bb.0:
Simon Pilgrimf6fa1d02017-09-11 10:50:03 +0000362; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
363; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
364; AVX1-NEXT: vpcmpeqw %xmm4, %xmm5, %xmm4
365; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
366; AVX1-NEXT: vpacksswb %xmm4, %xmm0, %xmm0
Simon Pilgrimb092bd32017-09-11 14:03:47 +0000367; AVX1-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
Simon Pilgrimf6fa1d02017-09-11 10:50:03 +0000368; AVX1-NEXT: vzeroupper
369; AVX1-NEXT: retq
370;
371; AVX2-LABEL: vselect_packss:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000372; AVX2: # %bb.0:
Simon Pilgrimf6fa1d02017-09-11 10:50:03 +0000373; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
374; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
375; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
Simon Pilgrimb092bd32017-09-11 14:03:47 +0000376; AVX2-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
Simon Pilgrimf6fa1d02017-09-11 10:50:03 +0000377; AVX2-NEXT: vzeroupper
378; AVX2-NEXT: retq
379;
Craig Topperc4d2dd82018-01-09 18:14:22 +0000380; AVX512-LABEL: vselect_packss:
381; AVX512: # %bb.0:
382; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
383; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
384; AVX512-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
385; AVX512-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0
386; AVX512-NEXT: vzeroupper
387; AVX512-NEXT: retq
; IR under test. %2 holds the compare result widened to i16 lanes (all-ones
; where %a0 equals %a1, zero elsewhere). %3 takes lanes 0-7 and %4 takes lanes
; 8-15 of %2; the intrinsic call packs the two halves into the <16 x i8> mask
; %5. The and/xor/and/or sequence then computes (%5 & %a2) | (~%5 & %a3); the
; xor with an all-ones vector is the bitwise NOT.
Simon Pilgrimf6fa1d02017-09-11 10:50:03 +0000388 %1 = icmp eq <16 x i16> %a0, %a1
389 %2 = sext <16 x i1> %1 to <16 x i16>
390 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
391 %4 = shufflevector <16 x i16> %2, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
392 %5 = tail call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %3, <8 x i16> %4)
393 %6 = and <16 x i8> %5, %a2
394 %7 = xor <16 x i8> %5, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
395 %8 = and <16 x i8> %7, %a3
396 %9 = or <16 x i8> %6, %8
397 ret <16 x i8> %9
398}
; Declaration of the x86 SSE2 packsswb intrinsic called by @vselect_packss;
; it takes two <8 x i16> operands and returns a <16 x i8> result.
399declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>)