; Source blob: b120f5955c130a2b153ab486ae9608ae808b5c84
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL

;
; PACKUS saturation truncation to vXi32
;
16
; Unsigned-saturating truncation of <4 x i64> to <4 x i32>.
; The IR clamps every lane with a signed compare+select pair: first to an
; upper bound of 4294967295 (icmp slt / select), then to a lower bound of 0
; (icmp sgt / select), and finally truncates the clamped value to i32.
; The CHECK lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define <4 x i32> @trunc_packus_v4i64_v4i32(<4 x i64> %a0) {
; SSE2-LABEL: trunc_packus_v4i64_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [4294967295,4294967295]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pxor %xmm2, %xmm3
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483647,2147483647]
; SSE2-NEXT: movdqa %xmm5, %xmm6
; SSE2-NEXT: pcmpgtd %xmm3, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm5, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; SSE2-NEXT: pand %xmm7, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
; SSE2-NEXT: por %xmm4, %xmm3
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pandn %xmm8, %xmm3
; SSE2-NEXT: por %xmm0, %xmm3
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm5, %xmm4
; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm5, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm6, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE2-NEXT: por %xmm0, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pandn %xmm8, %xmm4
; SSE2-NEXT: por %xmm1, %xmm4
; SSE2-NEXT: movdqa %xmm4, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm5, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: por %xmm0, %xmm1
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: movdqa %xmm3, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm5, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc_packus_v4i64_v4i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [4294967295,4294967295]
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSSE3-NEXT: movdqa %xmm0, %xmm3
; SSSE3-NEXT: pxor %xmm2, %xmm3
; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147483647,2147483647]
; SSSE3-NEXT: movdqa %xmm5, %xmm6
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6
; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; SSSE3-NEXT: pand %xmm7, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
; SSSE3-NEXT: por %xmm4, %xmm3
; SSSE3-NEXT: pand %xmm3, %xmm0
; SSSE3-NEXT: pandn %xmm8, %xmm3
; SSSE3-NEXT: por %xmm0, %xmm3
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: pxor %xmm2, %xmm0
; SSSE3-NEXT: movdqa %xmm5, %xmm4
; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
; SSSE3-NEXT: pcmpeqd %xmm5, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSSE3-NEXT: pand %xmm6, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSSE3-NEXT: por %xmm0, %xmm4
; SSSE3-NEXT: pand %xmm4, %xmm1
; SSSE3-NEXT: pandn %xmm8, %xmm4
; SSSE3-NEXT: por %xmm1, %xmm4
; SSSE3-NEXT: movdqa %xmm4, %xmm0
; SSSE3-NEXT: pxor %xmm2, %xmm0
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSSE3-NEXT: pand %xmm5, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSSE3-NEXT: por %xmm0, %xmm1
; SSSE3-NEXT: pand %xmm4, %xmm1
; SSSE3-NEXT: movdqa %xmm3, %xmm0
; SSSE3-NEXT: pxor %xmm2, %xmm0
; SSSE3-NEXT: movdqa %xmm0, %xmm4
; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSSE3-NEXT: pand %xmm5, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSSE3-NEXT: por %xmm2, %xmm0
; SSSE3-NEXT: pand %xmm3, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_packus_v4i64_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movapd {{.*#+}} xmm4 = [4294967295,4294967295]
; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0]
; SSE41-NEXT: pxor %xmm8, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147483647,2147483647]
; SSE41-NEXT: movdqa %xmm6, %xmm5
; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm7, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: movapd %xmm4, %xmm5
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm8, %xmm0
; SSE41-NEXT: movdqa %xmm6, %xmm2
; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT: por %xmm6, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4
; SSE41-NEXT: xorpd %xmm1, %xmm1
; SSE41-NEXT: movapd %xmm4, %xmm0
; SSE41-NEXT: xorpd %xmm8, %xmm0
; SSE41-NEXT: movapd %xmm0, %xmm2
; SSE41-NEXT: pcmpgtd %xmm8, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm8, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm3, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT: por %xmm6, %xmm0
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
; SSE41-NEXT: movapd %xmm5, %xmm0
; SSE41-NEXT: xorpd %xmm8, %xmm0
; SSE41-NEXT: movapd %xmm0, %xmm3
; SSE41-NEXT: pcmpgtd %xmm8, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm8, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm4, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT: por %xmm6, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_packus_v4i64_v4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [4294967295,4294967295]
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: trunc_packus_v4i64_v4i32:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
; AVX2-SLOW-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-SLOW-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-SLOW-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1
; AVX2-SLOW-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-SLOW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc_packus_v4i64_v4i32:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-FAST-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1
; AVX2-FAST-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
;
; AVX512F-LABEL: trunc_packus_v4i64_v4i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_packus_v4i64_v4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_packus_v4i64_v4i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_packus_v4i64_v4i32:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
  %1 = icmp slt <4 x i64> %a0, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
  %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
  %3 = icmp sgt <4 x i64> %2, zeroinitializer
  %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> zeroinitializer
  %5 = trunc <4 x i64> %4 to <4 x i32>
  ret <4 x i32> %5
}
281
282
283define <8 x i32> @trunc_packus_v8i64_v8i32(<8 x i64> %a0) {
284; SSE2-LABEL: trunc_packus_v8i64_v8i32:
285; SSE2: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000286; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [4294967295,4294967295]
287; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
288; SSE2-NEXT: movdqa %xmm0, %xmm5
289; SSE2-NEXT: pxor %xmm10, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000290; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483647,2147483647]
291; SSE2-NEXT: movdqa %xmm9, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000292; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
293; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
294; SSE2-NEXT: pcmpeqd %xmm9, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000295; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
296; SSE2-NEXT: pand %xmm7, %xmm4
297; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000298; SSE2-NEXT: por %xmm4, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000299; SSE2-NEXT: pand %xmm5, %xmm0
300; SSE2-NEXT: pandn %xmm8, %xmm5
301; SSE2-NEXT: por %xmm0, %xmm5
302; SSE2-NEXT: movdqa %xmm1, %xmm0
303; SSE2-NEXT: pxor %xmm10, %xmm0
304; SSE2-NEXT: movdqa %xmm9, %xmm4
305; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
306; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
307; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
308; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
309; SSE2-NEXT: pand %xmm6, %xmm7
310; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
311; SSE2-NEXT: por %xmm7, %xmm0
312; SSE2-NEXT: pand %xmm0, %xmm1
313; SSE2-NEXT: pandn %xmm8, %xmm0
314; SSE2-NEXT: por %xmm1, %xmm0
315; SSE2-NEXT: movdqa %xmm2, %xmm1
316; SSE2-NEXT: pxor %xmm10, %xmm1
317; SSE2-NEXT: movdqa %xmm9, %xmm4
318; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
319; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
320; SSE2-NEXT: pcmpeqd %xmm9, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000321; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000322; SSE2-NEXT: pand %xmm6, %xmm1
323; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
324; SSE2-NEXT: por %xmm1, %xmm6
325; SSE2-NEXT: pand %xmm6, %xmm2
326; SSE2-NEXT: pandn %xmm8, %xmm6
327; SSE2-NEXT: por %xmm2, %xmm6
328; SSE2-NEXT: movdqa %xmm3, %xmm1
329; SSE2-NEXT: pxor %xmm10, %xmm1
330; SSE2-NEXT: movdqa %xmm9, %xmm2
331; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
332; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
333; SSE2-NEXT: pcmpeqd %xmm9, %xmm1
334; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
335; SSE2-NEXT: pand %xmm4, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000336; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
337; SSE2-NEXT: por %xmm1, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000338; SSE2-NEXT: pand %xmm2, %xmm3
339; SSE2-NEXT: pandn %xmm8, %xmm2
340; SSE2-NEXT: por %xmm3, %xmm2
341; SSE2-NEXT: movdqa %xmm2, %xmm1
342; SSE2-NEXT: pxor %xmm10, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000343; SSE2-NEXT: movdqa %xmm1, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +0000344; SSE2-NEXT: pcmpgtd %xmm10, %xmm3
345; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
346; SSE2-NEXT: pcmpeqd %xmm10, %xmm1
347; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
348; SSE2-NEXT: pand %xmm4, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000349; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000350; SSE2-NEXT: por %xmm1, %xmm3
351; SSE2-NEXT: pand %xmm2, %xmm3
352; SSE2-NEXT: movdqa %xmm6, %xmm1
353; SSE2-NEXT: pxor %xmm10, %xmm1
354; SSE2-NEXT: movdqa %xmm1, %xmm2
355; SSE2-NEXT: pcmpgtd %xmm10, %xmm2
356; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
357; SSE2-NEXT: pcmpeqd %xmm10, %xmm1
358; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
359; SSE2-NEXT: pand %xmm4, %xmm7
360; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
361; SSE2-NEXT: por %xmm7, %xmm1
362; SSE2-NEXT: pand %xmm6, %xmm1
363; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
364; SSE2-NEXT: movdqa %xmm0, %xmm2
365; SSE2-NEXT: pxor %xmm10, %xmm2
366; SSE2-NEXT: movdqa %xmm2, %xmm3
367; SSE2-NEXT: pcmpgtd %xmm10, %xmm3
368; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
369; SSE2-NEXT: pcmpeqd %xmm10, %xmm2
370; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
371; SSE2-NEXT: pand %xmm4, %xmm2
372; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
373; SSE2-NEXT: por %xmm2, %xmm3
374; SSE2-NEXT: pand %xmm0, %xmm3
375; SSE2-NEXT: movdqa %xmm5, %xmm0
376; SSE2-NEXT: pxor %xmm10, %xmm0
377; SSE2-NEXT: movdqa %xmm0, %xmm2
378; SSE2-NEXT: pcmpgtd %xmm10, %xmm2
379; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
380; SSE2-NEXT: pcmpeqd %xmm10, %xmm0
381; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
382; SSE2-NEXT: pand %xmm4, %xmm6
383; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
384; SSE2-NEXT: por %xmm6, %xmm0
385; SSE2-NEXT: pand %xmm5, %xmm0
386; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000387; SSE2-NEXT: retq
388;
389; SSSE3-LABEL: trunc_packus_v8i64_v8i32:
390; SSSE3: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000391; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [4294967295,4294967295]
392; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
393; SSSE3-NEXT: movdqa %xmm0, %xmm5
394; SSSE3-NEXT: pxor %xmm10, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000395; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483647,2147483647]
396; SSSE3-NEXT: movdqa %xmm9, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000397; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6
398; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
399; SSSE3-NEXT: pcmpeqd %xmm9, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000400; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
401; SSSE3-NEXT: pand %xmm7, %xmm4
402; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000403; SSSE3-NEXT: por %xmm4, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000404; SSSE3-NEXT: pand %xmm5, %xmm0
405; SSSE3-NEXT: pandn %xmm8, %xmm5
406; SSSE3-NEXT: por %xmm0, %xmm5
407; SSSE3-NEXT: movdqa %xmm1, %xmm0
408; SSSE3-NEXT: pxor %xmm10, %xmm0
409; SSSE3-NEXT: movdqa %xmm9, %xmm4
410; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4
411; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
412; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
413; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
414; SSSE3-NEXT: pand %xmm6, %xmm7
415; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
416; SSSE3-NEXT: por %xmm7, %xmm0
417; SSSE3-NEXT: pand %xmm0, %xmm1
418; SSSE3-NEXT: pandn %xmm8, %xmm0
419; SSSE3-NEXT: por %xmm1, %xmm0
420; SSSE3-NEXT: movdqa %xmm2, %xmm1
421; SSSE3-NEXT: pxor %xmm10, %xmm1
422; SSSE3-NEXT: movdqa %xmm9, %xmm4
423; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
424; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
425; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000426; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000427; SSSE3-NEXT: pand %xmm6, %xmm1
428; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
429; SSSE3-NEXT: por %xmm1, %xmm6
430; SSSE3-NEXT: pand %xmm6, %xmm2
431; SSSE3-NEXT: pandn %xmm8, %xmm6
432; SSSE3-NEXT: por %xmm2, %xmm6
433; SSSE3-NEXT: movdqa %xmm3, %xmm1
434; SSSE3-NEXT: pxor %xmm10, %xmm1
435; SSSE3-NEXT: movdqa %xmm9, %xmm2
436; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
437; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
438; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1
439; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
440; SSSE3-NEXT: pand %xmm4, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000441; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
442; SSSE3-NEXT: por %xmm1, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000443; SSSE3-NEXT: pand %xmm2, %xmm3
444; SSSE3-NEXT: pandn %xmm8, %xmm2
445; SSSE3-NEXT: por %xmm3, %xmm2
446; SSSE3-NEXT: movdqa %xmm2, %xmm1
447; SSSE3-NEXT: pxor %xmm10, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000448; SSSE3-NEXT: movdqa %xmm1, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +0000449; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3
450; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
451; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1
452; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
453; SSSE3-NEXT: pand %xmm4, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000454; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000455; SSSE3-NEXT: por %xmm1, %xmm3
456; SSSE3-NEXT: pand %xmm2, %xmm3
457; SSSE3-NEXT: movdqa %xmm6, %xmm1
458; SSSE3-NEXT: pxor %xmm10, %xmm1
459; SSSE3-NEXT: movdqa %xmm1, %xmm2
460; SSSE3-NEXT: pcmpgtd %xmm10, %xmm2
461; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
462; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1
463; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
464; SSSE3-NEXT: pand %xmm4, %xmm7
465; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
466; SSSE3-NEXT: por %xmm7, %xmm1
467; SSSE3-NEXT: pand %xmm6, %xmm1
468; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
469; SSSE3-NEXT: movdqa %xmm0, %xmm2
470; SSSE3-NEXT: pxor %xmm10, %xmm2
471; SSSE3-NEXT: movdqa %xmm2, %xmm3
472; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3
473; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
474; SSSE3-NEXT: pcmpeqd %xmm10, %xmm2
475; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
476; SSSE3-NEXT: pand %xmm4, %xmm2
477; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
478; SSSE3-NEXT: por %xmm2, %xmm3
479; SSSE3-NEXT: pand %xmm0, %xmm3
480; SSSE3-NEXT: movdqa %xmm5, %xmm0
481; SSSE3-NEXT: pxor %xmm10, %xmm0
482; SSSE3-NEXT: movdqa %xmm0, %xmm2
483; SSSE3-NEXT: pcmpgtd %xmm10, %xmm2
484; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
485; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0
486; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
487; SSSE3-NEXT: pand %xmm4, %xmm6
488; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
489; SSSE3-NEXT: por %xmm6, %xmm0
490; SSSE3-NEXT: pand %xmm5, %xmm0
491; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000492; SSSE3-NEXT: retq
493;
494; SSE41-LABEL: trunc_packus_v8i64_v8i32:
495; SSE41: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000496; SSE41-NEXT: movdqa %xmm0, %xmm4
497; SSE41-NEXT: movapd {{.*#+}} xmm7 = [4294967295,4294967295]
498; SSE41-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
499; SSE41-NEXT: pxor %xmm10, %xmm0
500; SSE41-NEXT: movdqa {{.*#+}} xmm11 = [2147483647,2147483647]
501; SSE41-NEXT: movdqa %xmm11, %xmm6
502; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
503; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
504; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
505; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
506; SSE41-NEXT: pand %xmm8, %xmm5
507; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
508; SSE41-NEXT: por %xmm5, %xmm0
509; SSE41-NEXT: movapd %xmm7, %xmm8
510; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm8
511; SSE41-NEXT: movdqa %xmm1, %xmm0
512; SSE41-NEXT: pxor %xmm10, %xmm0
513; SSE41-NEXT: movdqa %xmm11, %xmm4
514; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
515; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000516; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
517; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000518; SSE41-NEXT: pand %xmm5, %xmm6
519; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
520; SSE41-NEXT: por %xmm6, %xmm0
521; SSE41-NEXT: movapd %xmm7, %xmm9
522; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm9
523; SSE41-NEXT: movdqa %xmm2, %xmm0
524; SSE41-NEXT: pxor %xmm10, %xmm0
525; SSE41-NEXT: movdqa %xmm11, %xmm1
526; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
527; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
528; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
529; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
530; SSE41-NEXT: pand %xmm4, %xmm5
531; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
532; SSE41-NEXT: por %xmm5, %xmm0
533; SSE41-NEXT: movapd %xmm7, %xmm4
534; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4
535; SSE41-NEXT: movdqa %xmm3, %xmm0
536; SSE41-NEXT: pxor %xmm10, %xmm0
537; SSE41-NEXT: movdqa %xmm11, %xmm1
538; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
539; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
540; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
541; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
542; SSE41-NEXT: pand %xmm2, %xmm5
543; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
544; SSE41-NEXT: por %xmm5, %xmm0
545; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm7
546; SSE41-NEXT: pxor %xmm2, %xmm2
547; SSE41-NEXT: movapd %xmm7, %xmm0
548; SSE41-NEXT: xorpd %xmm10, %xmm0
549; SSE41-NEXT: movapd %xmm0, %xmm1
550; SSE41-NEXT: pcmpgtd %xmm10, %xmm1
551; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
552; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
553; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
554; SSE41-NEXT: pand %xmm3, %xmm5
555; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
556; SSE41-NEXT: por %xmm5, %xmm0
557; SSE41-NEXT: pxor %xmm3, %xmm3
558; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm3
559; SSE41-NEXT: movapd %xmm4, %xmm0
560; SSE41-NEXT: xorpd %xmm10, %xmm0
561; SSE41-NEXT: movapd %xmm0, %xmm1
562; SSE41-NEXT: pcmpgtd %xmm10, %xmm1
563; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
564; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
565; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
566; SSE41-NEXT: pand %xmm5, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000567; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
568; SSE41-NEXT: por %xmm6, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +0000569; SSE41-NEXT: pxor %xmm1, %xmm1
570; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1
571; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
572; SSE41-NEXT: movapd %xmm9, %xmm0
573; SSE41-NEXT: xorpd %xmm10, %xmm0
574; SSE41-NEXT: movapd %xmm0, %xmm3
575; SSE41-NEXT: pcmpgtd %xmm10, %xmm3
576; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
577; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
578; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
579; SSE41-NEXT: pand %xmm4, %xmm5
580; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
581; SSE41-NEXT: por %xmm5, %xmm0
582; SSE41-NEXT: pxor %xmm3, %xmm3
583; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm3
584; SSE41-NEXT: movapd %xmm8, %xmm0
585; SSE41-NEXT: xorpd %xmm10, %xmm0
586; SSE41-NEXT: movapd %xmm0, %xmm4
587; SSE41-NEXT: pcmpgtd %xmm10, %xmm4
588; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
589; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
590; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
591; SSE41-NEXT: pand %xmm5, %xmm6
592; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
593; SSE41-NEXT: por %xmm6, %xmm0
594; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm2
595; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
596; SSE41-NEXT: movaps %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000597; SSE41-NEXT: retq
598;
599; AVX1-LABEL: trunc_packus_v8i64_v8i32:
600; AVX1: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000601; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,4294967295]
602; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
603; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [4294967295,4294967295]
604; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
605; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm5
606; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
607; AVX1-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
608; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
609; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
610; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm4
611; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
612; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000613; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
614; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm3
615; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
616; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm5
617; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm6
618; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7
619; AVX1-NEXT: vpcmpgtq %xmm2, %xmm7, %xmm2
620; AVX1-NEXT: vpand %xmm7, %xmm2, %xmm2
621; AVX1-NEXT: vpand %xmm1, %xmm6, %xmm1
622; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
623; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm2
624; AVX1-NEXT: vpand %xmm0, %xmm3, %xmm0
625; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
626; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
627; AVX1-NEXT: retq
628;
629; AVX2-SLOW-LABEL: trunc_packus_v8i64_v8i32:
630; AVX2-SLOW: # %bb.0:
631; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,4294967295]
632; AVX2-SLOW-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000633; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim0be55672018-02-11 10:52:37 +0000634; AVX2-SLOW-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
635; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000636; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000637; AVX2-SLOW-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
638; AVX2-SLOW-NEXT: vpand %ymm1, %ymm3, %ymm1
639; AVX2-SLOW-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2
640; AVX2-SLOW-NEXT: vpand %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000641; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
642; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
643; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
644; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
645; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
646; AVX2-SLOW-NEXT: retq
647;
648; AVX2-FAST-LABEL: trunc_packus_v8i64_v8i32:
649; AVX2-FAST: # %bb.0:
650; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,4294967295]
651; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000652; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim0be55672018-02-11 10:52:37 +0000653; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
654; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000655; AVX2-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000656; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
657; AVX2-FAST-NEXT: vpand %ymm1, %ymm3, %ymm1
658; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2
659; AVX2-FAST-NEXT: vpand %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000660; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
661; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
662; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
663; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
664; AVX2-FAST-NEXT: retq
665;
666; AVX512-LABEL: trunc_packus_v8i64_v8i32:
667; AVX512: # %bb.0:
668; AVX512-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
669; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
670; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
671; AVX512-NEXT: vpmovqd %zmm0, %ymm0
672; AVX512-NEXT: retq
673 %1 = icmp slt <8 x i64> %a0, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
674 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
675 %3 = icmp sgt <8 x i64> %2, zeroinitializer
676 %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
677 %5 = trunc <8 x i64> %4 to <8 x i32>
678 ret <8 x i32> %5
679}
680
681;
682; PACKUS saturation truncation to vXi16
683;
684
; Signed-clamps every i64 lane of %a0 to the range [0, 65535] (a select-based
; minimum against 65535 followed by a select-based maximum against 0) and then
; truncates the clamped vector to <8 x i16>.
;
; The per-target assertion lines inside this function are autogenerated (see
; the note on the first line of this file). Regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
;
; As visible in the expectations below, the sse2/ssse3/sse4.1 runs build the
; lane masks from pcmpgtd/pcmpeqd/pshufd sequences where the avx runs use
; vpcmpgtq, and the avx512 runs use vpminsq/vpmaxsq followed by vpmovqw.
685define <8 x i16> @trunc_packus_v8i64_v8i16(<8 x i64> %a0) {
686; SSE2-LABEL: trunc_packus_v8i64_v8i16:
687; SSE2: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000688; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [65535,65535]
689; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
690; SSE2-NEXT: movdqa %xmm1, %xmm5
691; SSE2-NEXT: pxor %xmm10, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000692; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147549183,2147549183]
693; SSE2-NEXT: movdqa %xmm9, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000694; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
695; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
696; SSE2-NEXT: pcmpeqd %xmm9, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000697; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
698; SSE2-NEXT: pand %xmm7, %xmm4
699; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
700; SSE2-NEXT: por %xmm4, %xmm5
701; SSE2-NEXT: pand %xmm5, %xmm1
702; SSE2-NEXT: pandn %xmm8, %xmm5
703; SSE2-NEXT: por %xmm1, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000704; SSE2-NEXT: movdqa %xmm0, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +0000705; SSE2-NEXT: pxor %xmm10, %xmm1
706; SSE2-NEXT: movdqa %xmm9, %xmm4
707; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000708; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000709; SSE2-NEXT: pcmpeqd %xmm9, %xmm1
710; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
711; SSE2-NEXT: pand %xmm6, %xmm7
712; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
713; SSE2-NEXT: por %xmm7, %xmm1
714; SSE2-NEXT: pand %xmm1, %xmm0
715; SSE2-NEXT: pandn %xmm8, %xmm1
716; SSE2-NEXT: por %xmm0, %xmm1
717; SSE2-NEXT: movdqa %xmm3, %xmm0
718; SSE2-NEXT: pxor %xmm10, %xmm0
719; SSE2-NEXT: movdqa %xmm9, %xmm4
720; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
721; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
722; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
723; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
724; SSE2-NEXT: pand %xmm6, %xmm0
725; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
726; SSE2-NEXT: por %xmm0, %xmm6
727; SSE2-NEXT: pand %xmm6, %xmm3
728; SSE2-NEXT: pandn %xmm8, %xmm6
729; SSE2-NEXT: por %xmm3, %xmm6
730; SSE2-NEXT: movdqa %xmm2, %xmm0
731; SSE2-NEXT: pxor %xmm10, %xmm0
732; SSE2-NEXT: movdqa %xmm9, %xmm3
733; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
734; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
735; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
736; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
737; SSE2-NEXT: pand %xmm4, %xmm0
738; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
739; SSE2-NEXT: por %xmm0, %xmm3
740; SSE2-NEXT: pand %xmm3, %xmm2
741; SSE2-NEXT: pandn %xmm8, %xmm3
742; SSE2-NEXT: por %xmm2, %xmm3
743; SSE2-NEXT: movdqa %xmm3, %xmm0
744; SSE2-NEXT: pxor %xmm10, %xmm0
745; SSE2-NEXT: movdqa %xmm0, %xmm2
746; SSE2-NEXT: pcmpgtd %xmm10, %xmm2
747; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
748; SSE2-NEXT: pcmpeqd %xmm10, %xmm0
749; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
750; SSE2-NEXT: pand %xmm4, %xmm7
751; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
752; SSE2-NEXT: por %xmm7, %xmm0
753; SSE2-NEXT: pand %xmm3, %xmm0
754; SSE2-NEXT: movdqa %xmm6, %xmm2
755; SSE2-NEXT: pxor %xmm10, %xmm2
756; SSE2-NEXT: movdqa %xmm2, %xmm3
757; SSE2-NEXT: pcmpgtd %xmm10, %xmm3
758; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
759; SSE2-NEXT: pcmpeqd %xmm10, %xmm2
760; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
761; SSE2-NEXT: pand %xmm4, %xmm7
762; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
763; SSE2-NEXT: por %xmm7, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000764; SSE2-NEXT: pand %xmm6, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000765; SSE2-NEXT: movdqa %xmm1, %xmm3
766; SSE2-NEXT: pxor %xmm10, %xmm3
767; SSE2-NEXT: movdqa %xmm3, %xmm4
768; SSE2-NEXT: pcmpgtd %xmm10, %xmm4
769; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
770; SSE2-NEXT: pcmpeqd %xmm10, %xmm3
771; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
772; SSE2-NEXT: pand %xmm6, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000773; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000774; SSE2-NEXT: por %xmm3, %xmm4
775; SSE2-NEXT: pand %xmm1, %xmm4
776; SSE2-NEXT: movdqa %xmm5, %xmm1
777; SSE2-NEXT: pxor %xmm10, %xmm1
778; SSE2-NEXT: movdqa %xmm1, %xmm3
779; SSE2-NEXT: pcmpgtd %xmm10, %xmm3
780; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
781; SSE2-NEXT: pcmpeqd %xmm10, %xmm1
782; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
783; SSE2-NEXT: pand %xmm6, %xmm1
784; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
785; SSE2-NEXT: por %xmm1, %xmm3
786; SSE2-NEXT: pand %xmm5, %xmm3
787; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000788; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000789; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
790; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
791; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
792; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
793; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,0,2,4,5,6,7]
794; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000795; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000796; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
797; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000798; SSE2-NEXT: retq
799;
800; SSSE3-LABEL: trunc_packus_v8i64_v8i16:
801; SSSE3: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000802; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [65535,65535]
803; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
804; SSSE3-NEXT: movdqa %xmm1, %xmm5
805; SSSE3-NEXT: pxor %xmm10, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000806; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147549183,2147549183]
807; SSSE3-NEXT: movdqa %xmm9, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000808; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6
809; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
810; SSSE3-NEXT: pcmpeqd %xmm9, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000811; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
812; SSSE3-NEXT: pand %xmm7, %xmm4
813; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
814; SSSE3-NEXT: por %xmm4, %xmm5
815; SSSE3-NEXT: pand %xmm5, %xmm1
816; SSSE3-NEXT: pandn %xmm8, %xmm5
817; SSSE3-NEXT: por %xmm1, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000818; SSSE3-NEXT: movdqa %xmm0, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +0000819; SSSE3-NEXT: pxor %xmm10, %xmm1
820; SSSE3-NEXT: movdqa %xmm9, %xmm4
821; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000822; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000823; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1
824; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
825; SSSE3-NEXT: pand %xmm6, %xmm7
826; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
827; SSSE3-NEXT: por %xmm7, %xmm1
828; SSSE3-NEXT: pand %xmm1, %xmm0
829; SSSE3-NEXT: pandn %xmm8, %xmm1
830; SSSE3-NEXT: por %xmm0, %xmm1
831; SSSE3-NEXT: movdqa %xmm3, %xmm0
832; SSSE3-NEXT: pxor %xmm10, %xmm0
833; SSSE3-NEXT: movdqa %xmm9, %xmm4
834; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4
835; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
836; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
837; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
838; SSSE3-NEXT: pand %xmm6, %xmm0
839; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
840; SSSE3-NEXT: por %xmm0, %xmm6
841; SSSE3-NEXT: pand %xmm6, %xmm3
842; SSSE3-NEXT: pandn %xmm8, %xmm6
843; SSSE3-NEXT: por %xmm3, %xmm6
844; SSSE3-NEXT: movdqa %xmm2, %xmm0
845; SSSE3-NEXT: pxor %xmm10, %xmm0
846; SSSE3-NEXT: movdqa %xmm9, %xmm3
847; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3
848; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
849; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
850; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
851; SSSE3-NEXT: pand %xmm4, %xmm0
852; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
853; SSSE3-NEXT: por %xmm0, %xmm3
854; SSSE3-NEXT: pand %xmm3, %xmm2
855; SSSE3-NEXT: pandn %xmm8, %xmm3
856; SSSE3-NEXT: por %xmm2, %xmm3
857; SSSE3-NEXT: movdqa %xmm3, %xmm0
858; SSSE3-NEXT: pxor %xmm10, %xmm0
859; SSSE3-NEXT: movdqa %xmm0, %xmm2
860; SSSE3-NEXT: pcmpgtd %xmm10, %xmm2
861; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
862; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0
863; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
864; SSSE3-NEXT: pand %xmm4, %xmm7
865; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
866; SSSE3-NEXT: por %xmm7, %xmm0
867; SSSE3-NEXT: pand %xmm3, %xmm0
868; SSSE3-NEXT: movdqa %xmm6, %xmm2
869; SSSE3-NEXT: pxor %xmm10, %xmm2
870; SSSE3-NEXT: movdqa %xmm2, %xmm3
871; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3
872; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
873; SSSE3-NEXT: pcmpeqd %xmm10, %xmm2
874; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
875; SSSE3-NEXT: pand %xmm4, %xmm7
876; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
877; SSSE3-NEXT: por %xmm7, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000878; SSSE3-NEXT: pand %xmm6, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000879; SSSE3-NEXT: movdqa %xmm1, %xmm3
880; SSSE3-NEXT: pxor %xmm10, %xmm3
881; SSSE3-NEXT: movdqa %xmm3, %xmm4
882; SSSE3-NEXT: pcmpgtd %xmm10, %xmm4
883; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
884; SSSE3-NEXT: pcmpeqd %xmm10, %xmm3
885; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
886; SSSE3-NEXT: pand %xmm6, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000887; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000888; SSSE3-NEXT: por %xmm3, %xmm4
889; SSSE3-NEXT: pand %xmm1, %xmm4
890; SSSE3-NEXT: movdqa %xmm5, %xmm1
891; SSSE3-NEXT: pxor %xmm10, %xmm1
892; SSSE3-NEXT: movdqa %xmm1, %xmm3
893; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3
894; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
895; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1
896; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
897; SSSE3-NEXT: pand %xmm6, %xmm1
898; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
899; SSSE3-NEXT: por %xmm1, %xmm3
900; SSSE3-NEXT: pand %xmm5, %xmm3
901; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000902; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000903; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
904; SSSE3-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
905; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
906; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
907; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,0,2,4,5,6,7]
908; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000909; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000910; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
911; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000912; SSSE3-NEXT: retq
913;
914; SSE41-LABEL: trunc_packus_v8i64_v8i16:
915; SSE41: # %bb.0:
916; SSE41-NEXT: movdqa %xmm0, %xmm8
Simon Pilgrim0be55672018-02-11 10:52:37 +0000917; SSE41-NEXT: movapd {{.*#+}} xmm7 = [65535,65535]
918; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,0,2147483648,0]
919; SSE41-NEXT: movdqa %xmm3, %xmm0
920; SSE41-NEXT: pxor %xmm9, %xmm0
921; SSE41-NEXT: movdqa {{.*#+}} xmm10 = [2147549183,2147549183]
922; SSE41-NEXT: movdqa %xmm10, %xmm6
923; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
924; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm6[0,0,2,2]
925; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
926; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
927; SSE41-NEXT: pand %xmm5, %xmm4
928; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
929; SSE41-NEXT: por %xmm4, %xmm0
930; SSE41-NEXT: movapd %xmm7, %xmm6
931; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm6
932; SSE41-NEXT: movdqa %xmm2, %xmm0
933; SSE41-NEXT: pxor %xmm9, %xmm0
934; SSE41-NEXT: movdqa %xmm10, %xmm3
935; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
936; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
937; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
938; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000939; SSE41-NEXT: pand %xmm4, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000940; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
941; SSE41-NEXT: por %xmm5, %xmm0
942; SSE41-NEXT: movapd %xmm7, %xmm3
943; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm3
944; SSE41-NEXT: movdqa %xmm1, %xmm0
945; SSE41-NEXT: pxor %xmm9, %xmm0
946; SSE41-NEXT: movdqa %xmm10, %xmm2
947; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
948; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
949; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
950; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
951; SSE41-NEXT: pand %xmm4, %xmm5
952; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
953; SSE41-NEXT: por %xmm5, %xmm0
954; SSE41-NEXT: movapd %xmm7, %xmm2
955; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
956; SSE41-NEXT: movdqa %xmm8, %xmm0
957; SSE41-NEXT: pxor %xmm9, %xmm0
958; SSE41-NEXT: movdqa %xmm10, %xmm1
959; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
960; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
961; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
962; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
963; SSE41-NEXT: pand %xmm4, %xmm5
964; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
965; SSE41-NEXT: por %xmm5, %xmm0
966; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm7
967; SSE41-NEXT: movapd %xmm7, %xmm0
968; SSE41-NEXT: xorpd %xmm9, %xmm0
969; SSE41-NEXT: movapd %xmm0, %xmm1
970; SSE41-NEXT: pcmpgtd %xmm9, %xmm1
971; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
972; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
973; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
974; SSE41-NEXT: pand %xmm4, %xmm5
975; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
976; SSE41-NEXT: por %xmm5, %xmm0
977; SSE41-NEXT: pxor %xmm8, %xmm8
978; SSE41-NEXT: pxor %xmm1, %xmm1
979; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm1
980; SSE41-NEXT: movapd %xmm2, %xmm0
981; SSE41-NEXT: xorpd %xmm9, %xmm0
982; SSE41-NEXT: movapd %xmm0, %xmm5
983; SSE41-NEXT: pcmpgtd %xmm9, %xmm5
984; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
985; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
986; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
987; SSE41-NEXT: pand %xmm7, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000988; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
989; SSE41-NEXT: por %xmm4, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +0000990; SSE41-NEXT: pxor %xmm7, %xmm7
991; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm7
992; SSE41-NEXT: movapd %xmm3, %xmm0
993; SSE41-NEXT: xorpd %xmm9, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000994; SSE41-NEXT: movapd %xmm0, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000995; SSE41-NEXT: pcmpgtd %xmm9, %xmm2
996; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
997; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
998; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
999; SSE41-NEXT: pand %xmm4, %xmm5
1000; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
1001; SSE41-NEXT: por %xmm5, %xmm0
1002; SSE41-NEXT: pxor %xmm2, %xmm2
1003; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2
1004; SSE41-NEXT: movapd %xmm6, %xmm0
1005; SSE41-NEXT: xorpd %xmm9, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001006; SSE41-NEXT: movapd %xmm0, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00001007; SSE41-NEXT: pcmpgtd %xmm9, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001008; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001009; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
1010; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
1011; SSE41-NEXT: pand %xmm4, %xmm5
1012; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
1013; SSE41-NEXT: por %xmm5, %xmm0
1014; SSE41-NEXT: pxor %xmm3, %xmm3
1015; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm3
1016; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm8[1,2,3],xmm3[4],xmm8[5,6,7]
1017; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm8[1,2,3],xmm2[4],xmm8[5,6,7]
1018; SSE41-NEXT: packusdw %xmm3, %xmm2
1019; SSE41-NEXT: pblendw {{.*#+}} xmm7 = xmm7[0],xmm8[1,2,3],xmm7[4],xmm8[5,6,7]
1020; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm8[1,2,3],xmm1[4],xmm8[5,6,7]
1021; SSE41-NEXT: packusdw %xmm7, %xmm1
1022; SSE41-NEXT: packusdw %xmm2, %xmm1
1023; SSE41-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001024; SSE41-NEXT: retq
1025;
1026; AVX1-LABEL: trunc_packus_v8i64_v8i16:
1027; AVX1: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001028; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [65535,65535,65535,65535]
1029; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1030; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [65535,65535]
1031; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
1032; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm5
1033; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
1034; AVX1-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
1035; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1036; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
1037; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm4
1038; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
1039; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001040; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1041; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm8
1042; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
1043; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm5
1044; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm6
1045; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7
1046; AVX1-NEXT: vpcmpgtq %xmm2, %xmm7, %xmm3
1047; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
1048; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3],xmm3[4],xmm2[5,6,7]
1049; AVX1-NEXT: vpand %xmm1, %xmm6, %xmm1
1050; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
1051; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
1052; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm3
1053; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3],xmm3[4],xmm2[5,6,7]
1054; AVX1-NEXT: vpand %xmm0, %xmm8, %xmm0
1055; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
1056; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
1057; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
1058; AVX1-NEXT: vzeroupper
1059; AVX1-NEXT: retq
1060;
1061; AVX2-SLOW-LABEL: trunc_packus_v8i64_v8i16:
1062; AVX2-SLOW: # %bb.0:
1063; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [65535,65535,65535,65535]
1064; AVX2-SLOW-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001065; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001066; AVX2-SLOW-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
1067; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001068; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001069; AVX2-SLOW-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
1070; AVX2-SLOW-NEXT: vpand %ymm1, %ymm3, %ymm1
1071; AVX2-SLOW-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2
1072; AVX2-SLOW-NEXT: vpand %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001073; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
1074; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1075; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
1076; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
1077; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1078; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
1079; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001080; AVX2-SLOW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001081; AVX2-SLOW-NEXT: vzeroupper
1082; AVX2-SLOW-NEXT: retq
1083;
1084; AVX2-FAST-LABEL: trunc_packus_v8i64_v8i16:
1085; AVX2-FAST: # %bb.0:
1086; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [65535,65535,65535,65535]
1087; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001088; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001089; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
1090; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001091; AVX2-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001092; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
1093; AVX2-FAST-NEXT: vpand %ymm1, %ymm3, %ymm1
1094; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2
1095; AVX2-FAST-NEXT: vpand %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001096; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
1097; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
1098; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
1099; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1100; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
1101; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001102; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001103; AVX2-FAST-NEXT: vzeroupper
1104; AVX2-FAST-NEXT: retq
1105;
1106; AVX512-LABEL: trunc_packus_v8i64_v8i16:
1107; AVX512: # %bb.0:
1108; AVX512-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
1109; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
1110; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
1111; AVX512-NEXT: vpmovqw %zmm0, %xmm0
1112; AVX512-NEXT: vzeroupper
1113; AVX512-NEXT: retq
; Signed clamp from above, so lanes that are not below 65535 become 65535.
1114 %1 = icmp slt <8 x i64> %a0, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
1115 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
; Signed clamp from below, so lanes that are not above 0 become 0.
1116 %3 = icmp sgt <8 x i64> %2, zeroinitializer
1117 %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
; Every lane is now within [0, 65535], so the truncate keeps its value.
1118 %5 = trunc <8 x i64> %4 to <8 x i16>
1119 ret <8 x i16> %5
1120}
1121
; Signed-clamps every i32 lane of %a0 to the range [0, 65535] (a select-based
; minimum against 65535 followed by a select-based maximum against 0) and then
; truncates the clamped vector to <8 x i16>.
;
; The per-target assertion lines inside this function are autogenerated (see
; the note on the first line of this file). Regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
;
; As visible in the expectations below, the sse4.1, avx and avx2 runs reduce
; the whole clamp-and-truncate to one packusdw/vpackusdw (after a 128-bit
; extract on the avx runs), while the sse2/ssse3 runs and all avx512 runs still
; show separate clamp and truncate steps.
1122define <8 x i16> @trunc_packus_v8i32_v8i16(<8 x i32> %a0) {
1123; SSE2-LABEL: trunc_packus_v8i32_v8i16:
1124; SSE2: # %bb.0:
1125; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
1126; SSE2-NEXT: movdqa %xmm2, %xmm3
1127; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001128; SSE2-NEXT: pand %xmm3, %xmm1
1129; SSE2-NEXT: pandn %xmm2, %xmm3
1130; SSE2-NEXT: por %xmm1, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00001131; SSE2-NEXT: movdqa %xmm2, %xmm1
1132; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
1133; SSE2-NEXT: pand %xmm1, %xmm0
1134; SSE2-NEXT: pandn %xmm2, %xmm1
1135; SSE2-NEXT: por %xmm0, %xmm1
1136; SSE2-NEXT: pxor %xmm2, %xmm2
1137; SSE2-NEXT: movdqa %xmm1, %xmm0
1138; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
1139; SSE2-NEXT: pand %xmm1, %xmm0
1140; SSE2-NEXT: movdqa %xmm3, %xmm1
1141; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
1142; SSE2-NEXT: pand %xmm3, %xmm1
1143; SSE2-NEXT: pslld $16, %xmm1
1144; SSE2-NEXT: psrad $16, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001145; SSE2-NEXT: pslld $16, %xmm0
1146; SSE2-NEXT: psrad $16, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001147; SSE2-NEXT: packssdw %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001148; SSE2-NEXT: retq
1149;
1150; SSSE3-LABEL: trunc_packus_v8i32_v8i16:
1151; SSSE3: # %bb.0:
1152; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
1153; SSSE3-NEXT: movdqa %xmm2, %xmm3
1154; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001155; SSSE3-NEXT: pand %xmm3, %xmm1
1156; SSSE3-NEXT: pandn %xmm2, %xmm3
1157; SSSE3-NEXT: por %xmm1, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00001158; SSSE3-NEXT: movdqa %xmm2, %xmm1
1159; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
1160; SSSE3-NEXT: pand %xmm1, %xmm0
1161; SSSE3-NEXT: pandn %xmm2, %xmm1
1162; SSSE3-NEXT: por %xmm0, %xmm1
1163; SSSE3-NEXT: pxor %xmm2, %xmm2
1164; SSSE3-NEXT: movdqa %xmm1, %xmm0
1165; SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
1166; SSSE3-NEXT: pand %xmm1, %xmm0
1167; SSSE3-NEXT: movdqa %xmm3, %xmm1
1168; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
1169; SSSE3-NEXT: pand %xmm3, %xmm1
1170; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
1171; SSSE3-NEXT: pshufb %xmm2, %xmm1
1172; SSSE3-NEXT: pshufb %xmm2, %xmm0
1173; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001174; SSSE3-NEXT: retq
1175;
1176; SSE41-LABEL: trunc_packus_v8i32_v8i16:
1177; SSE41: # %bb.0:
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00001178; SSE41-NEXT: packusdw %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001179; SSE41-NEXT: retq
1180;
1181; AVX1-LABEL: trunc_packus_v8i32_v8i16:
1182; AVX1: # %bb.0:
1183; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Simon Pilgrimb4e789e2018-02-07 15:48:44 +00001184; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001185; AVX1-NEXT: vzeroupper
1186; AVX1-NEXT: retq
1187;
1188; AVX2-LABEL: trunc_packus_v8i32_v8i16:
1189; AVX2: # %bb.0:
Simon Pilgrimb4e789e2018-02-07 15:48:44 +00001190; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1191; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001192; AVX2-NEXT: vzeroupper
1193; AVX2-NEXT: retq
1194;
1195; AVX512F-LABEL: trunc_packus_v8i32_v8i16:
1196; AVX512F: # %bb.0:
1197; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535]
1198; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1199; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1200; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1201; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001202; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001203; AVX512F-NEXT: vzeroupper
1204; AVX512F-NEXT: retq
1205;
1206; AVX512VL-LABEL: trunc_packus_v8i32_v8i16:
1207; AVX512VL: # %bb.0:
1208; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
1209; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1210; AVX512VL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1211; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
1212; AVX512VL-NEXT: vzeroupper
1213; AVX512VL-NEXT: retq
1214;
1215; AVX512BW-LABEL: trunc_packus_v8i32_v8i16:
1216; AVX512BW: # %bb.0:
1217; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535]
1218; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1219; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
1220; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1221; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001222; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001223; AVX512BW-NEXT: vzeroupper
1224; AVX512BW-NEXT: retq
1225;
1226; AVX512BWVL-LABEL: trunc_packus_v8i32_v8i16:
1227; AVX512BWVL: # %bb.0:
1228; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
1229; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1230; AVX512BWVL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1231; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
1232; AVX512BWVL-NEXT: vzeroupper
1233; AVX512BWVL-NEXT: retq
; Signed clamp from above, so lanes that are not below 65535 become 65535.
1234 %1 = icmp slt <8 x i32> %a0, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
1235 %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
; Signed clamp from below, so lanes that are not above 0 become 0.
1236 %3 = icmp sgt <8 x i32> %2, zeroinitializer
1237 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
; Every lane is now within [0, 65535], so the truncate keeps its value.
1238 %5 = trunc <8 x i32> %4 to <8 x i16>
1239 ret <8 x i16> %5
1240}
1241
1242define <16 x i16> @trunc_packus_v16i32_v16i16(<16 x i32> %a0) {
1243; SSE2-LABEL: trunc_packus_v16i32_v16i16:
1244; SSE2: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001245; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [65535,65535,65535,65535]
1246; SSE2-NEXT: movdqa %xmm6, %xmm4
1247; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
1248; SSE2-NEXT: pand %xmm4, %xmm1
1249; SSE2-NEXT: pandn %xmm6, %xmm4
1250; SSE2-NEXT: por %xmm1, %xmm4
1251; SSE2-NEXT: movdqa %xmm6, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001252; SSE2-NEXT: pcmpgtd %xmm0, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001253; SSE2-NEXT: pand %xmm5, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001254; SSE2-NEXT: pandn %xmm6, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001255; SSE2-NEXT: por %xmm0, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001256; SSE2-NEXT: movdqa %xmm6, %xmm0
1257; SSE2-NEXT: pcmpgtd %xmm3, %xmm0
1258; SSE2-NEXT: pand %xmm0, %xmm3
1259; SSE2-NEXT: pandn %xmm6, %xmm0
1260; SSE2-NEXT: por %xmm3, %xmm0
1261; SSE2-NEXT: movdqa %xmm6, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001262; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00001263; SSE2-NEXT: pand %xmm3, %xmm2
1264; SSE2-NEXT: pandn %xmm6, %xmm3
1265; SSE2-NEXT: por %xmm2, %xmm3
1266; SSE2-NEXT: pxor %xmm2, %xmm2
1267; SSE2-NEXT: movdqa %xmm3, %xmm1
1268; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
1269; SSE2-NEXT: pand %xmm3, %xmm1
1270; SSE2-NEXT: movdqa %xmm0, %xmm3
1271; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
1272; SSE2-NEXT: pand %xmm0, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001273; SSE2-NEXT: movdqa %xmm5, %xmm0
1274; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001275; SSE2-NEXT: pand %xmm5, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001276; SSE2-NEXT: movdqa %xmm4, %xmm5
1277; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
1278; SSE2-NEXT: pand %xmm4, %xmm5
1279; SSE2-NEXT: pslld $16, %xmm5
1280; SSE2-NEXT: psrad $16, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001281; SSE2-NEXT: pslld $16, %xmm0
1282; SSE2-NEXT: psrad $16, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001283; SSE2-NEXT: packssdw %xmm5, %xmm0
1284; SSE2-NEXT: pslld $16, %xmm3
1285; SSE2-NEXT: psrad $16, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001286; SSE2-NEXT: pslld $16, %xmm1
1287; SSE2-NEXT: psrad $16, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001288; SSE2-NEXT: packssdw %xmm3, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001289; SSE2-NEXT: retq
1290;
1291; SSSE3-LABEL: trunc_packus_v16i32_v16i16:
1292; SSSE3: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001293; SSSE3-NEXT: movdqa {{.*#+}} xmm6 = [65535,65535,65535,65535]
1294; SSSE3-NEXT: movdqa %xmm6, %xmm4
1295; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
1296; SSSE3-NEXT: pand %xmm4, %xmm1
1297; SSSE3-NEXT: pandn %xmm6, %xmm4
1298; SSSE3-NEXT: por %xmm1, %xmm4
1299; SSSE3-NEXT: movdqa %xmm6, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001300; SSSE3-NEXT: pcmpgtd %xmm0, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001301; SSSE3-NEXT: pand %xmm5, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001302; SSSE3-NEXT: pandn %xmm6, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001303; SSSE3-NEXT: por %xmm0, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001304; SSSE3-NEXT: movdqa %xmm6, %xmm0
1305; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
1306; SSSE3-NEXT: pand %xmm0, %xmm3
1307; SSSE3-NEXT: pandn %xmm6, %xmm0
1308; SSSE3-NEXT: por %xmm3, %xmm0
1309; SSSE3-NEXT: movdqa %xmm6, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001310; SSSE3-NEXT: pcmpgtd %xmm2, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00001311; SSSE3-NEXT: pand %xmm3, %xmm2
1312; SSSE3-NEXT: pandn %xmm6, %xmm3
1313; SSSE3-NEXT: por %xmm2, %xmm3
1314; SSSE3-NEXT: pxor %xmm2, %xmm2
1315; SSSE3-NEXT: movdqa %xmm3, %xmm1
1316; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
1317; SSSE3-NEXT: pand %xmm3, %xmm1
1318; SSSE3-NEXT: movdqa %xmm0, %xmm3
1319; SSSE3-NEXT: pcmpgtd %xmm2, %xmm3
1320; SSSE3-NEXT: pand %xmm0, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001321; SSSE3-NEXT: movdqa %xmm5, %xmm0
1322; SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001323; SSSE3-NEXT: pand %xmm5, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001324; SSSE3-NEXT: movdqa %xmm4, %xmm5
1325; SSSE3-NEXT: pcmpgtd %xmm2, %xmm5
1326; SSSE3-NEXT: pand %xmm4, %xmm5
1327; SSSE3-NEXT: pslld $16, %xmm5
1328; SSSE3-NEXT: psrad $16, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001329; SSSE3-NEXT: pslld $16, %xmm0
1330; SSSE3-NEXT: psrad $16, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001331; SSSE3-NEXT: packssdw %xmm5, %xmm0
1332; SSSE3-NEXT: pslld $16, %xmm3
1333; SSSE3-NEXT: psrad $16, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001334; SSSE3-NEXT: pslld $16, %xmm1
1335; SSSE3-NEXT: psrad $16, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001336; SSSE3-NEXT: packssdw %xmm3, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001337; SSSE3-NEXT: retq
1338;
1339; SSE41-LABEL: trunc_packus_v16i32_v16i16:
1340; SSE41: # %bb.0:
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001341; SSE41-NEXT: packusdw %xmm1, %xmm0
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00001342; SSE41-NEXT: packusdw %xmm3, %xmm2
1343; SSE41-NEXT: movdqa %xmm2, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001344; SSE41-NEXT: retq
1345;
1346; AVX1-LABEL: trunc_packus_v16i32_v16i16:
1347; AVX1: # %bb.0:
1348; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001349; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00001350; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001351; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
1352; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1353; AVX1-NEXT: retq
1354;
1355; AVX2-LABEL: trunc_packus_v16i32_v16i16:
1356; AVX2: # %bb.0:
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00001357; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
1358; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001359; AVX2-NEXT: retq
1360;
1361; AVX512-LABEL: trunc_packus_v16i32_v16i16:
1362; AVX512: # %bb.0:
1363; AVX512-NEXT: vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
1364; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
1365; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
1366; AVX512-NEXT: vpmovdw %zmm0, %ymm0
1367; AVX512-NEXT: retq
1368 %1 = icmp slt <16 x i32> %a0, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
1369 %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
1370 %3 = icmp sgt <16 x i32> %2, zeroinitializer
1371 %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
1372 %5 = trunc <16 x i32> %4 to <16 x i16>
1373 ret <16 x i16> %5
1374}
1375
1376;
1377; PACKUS saturation truncation to vXi8
1378;
1379
; trunc_packus_v8i64_v8i8: clamp every i64 lane of %a0 to the range [0, 255]
; using signed compares + selects, then truncate the clamped vector to <8 x i8>.
; The per-prefix CHECK lines below are the expected llc output for each RUN
; configuration; per the NOTE at the top of this file they are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them rather than editing by hand.
1380define <8 x i8> @trunc_packus_v8i64_v8i8(<8 x i64> %a0) {
1381; SSE2-LABEL: trunc_packus_v8i64_v8i8:
1382; SSE2: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001383; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [255,255]
1384; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
1385; SSE2-NEXT: movdqa %xmm1, %xmm5
1386; SSE2-NEXT: pxor %xmm10, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001387; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483903,2147483903]
1388; SSE2-NEXT: movdqa %xmm9, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001389; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
1390; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1391; SSE2-NEXT: pcmpeqd %xmm9, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001392; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
1393; SSE2-NEXT: pand %xmm7, %xmm4
1394; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
1395; SSE2-NEXT: por %xmm4, %xmm5
1396; SSE2-NEXT: pand %xmm5, %xmm1
1397; SSE2-NEXT: pandn %xmm8, %xmm5
1398; SSE2-NEXT: por %xmm1, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001399; SSE2-NEXT: movdqa %xmm0, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001400; SSE2-NEXT: pxor %xmm10, %xmm1
1401; SSE2-NEXT: movdqa %xmm9, %xmm4
1402; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001403; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001404; SSE2-NEXT: pcmpeqd %xmm9, %xmm1
1405; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
1406; SSE2-NEXT: pand %xmm6, %xmm7
1407; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
1408; SSE2-NEXT: por %xmm7, %xmm1
1409; SSE2-NEXT: pand %xmm1, %xmm0
1410; SSE2-NEXT: pandn %xmm8, %xmm1
1411; SSE2-NEXT: por %xmm0, %xmm1
1412; SSE2-NEXT: movdqa %xmm3, %xmm0
1413; SSE2-NEXT: pxor %xmm10, %xmm0
1414; SSE2-NEXT: movdqa %xmm9, %xmm4
1415; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
1416; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
1417; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
1418; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1419; SSE2-NEXT: pand %xmm6, %xmm0
1420; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
1421; SSE2-NEXT: por %xmm0, %xmm6
1422; SSE2-NEXT: pand %xmm6, %xmm3
1423; SSE2-NEXT: pandn %xmm8, %xmm6
1424; SSE2-NEXT: por %xmm3, %xmm6
1425; SSE2-NEXT: movdqa %xmm2, %xmm0
1426; SSE2-NEXT: pxor %xmm10, %xmm0
1427; SSE2-NEXT: movdqa %xmm9, %xmm3
1428; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
1429; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
1430; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
1431; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1432; SSE2-NEXT: pand %xmm4, %xmm0
1433; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1434; SSE2-NEXT: por %xmm0, %xmm3
1435; SSE2-NEXT: pand %xmm3, %xmm2
1436; SSE2-NEXT: pandn %xmm8, %xmm3
1437; SSE2-NEXT: por %xmm2, %xmm3
1438; SSE2-NEXT: movdqa %xmm3, %xmm0
1439; SSE2-NEXT: pxor %xmm10, %xmm0
1440; SSE2-NEXT: movdqa %xmm0, %xmm2
1441; SSE2-NEXT: pcmpgtd %xmm10, %xmm2
1442; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
1443; SSE2-NEXT: pcmpeqd %xmm10, %xmm0
1444; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
1445; SSE2-NEXT: pand %xmm4, %xmm7
1446; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
1447; SSE2-NEXT: por %xmm7, %xmm0
1448; SSE2-NEXT: pand %xmm3, %xmm0
1449; SSE2-NEXT: movdqa %xmm6, %xmm2
1450; SSE2-NEXT: pxor %xmm10, %xmm2
1451; SSE2-NEXT: movdqa %xmm2, %xmm3
1452; SSE2-NEXT: pcmpgtd %xmm10, %xmm3
1453; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
1454; SSE2-NEXT: pcmpeqd %xmm10, %xmm2
1455; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
1456; SSE2-NEXT: pand %xmm4, %xmm7
1457; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
1458; SSE2-NEXT: por %xmm7, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001459; SSE2-NEXT: pand %xmm6, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001460; SSE2-NEXT: movdqa %xmm1, %xmm3
1461; SSE2-NEXT: pxor %xmm10, %xmm3
1462; SSE2-NEXT: movdqa %xmm3, %xmm4
1463; SSE2-NEXT: pcmpgtd %xmm10, %xmm4
1464; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
1465; SSE2-NEXT: pcmpeqd %xmm10, %xmm3
1466; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1467; SSE2-NEXT: pand %xmm6, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001468; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001469; SSE2-NEXT: por %xmm3, %xmm4
1470; SSE2-NEXT: pand %xmm1, %xmm4
1471; SSE2-NEXT: movdqa %xmm5, %xmm1
1472; SSE2-NEXT: pxor %xmm10, %xmm1
1473; SSE2-NEXT: movdqa %xmm1, %xmm3
1474; SSE2-NEXT: pcmpgtd %xmm10, %xmm3
1475; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
1476; SSE2-NEXT: pcmpeqd %xmm10, %xmm1
1477; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1478; SSE2-NEXT: pand %xmm6, %xmm1
1479; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1480; SSE2-NEXT: por %xmm1, %xmm3
1481; SSE2-NEXT: pand %xmm5, %xmm3
1482; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001483; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001484; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
1485; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
1486; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
1487; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
1488; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,0,2,4,5,6,7]
1489; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001490; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001491; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1492; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001493; SSE2-NEXT: retq
1494;
1495; SSSE3-LABEL: trunc_packus_v8i64_v8i8:
1496; SSSE3: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001497; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [255,255]
1498; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
1499; SSSE3-NEXT: movdqa %xmm1, %xmm5
1500; SSSE3-NEXT: pxor %xmm10, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001501; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483903,2147483903]
1502; SSSE3-NEXT: movdqa %xmm9, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001503; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6
1504; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1505; SSSE3-NEXT: pcmpeqd %xmm9, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001506; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
1507; SSSE3-NEXT: pand %xmm7, %xmm4
1508; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
1509; SSSE3-NEXT: por %xmm4, %xmm5
1510; SSSE3-NEXT: pand %xmm5, %xmm1
1511; SSSE3-NEXT: pandn %xmm8, %xmm5
1512; SSSE3-NEXT: por %xmm1, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001513; SSSE3-NEXT: movdqa %xmm0, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001514; SSSE3-NEXT: pxor %xmm10, %xmm1
1515; SSSE3-NEXT: movdqa %xmm9, %xmm4
1516; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001517; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001518; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1
1519; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
1520; SSSE3-NEXT: pand %xmm6, %xmm7
1521; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
1522; SSSE3-NEXT: por %xmm7, %xmm1
1523; SSSE3-NEXT: pand %xmm1, %xmm0
1524; SSSE3-NEXT: pandn %xmm8, %xmm1
1525; SSSE3-NEXT: por %xmm0, %xmm1
1526; SSSE3-NEXT: movdqa %xmm3, %xmm0
1527; SSSE3-NEXT: pxor %xmm10, %xmm0
1528; SSSE3-NEXT: movdqa %xmm9, %xmm4
1529; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4
1530; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
1531; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
1532; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1533; SSSE3-NEXT: pand %xmm6, %xmm0
1534; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
1535; SSSE3-NEXT: por %xmm0, %xmm6
1536; SSSE3-NEXT: pand %xmm6, %xmm3
1537; SSSE3-NEXT: pandn %xmm8, %xmm6
1538; SSSE3-NEXT: por %xmm3, %xmm6
1539; SSSE3-NEXT: movdqa %xmm2, %xmm0
1540; SSSE3-NEXT: pxor %xmm10, %xmm0
1541; SSSE3-NEXT: movdqa %xmm9, %xmm3
1542; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3
1543; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
1544; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
1545; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1546; SSSE3-NEXT: pand %xmm4, %xmm0
1547; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1548; SSSE3-NEXT: por %xmm0, %xmm3
1549; SSSE3-NEXT: pand %xmm3, %xmm2
1550; SSSE3-NEXT: pandn %xmm8, %xmm3
1551; SSSE3-NEXT: por %xmm2, %xmm3
1552; SSSE3-NEXT: movdqa %xmm3, %xmm0
1553; SSSE3-NEXT: pxor %xmm10, %xmm0
1554; SSSE3-NEXT: movdqa %xmm0, %xmm2
1555; SSSE3-NEXT: pcmpgtd %xmm10, %xmm2
1556; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
1557; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0
1558; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
1559; SSSE3-NEXT: pand %xmm4, %xmm7
1560; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
1561; SSSE3-NEXT: por %xmm7, %xmm0
1562; SSSE3-NEXT: pand %xmm3, %xmm0
1563; SSSE3-NEXT: movdqa %xmm6, %xmm2
1564; SSSE3-NEXT: pxor %xmm10, %xmm2
1565; SSSE3-NEXT: movdqa %xmm2, %xmm3
1566; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3
1567; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
1568; SSSE3-NEXT: pcmpeqd %xmm10, %xmm2
1569; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
1570; SSSE3-NEXT: pand %xmm4, %xmm7
1571; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
1572; SSSE3-NEXT: por %xmm7, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001573; SSSE3-NEXT: pand %xmm6, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001574; SSSE3-NEXT: movdqa %xmm1, %xmm3
1575; SSSE3-NEXT: pxor %xmm10, %xmm3
1576; SSSE3-NEXT: movdqa %xmm3, %xmm4
1577; SSSE3-NEXT: pcmpgtd %xmm10, %xmm4
1578; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
1579; SSSE3-NEXT: pcmpeqd %xmm10, %xmm3
1580; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1581; SSSE3-NEXT: pand %xmm6, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001582; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001583; SSSE3-NEXT: por %xmm3, %xmm4
1584; SSSE3-NEXT: pand %xmm1, %xmm4
1585; SSSE3-NEXT: movdqa %xmm5, %xmm1
1586; SSSE3-NEXT: pxor %xmm10, %xmm1
1587; SSSE3-NEXT: movdqa %xmm1, %xmm3
1588; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3
1589; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
1590; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1
1591; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1592; SSSE3-NEXT: pand %xmm6, %xmm1
1593; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1594; SSSE3-NEXT: por %xmm1, %xmm3
1595; SSSE3-NEXT: pand %xmm5, %xmm3
1596; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001597; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001598; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
1599; SSSE3-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
1600; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
1601; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
1602; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,0,2,4,5,6,7]
1603; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001604; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001605; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1606; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001607; SSSE3-NEXT: retq
1608;
1609; SSE41-LABEL: trunc_packus_v8i64_v8i8:
1610; SSE41: # %bb.0:
1611; SSE41-NEXT: movdqa %xmm0, %xmm8
Simon Pilgrim0be55672018-02-11 10:52:37 +00001612; SSE41-NEXT: movapd {{.*#+}} xmm7 = [255,255]
1613; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,0,2147483648,0]
1614; SSE41-NEXT: movdqa %xmm3, %xmm0
1615; SSE41-NEXT: pxor %xmm9, %xmm0
1616; SSE41-NEXT: movdqa {{.*#+}} xmm10 = [2147483903,2147483903]
1617; SSE41-NEXT: movdqa %xmm10, %xmm6
1618; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
1619; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm6[0,0,2,2]
1620; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
1621; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
1622; SSE41-NEXT: pand %xmm5, %xmm4
1623; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
1624; SSE41-NEXT: por %xmm4, %xmm0
1625; SSE41-NEXT: movapd %xmm7, %xmm6
1626; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm6
1627; SSE41-NEXT: movdqa %xmm2, %xmm0
1628; SSE41-NEXT: pxor %xmm9, %xmm0
1629; SSE41-NEXT: movdqa %xmm10, %xmm3
1630; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
1631; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
1632; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
1633; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001634; SSE41-NEXT: pand %xmm4, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001635; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
1636; SSE41-NEXT: por %xmm5, %xmm0
1637; SSE41-NEXT: movapd %xmm7, %xmm3
1638; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm3
1639; SSE41-NEXT: movdqa %xmm1, %xmm0
1640; SSE41-NEXT: pxor %xmm9, %xmm0
1641; SSE41-NEXT: movdqa %xmm10, %xmm2
1642; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
1643; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
1644; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
1645; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
1646; SSE41-NEXT: pand %xmm4, %xmm5
1647; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
1648; SSE41-NEXT: por %xmm5, %xmm0
1649; SSE41-NEXT: movapd %xmm7, %xmm2
1650; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
1651; SSE41-NEXT: movdqa %xmm8, %xmm0
1652; SSE41-NEXT: pxor %xmm9, %xmm0
1653; SSE41-NEXT: movdqa %xmm10, %xmm1
1654; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
1655; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
1656; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
1657; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
1658; SSE41-NEXT: pand %xmm4, %xmm5
1659; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1660; SSE41-NEXT: por %xmm5, %xmm0
1661; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm7
1662; SSE41-NEXT: movapd %xmm7, %xmm0
1663; SSE41-NEXT: xorpd %xmm9, %xmm0
1664; SSE41-NEXT: movapd %xmm0, %xmm1
1665; SSE41-NEXT: pcmpgtd %xmm9, %xmm1
1666; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
1667; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
1668; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
1669; SSE41-NEXT: pand %xmm4, %xmm5
1670; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1671; SSE41-NEXT: por %xmm5, %xmm0
1672; SSE41-NEXT: pxor %xmm8, %xmm8
1673; SSE41-NEXT: pxor %xmm1, %xmm1
1674; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm1
1675; SSE41-NEXT: movapd %xmm2, %xmm0
1676; SSE41-NEXT: xorpd %xmm9, %xmm0
1677; SSE41-NEXT: movapd %xmm0, %xmm5
1678; SSE41-NEXT: pcmpgtd %xmm9, %xmm5
1679; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
1680; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
1681; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
1682; SSE41-NEXT: pand %xmm7, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001683; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
1684; SSE41-NEXT: por %xmm4, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001685; SSE41-NEXT: pxor %xmm7, %xmm7
1686; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm7
1687; SSE41-NEXT: movapd %xmm3, %xmm0
1688; SSE41-NEXT: xorpd %xmm9, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001689; SSE41-NEXT: movapd %xmm0, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001690; SSE41-NEXT: pcmpgtd %xmm9, %xmm2
1691; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
1692; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
1693; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
1694; SSE41-NEXT: pand %xmm4, %xmm5
1695; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
1696; SSE41-NEXT: por %xmm5, %xmm0
1697; SSE41-NEXT: pxor %xmm2, %xmm2
1698; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2
1699; SSE41-NEXT: movapd %xmm6, %xmm0
1700; SSE41-NEXT: xorpd %xmm9, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001701; SSE41-NEXT: movapd %xmm0, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00001702; SSE41-NEXT: pcmpgtd %xmm9, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001703; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001704; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
1705; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
1706; SSE41-NEXT: pand %xmm4, %xmm5
1707; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
1708; SSE41-NEXT: por %xmm5, %xmm0
1709; SSE41-NEXT: pxor %xmm3, %xmm3
1710; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm3
1711; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm8[1,2,3],xmm3[4],xmm8[5,6,7]
1712; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm8[1,2,3],xmm2[4],xmm8[5,6,7]
1713; SSE41-NEXT: packusdw %xmm3, %xmm2
1714; SSE41-NEXT: pblendw {{.*#+}} xmm7 = xmm7[0],xmm8[1,2,3],xmm7[4],xmm8[5,6,7]
1715; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm8[1,2,3],xmm1[4],xmm8[5,6,7]
1716; SSE41-NEXT: packusdw %xmm7, %xmm1
1717; SSE41-NEXT: packusdw %xmm2, %xmm1
1718; SSE41-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001719; SSE41-NEXT: retq
1720;
1721; AVX1-LABEL: trunc_packus_v8i64_v8i8:
1722; AVX1: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001723; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [255,255,255,255]
1724; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1725; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255]
1726; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
1727; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm5
1728; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
1729; AVX1-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
1730; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1731; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
1732; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm4
1733; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
1734; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001735; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1736; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm8
1737; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
1738; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm5
1739; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm6
1740; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7
1741; AVX1-NEXT: vpcmpgtq %xmm2, %xmm7, %xmm3
1742; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
1743; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3],xmm3[4],xmm2[5,6,7]
1744; AVX1-NEXT: vpand %xmm1, %xmm6, %xmm1
1745; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
1746; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
1747; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm3
1748; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3],xmm3[4],xmm2[5,6,7]
1749; AVX1-NEXT: vpand %xmm0, %xmm8, %xmm0
1750; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
1751; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
1752; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
1753; AVX1-NEXT: vzeroupper
1754; AVX1-NEXT: retq
1755;
1756; AVX2-SLOW-LABEL: trunc_packus_v8i64_v8i8:
1757; AVX2-SLOW: # %bb.0:
1758; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [255,255,255,255]
1759; AVX2-SLOW-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001760; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001761; AVX2-SLOW-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
1762; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001763; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001764; AVX2-SLOW-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
1765; AVX2-SLOW-NEXT: vpand %ymm1, %ymm3, %ymm1
1766; AVX2-SLOW-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2
1767; AVX2-SLOW-NEXT: vpand %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001768; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
1769; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1770; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
1771; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
1772; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1773; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
1774; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001775; AVX2-SLOW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001776; AVX2-SLOW-NEXT: vzeroupper
1777; AVX2-SLOW-NEXT: retq
1778;
1779; AVX2-FAST-LABEL: trunc_packus_v8i64_v8i8:
1780; AVX2-FAST: # %bb.0:
1781; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [255,255,255,255]
1782; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001783; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001784; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
1785; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001786; AVX2-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001787; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
1788; AVX2-FAST-NEXT: vpand %ymm1, %ymm3, %ymm1
1789; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2
1790; AVX2-FAST-NEXT: vpand %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001791; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
1792; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
1793; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
1794; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1795; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
1796; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001797; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001798; AVX2-FAST-NEXT: vzeroupper
1799; AVX2-FAST-NEXT: retq
1800;
1801; AVX512-LABEL: trunc_packus_v8i64_v8i8:
1802; AVX512: # %bb.0:
1803; AVX512-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
1804; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
1805; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
1806; AVX512-NEXT: vpmovqw %zmm0, %xmm0
1807; AVX512-NEXT: vzeroupper
1808; AVX512-NEXT: retq
; Upper clamp: keep a lane where it is signed-less-than 255, otherwise use 255.
1809 %1 = icmp slt <8 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
1810 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
; Lower clamp: keep a lane where it is signed-greater-than 0, otherwise use 0.
1811 %3 = icmp sgt <8 x i64> %2, zeroinitializer
1812 %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
; Every lane is now within [0, 255], so the truncation to i8 drops only zero bits.
1813 %5 = trunc <8 x i64> %4 to <8 x i8>
1814 ret <8 x i8> %5
1815}
1816
1817define <16 x i8> @trunc_packus_v16i64_v16i8(<16 x i64> %a0) {
1818; SSE2-LABEL: trunc_packus_v16i64_v16i8:
1819; SSE2: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001820; SSE2-NEXT: movdqa {{.*#+}} xmm13 = [255,255]
1821; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,0,2147483648,0]
1822; SSE2-NEXT: movdqa %xmm7, %xmm8
1823; SSE2-NEXT: pxor %xmm9, %xmm8
1824; SSE2-NEXT: movdqa {{.*#+}} xmm11 = [2147483903,2147483903]
1825; SSE2-NEXT: movdqa %xmm11, %xmm10
1826; SSE2-NEXT: pcmpgtd %xmm8, %xmm10
1827; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm10[0,0,2,2]
1828; SSE2-NEXT: pcmpeqd %xmm11, %xmm8
1829; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm8[1,1,3,3]
1830; SSE2-NEXT: pand %xmm12, %xmm8
1831; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3]
1832; SSE2-NEXT: por %xmm8, %xmm10
1833; SSE2-NEXT: pand %xmm10, %xmm7
1834; SSE2-NEXT: pandn %xmm13, %xmm10
1835; SSE2-NEXT: por %xmm7, %xmm10
1836; SSE2-NEXT: movdqa %xmm10, -{{[0-9]+}}(%rsp) # 16-byte Spill
1837; SSE2-NEXT: movdqa %xmm6, %xmm7
1838; SSE2-NEXT: pxor %xmm9, %xmm7
1839; SSE2-NEXT: movdqa %xmm11, %xmm8
1840; SSE2-NEXT: pcmpgtd %xmm7, %xmm8
1841; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm8[0,0,2,2]
1842; SSE2-NEXT: pcmpeqd %xmm11, %xmm7
1843; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm7[1,1,3,3]
1844; SSE2-NEXT: pand %xmm12, %xmm10
1845; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm8[1,1,3,3]
1846; SSE2-NEXT: por %xmm10, %xmm14
1847; SSE2-NEXT: pand %xmm14, %xmm6
1848; SSE2-NEXT: pandn %xmm13, %xmm14
1849; SSE2-NEXT: por %xmm6, %xmm14
1850; SSE2-NEXT: movdqa %xmm5, %xmm6
1851; SSE2-NEXT: pxor %xmm9, %xmm6
1852; SSE2-NEXT: movdqa %xmm11, %xmm7
1853; SSE2-NEXT: pcmpgtd %xmm6, %xmm7
1854; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
1855; SSE2-NEXT: pcmpeqd %xmm11, %xmm6
1856; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1857; SSE2-NEXT: pand %xmm8, %xmm6
1858; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
1859; SSE2-NEXT: por %xmm6, %xmm7
1860; SSE2-NEXT: pand %xmm7, %xmm5
1861; SSE2-NEXT: pandn %xmm13, %xmm7
1862; SSE2-NEXT: por %xmm5, %xmm7
1863; SSE2-NEXT: movdqa %xmm7, %xmm10
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001864; SSE2-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) # 16-byte Spill
Simon Pilgrim0be55672018-02-11 10:52:37 +00001865; SSE2-NEXT: movdqa %xmm4, %xmm5
1866; SSE2-NEXT: pxor %xmm9, %xmm5
1867; SSE2-NEXT: movdqa %xmm11, %xmm6
1868; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
1869; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1870; SSE2-NEXT: pcmpeqd %xmm11, %xmm5
1871; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
1872; SSE2-NEXT: pand %xmm7, %xmm5
1873; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1874; SSE2-NEXT: por %xmm5, %xmm6
1875; SSE2-NEXT: pand %xmm6, %xmm4
1876; SSE2-NEXT: pandn %xmm13, %xmm6
1877; SSE2-NEXT: por %xmm4, %xmm6
1878; SSE2-NEXT: movdqa %xmm6, %xmm7
Geoff Berry94503c72018-02-01 18:54:01 +00001879; SSE2-NEXT: movdqa %xmm6, -{{[0-9]+}}(%rsp) # 16-byte Spill
Simon Pilgrim0be55672018-02-11 10:52:37 +00001880; SSE2-NEXT: movdqa %xmm3, %xmm4
1881; SSE2-NEXT: pxor %xmm9, %xmm4
1882; SSE2-NEXT: movdqa %xmm11, %xmm5
1883; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
1884; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
1885; SSE2-NEXT: pcmpeqd %xmm11, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001886; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001887; SSE2-NEXT: pand %xmm6, %xmm4
1888; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
1889; SSE2-NEXT: por %xmm4, %xmm5
1890; SSE2-NEXT: pand %xmm5, %xmm3
1891; SSE2-NEXT: pandn %xmm13, %xmm5
1892; SSE2-NEXT: por %xmm3, %xmm5
1893; SSE2-NEXT: movdqa %xmm5, %xmm8
1894; SSE2-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) # 16-byte Spill
1895; SSE2-NEXT: movdqa %xmm2, %xmm3
1896; SSE2-NEXT: pxor %xmm9, %xmm3
1897; SSE2-NEXT: movdqa %xmm11, %xmm4
1898; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
1899; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1900; SSE2-NEXT: pcmpeqd %xmm11, %xmm3
1901; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1902; SSE2-NEXT: pand %xmm5, %xmm3
1903; SSE2-NEXT: pshufd {{.*#+}} xmm15 = xmm4[1,1,3,3]
1904; SSE2-NEXT: por %xmm3, %xmm15
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001905; SSE2-NEXT: pand %xmm15, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001906; SSE2-NEXT: pandn %xmm13, %xmm15
1907; SSE2-NEXT: movdqa %xmm13, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001908; SSE2-NEXT: por %xmm2, %xmm15
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001909; SSE2-NEXT: movdqa %xmm1, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001910; SSE2-NEXT: pxor %xmm9, %xmm2
1911; SSE2-NEXT: movdqa %xmm11, %xmm3
1912; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
1913; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
1914; SSE2-NEXT: pcmpeqd %xmm11, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001915; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001916; SSE2-NEXT: pand %xmm4, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001917; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm3[1,1,3,3]
1918; SSE2-NEXT: por %xmm2, %xmm13
1919; SSE2-NEXT: pand %xmm13, %xmm1
1920; SSE2-NEXT: pandn %xmm5, %xmm13
1921; SSE2-NEXT: por %xmm1, %xmm13
1922; SSE2-NEXT: movdqa %xmm0, %xmm1
1923; SSE2-NEXT: pxor %xmm9, %xmm1
1924; SSE2-NEXT: movdqa %xmm11, %xmm2
1925; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
1926; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
1927; SSE2-NEXT: pcmpeqd %xmm11, %xmm1
1928; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1929; SSE2-NEXT: pand %xmm4, %xmm1
1930; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm2[1,1,3,3]
1931; SSE2-NEXT: por %xmm1, %xmm11
1932; SSE2-NEXT: pand %xmm11, %xmm0
1933; SSE2-NEXT: pandn %xmm5, %xmm11
1934; SSE2-NEXT: por %xmm0, %xmm11
1935; SSE2-NEXT: movdqa %xmm11, %xmm0
1936; SSE2-NEXT: pxor %xmm9, %xmm0
1937; SSE2-NEXT: movdqa %xmm0, %xmm1
1938; SSE2-NEXT: pcmpgtd %xmm9, %xmm1
1939; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
1940; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
1941; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1942; SSE2-NEXT: pand %xmm4, %xmm0
1943; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1944; SSE2-NEXT: por %xmm0, %xmm2
1945; SSE2-NEXT: movdqa %xmm13, %xmm0
1946; SSE2-NEXT: pxor %xmm9, %xmm0
1947; SSE2-NEXT: movdqa %xmm0, %xmm4
1948; SSE2-NEXT: pcmpgtd %xmm9, %xmm4
1949; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1950; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
1951; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1952; SSE2-NEXT: pand %xmm5, %xmm0
1953; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm4[1,1,3,3]
1954; SSE2-NEXT: por %xmm0, %xmm12
1955; SSE2-NEXT: movdqa %xmm15, %xmm0
1956; SSE2-NEXT: pxor %xmm9, %xmm0
1957; SSE2-NEXT: movdqa %xmm0, %xmm4
1958; SSE2-NEXT: pcmpgtd %xmm9, %xmm4
1959; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1960; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
1961; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1962; SSE2-NEXT: pand %xmm5, %xmm0
1963; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
1964; SSE2-NEXT: por %xmm0, %xmm6
1965; SSE2-NEXT: movdqa %xmm8, %xmm0
1966; SSE2-NEXT: pxor %xmm9, %xmm0
1967; SSE2-NEXT: movdqa %xmm0, %xmm4
1968; SSE2-NEXT: pcmpgtd %xmm9, %xmm4
1969; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1970; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
1971; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1972; SSE2-NEXT: pand %xmm5, %xmm0
1973; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm4[1,1,3,3]
1974; SSE2-NEXT: por %xmm0, %xmm8
1975; SSE2-NEXT: movdqa %xmm7, %xmm0
1976; SSE2-NEXT: pxor %xmm9, %xmm0
1977; SSE2-NEXT: movdqa %xmm0, %xmm5
1978; SSE2-NEXT: pcmpgtd %xmm9, %xmm5
1979; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
1980; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1981; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
1982; SSE2-NEXT: pand %xmm7, %xmm0
1983; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
1984; SSE2-NEXT: por %xmm0, %xmm5
1985; SSE2-NEXT: movdqa %xmm10, %xmm0
1986; SSE2-NEXT: pxor %xmm9, %xmm0
1987; SSE2-NEXT: movdqa %xmm0, %xmm7
1988; SSE2-NEXT: pcmpgtd %xmm9, %xmm7
1989; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
1990; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1991; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
1992; SSE2-NEXT: pand %xmm10, %xmm0
1993; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm7[1,1,3,3]
1994; SSE2-NEXT: por %xmm0, %xmm10
1995; SSE2-NEXT: movdqa %xmm14, %xmm0
1996; SSE2-NEXT: pxor %xmm9, %xmm0
1997; SSE2-NEXT: movdqa %xmm0, %xmm7
1998; SSE2-NEXT: pcmpgtd %xmm9, %xmm7
1999; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
2000; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
2001; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
2002; SSE2-NEXT: pand %xmm0, %xmm1
2003; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
2004; SSE2-NEXT: por %xmm1, %xmm0
2005; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm4 # 16-byte Reload
2006; SSE2-NEXT: movdqa %xmm4, %xmm1
2007; SSE2-NEXT: pxor %xmm9, %xmm1
2008; SSE2-NEXT: movdqa %xmm1, %xmm7
2009; SSE2-NEXT: pcmpgtd %xmm9, %xmm7
2010; SSE2-NEXT: pcmpeqd %xmm9, %xmm1
2011; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2012; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm7[0,0,2,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002013; SSE2-NEXT: pand %xmm3, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00002014; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
2015; SSE2-NEXT: por %xmm1, %xmm3
2016; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255]
2017; SSE2-NEXT: pand %xmm1, %xmm3
2018; SSE2-NEXT: pand %xmm4, %xmm3
2019; SSE2-NEXT: pand %xmm1, %xmm0
2020; SSE2-NEXT: pand %xmm14, %xmm0
2021; SSE2-NEXT: packuswb %xmm3, %xmm0
2022; SSE2-NEXT: pand %xmm1, %xmm10
2023; SSE2-NEXT: pand -{{[0-9]+}}(%rsp), %xmm10 # 16-byte Folded Reload
2024; SSE2-NEXT: pand %xmm1, %xmm5
2025; SSE2-NEXT: pand -{{[0-9]+}}(%rsp), %xmm5 # 16-byte Folded Reload
2026; SSE2-NEXT: packuswb %xmm10, %xmm5
2027; SSE2-NEXT: packuswb %xmm0, %xmm5
2028; SSE2-NEXT: pand %xmm1, %xmm8
2029; SSE2-NEXT: pand -{{[0-9]+}}(%rsp), %xmm8 # 16-byte Folded Reload
2030; SSE2-NEXT: pand %xmm1, %xmm6
2031; SSE2-NEXT: pand %xmm15, %xmm6
2032; SSE2-NEXT: packuswb %xmm8, %xmm6
2033; SSE2-NEXT: pand %xmm1, %xmm12
2034; SSE2-NEXT: pand %xmm13, %xmm12
2035; SSE2-NEXT: pand %xmm1, %xmm2
2036; SSE2-NEXT: pand %xmm11, %xmm2
2037; SSE2-NEXT: packuswb %xmm12, %xmm2
2038; SSE2-NEXT: packuswb %xmm6, %xmm2
2039; SSE2-NEXT: packuswb %xmm5, %xmm2
2040; SSE2-NEXT: movdqa %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002041; SSE2-NEXT: retq
2042;
2043; SSSE3-LABEL: trunc_packus_v16i64_v16i8:
2044; SSSE3: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00002045; SSSE3-NEXT: movdqa {{.*#+}} xmm13 = [255,255]
2046; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,0,2147483648,0]
2047; SSSE3-NEXT: movdqa %xmm7, %xmm8
2048; SSSE3-NEXT: pxor %xmm9, %xmm8
2049; SSSE3-NEXT: movdqa {{.*#+}} xmm11 = [2147483903,2147483903]
2050; SSSE3-NEXT: movdqa %xmm11, %xmm10
2051; SSSE3-NEXT: pcmpgtd %xmm8, %xmm10
2052; SSSE3-NEXT: pshufd {{.*#+}} xmm12 = xmm10[0,0,2,2]
2053; SSSE3-NEXT: pcmpeqd %xmm11, %xmm8
2054; SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm8[1,1,3,3]
2055; SSSE3-NEXT: pand %xmm12, %xmm8
2056; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3]
2057; SSSE3-NEXT: por %xmm8, %xmm10
2058; SSSE3-NEXT: pand %xmm10, %xmm7
2059; SSSE3-NEXT: pandn %xmm13, %xmm10
2060; SSSE3-NEXT: por %xmm7, %xmm10
2061; SSSE3-NEXT: movdqa %xmm10, -{{[0-9]+}}(%rsp) # 16-byte Spill
2062; SSSE3-NEXT: movdqa %xmm6, %xmm7
2063; SSSE3-NEXT: pxor %xmm9, %xmm7
2064; SSSE3-NEXT: movdqa %xmm11, %xmm8
2065; SSSE3-NEXT: pcmpgtd %xmm7, %xmm8
2066; SSSE3-NEXT: pshufd {{.*#+}} xmm12 = xmm8[0,0,2,2]
2067; SSSE3-NEXT: pcmpeqd %xmm11, %xmm7
2068; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm7[1,1,3,3]
2069; SSSE3-NEXT: pand %xmm12, %xmm10
2070; SSSE3-NEXT: pshufd {{.*#+}} xmm14 = xmm8[1,1,3,3]
2071; SSSE3-NEXT: por %xmm10, %xmm14
2072; SSSE3-NEXT: pand %xmm14, %xmm6
2073; SSSE3-NEXT: pandn %xmm13, %xmm14
2074; SSSE3-NEXT: por %xmm6, %xmm14
2075; SSSE3-NEXT: movdqa %xmm5, %xmm6
2076; SSSE3-NEXT: pxor %xmm9, %xmm6
2077; SSSE3-NEXT: movdqa %xmm11, %xmm7
2078; SSSE3-NEXT: pcmpgtd %xmm6, %xmm7
2079; SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
2080; SSSE3-NEXT: pcmpeqd %xmm11, %xmm6
2081; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2082; SSSE3-NEXT: pand %xmm8, %xmm6
2083; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
2084; SSSE3-NEXT: por %xmm6, %xmm7
2085; SSSE3-NEXT: pand %xmm7, %xmm5
2086; SSSE3-NEXT: pandn %xmm13, %xmm7
2087; SSSE3-NEXT: por %xmm5, %xmm7
2088; SSSE3-NEXT: movdqa %xmm7, %xmm10
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002089; SSSE3-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) # 16-byte Spill
Simon Pilgrim0be55672018-02-11 10:52:37 +00002090; SSSE3-NEXT: movdqa %xmm4, %xmm5
2091; SSSE3-NEXT: pxor %xmm9, %xmm5
2092; SSSE3-NEXT: movdqa %xmm11, %xmm6
2093; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6
2094; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
2095; SSSE3-NEXT: pcmpeqd %xmm11, %xmm5
2096; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2097; SSSE3-NEXT: pand %xmm7, %xmm5
2098; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2099; SSSE3-NEXT: por %xmm5, %xmm6
2100; SSSE3-NEXT: pand %xmm6, %xmm4
2101; SSSE3-NEXT: pandn %xmm13, %xmm6
2102; SSSE3-NEXT: por %xmm4, %xmm6
2103; SSSE3-NEXT: movdqa %xmm6, %xmm7
Geoff Berry94503c72018-02-01 18:54:01 +00002104; SSSE3-NEXT: movdqa %xmm6, -{{[0-9]+}}(%rsp) # 16-byte Spill
Simon Pilgrim0be55672018-02-11 10:52:37 +00002105; SSSE3-NEXT: movdqa %xmm3, %xmm4
2106; SSSE3-NEXT: pxor %xmm9, %xmm4
2107; SSSE3-NEXT: movdqa %xmm11, %xmm5
2108; SSSE3-NEXT: pcmpgtd %xmm4, %xmm5
2109; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
2110; SSSE3-NEXT: pcmpeqd %xmm11, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002111; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002112; SSSE3-NEXT: pand %xmm6, %xmm4
2113; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2114; SSSE3-NEXT: por %xmm4, %xmm5
2115; SSSE3-NEXT: pand %xmm5, %xmm3
2116; SSSE3-NEXT: pandn %xmm13, %xmm5
2117; SSSE3-NEXT: por %xmm3, %xmm5
2118; SSSE3-NEXT: movdqa %xmm5, %xmm8
2119; SSSE3-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) # 16-byte Spill
2120; SSSE3-NEXT: movdqa %xmm2, %xmm3
2121; SSSE3-NEXT: pxor %xmm9, %xmm3
2122; SSSE3-NEXT: movdqa %xmm11, %xmm4
2123; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4
2124; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2125; SSSE3-NEXT: pcmpeqd %xmm11, %xmm3
2126; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2127; SSSE3-NEXT: pand %xmm5, %xmm3
2128; SSSE3-NEXT: pshufd {{.*#+}} xmm15 = xmm4[1,1,3,3]
2129; SSSE3-NEXT: por %xmm3, %xmm15
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002130; SSSE3-NEXT: pand %xmm15, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00002131; SSSE3-NEXT: pandn %xmm13, %xmm15
2132; SSSE3-NEXT: movdqa %xmm13, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002133; SSSE3-NEXT: por %xmm2, %xmm15
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002134; SSSE3-NEXT: movdqa %xmm1, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00002135; SSSE3-NEXT: pxor %xmm9, %xmm2
2136; SSSE3-NEXT: movdqa %xmm11, %xmm3
2137; SSSE3-NEXT: pcmpgtd %xmm2, %xmm3
2138; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2139; SSSE3-NEXT: pcmpeqd %xmm11, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002140; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002141; SSSE3-NEXT: pand %xmm4, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00002142; SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm3[1,1,3,3]
2143; SSSE3-NEXT: por %xmm2, %xmm13
2144; SSSE3-NEXT: pand %xmm13, %xmm1
2145; SSSE3-NEXT: pandn %xmm5, %xmm13
2146; SSSE3-NEXT: por %xmm1, %xmm13
2147; SSSE3-NEXT: movdqa %xmm0, %xmm1
2148; SSSE3-NEXT: pxor %xmm9, %xmm1
2149; SSSE3-NEXT: movdqa %xmm11, %xmm2
2150; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
2151; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
2152; SSSE3-NEXT: pcmpeqd %xmm11, %xmm1
2153; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2154; SSSE3-NEXT: pand %xmm4, %xmm1
2155; SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm2[1,1,3,3]
2156; SSSE3-NEXT: por %xmm1, %xmm11
2157; SSSE3-NEXT: pand %xmm11, %xmm0
2158; SSSE3-NEXT: pandn %xmm5, %xmm11
2159; SSSE3-NEXT: por %xmm0, %xmm11
2160; SSSE3-NEXT: movdqa %xmm11, %xmm0
2161; SSSE3-NEXT: pxor %xmm9, %xmm0
2162; SSSE3-NEXT: movdqa %xmm0, %xmm1
2163; SSSE3-NEXT: pcmpgtd %xmm9, %xmm1
2164; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
2165; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
2166; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2167; SSSE3-NEXT: pand %xmm4, %xmm0
2168; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2169; SSSE3-NEXT: por %xmm0, %xmm2
2170; SSSE3-NEXT: movdqa %xmm13, %xmm0
2171; SSSE3-NEXT: pxor %xmm9, %xmm0
2172; SSSE3-NEXT: movdqa %xmm0, %xmm4
2173; SSSE3-NEXT: pcmpgtd %xmm9, %xmm4
2174; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2175; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
2176; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2177; SSSE3-NEXT: pand %xmm5, %xmm0
2178; SSSE3-NEXT: pshufd {{.*#+}} xmm12 = xmm4[1,1,3,3]
2179; SSSE3-NEXT: por %xmm0, %xmm12
2180; SSSE3-NEXT: movdqa %xmm15, %xmm0
2181; SSSE3-NEXT: pxor %xmm9, %xmm0
2182; SSSE3-NEXT: movdqa %xmm0, %xmm4
2183; SSSE3-NEXT: pcmpgtd %xmm9, %xmm4
2184; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2185; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
2186; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2187; SSSE3-NEXT: pand %xmm5, %xmm0
2188; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
2189; SSSE3-NEXT: por %xmm0, %xmm6
2190; SSSE3-NEXT: movdqa %xmm8, %xmm0
2191; SSSE3-NEXT: pxor %xmm9, %xmm0
2192; SSSE3-NEXT: movdqa %xmm0, %xmm4
2193; SSSE3-NEXT: pcmpgtd %xmm9, %xmm4
2194; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2195; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
2196; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2197; SSSE3-NEXT: pand %xmm5, %xmm0
2198; SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm4[1,1,3,3]
2199; SSSE3-NEXT: por %xmm0, %xmm8
2200; SSSE3-NEXT: movdqa %xmm7, %xmm0
2201; SSSE3-NEXT: pxor %xmm9, %xmm0
2202; SSSE3-NEXT: movdqa %xmm0, %xmm5
2203; SSSE3-NEXT: pcmpgtd %xmm9, %xmm5
2204; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
2205; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2206; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
2207; SSSE3-NEXT: pand %xmm7, %xmm0
2208; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2209; SSSE3-NEXT: por %xmm0, %xmm5
2210; SSSE3-NEXT: movdqa %xmm10, %xmm0
2211; SSSE3-NEXT: pxor %xmm9, %xmm0
2212; SSSE3-NEXT: movdqa %xmm0, %xmm7
2213; SSSE3-NEXT: pcmpgtd %xmm9, %xmm7
2214; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
2215; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2216; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
2217; SSSE3-NEXT: pand %xmm10, %xmm0
2218; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm7[1,1,3,3]
2219; SSSE3-NEXT: por %xmm0, %xmm10
2220; SSSE3-NEXT: movdqa %xmm14, %xmm0
2221; SSSE3-NEXT: pxor %xmm9, %xmm0
2222; SSSE3-NEXT: movdqa %xmm0, %xmm7
2223; SSSE3-NEXT: pcmpgtd %xmm9, %xmm7
2224; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
2225; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
2226; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
2227; SSSE3-NEXT: pand %xmm0, %xmm1
2228; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
2229; SSSE3-NEXT: por %xmm1, %xmm0
2230; SSSE3-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm4 # 16-byte Reload
2231; SSSE3-NEXT: movdqa %xmm4, %xmm1
2232; SSSE3-NEXT: pxor %xmm9, %xmm1
2233; SSSE3-NEXT: movdqa %xmm1, %xmm7
2234; SSSE3-NEXT: pcmpgtd %xmm9, %xmm7
2235; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1
2236; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2237; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm7[0,0,2,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002238; SSSE3-NEXT: pand %xmm3, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00002239; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
2240; SSSE3-NEXT: por %xmm1, %xmm3
2241; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255]
2242; SSSE3-NEXT: pand %xmm1, %xmm3
2243; SSSE3-NEXT: pand %xmm4, %xmm3
2244; SSSE3-NEXT: pand %xmm1, %xmm0
2245; SSSE3-NEXT: pand %xmm14, %xmm0
2246; SSSE3-NEXT: packuswb %xmm3, %xmm0
2247; SSSE3-NEXT: pand %xmm1, %xmm10
2248; SSSE3-NEXT: pand -{{[0-9]+}}(%rsp), %xmm10 # 16-byte Folded Reload
2249; SSSE3-NEXT: pand %xmm1, %xmm5
2250; SSSE3-NEXT: pand -{{[0-9]+}}(%rsp), %xmm5 # 16-byte Folded Reload
2251; SSSE3-NEXT: packuswb %xmm10, %xmm5
2252; SSSE3-NEXT: packuswb %xmm0, %xmm5
2253; SSSE3-NEXT: pand %xmm1, %xmm8
2254; SSSE3-NEXT: pand -{{[0-9]+}}(%rsp), %xmm8 # 16-byte Folded Reload
2255; SSSE3-NEXT: pand %xmm1, %xmm6
2256; SSSE3-NEXT: pand %xmm15, %xmm6
2257; SSSE3-NEXT: packuswb %xmm8, %xmm6
2258; SSSE3-NEXT: pand %xmm1, %xmm12
2259; SSSE3-NEXT: pand %xmm13, %xmm12
2260; SSSE3-NEXT: pand %xmm1, %xmm2
2261; SSSE3-NEXT: pand %xmm11, %xmm2
2262; SSSE3-NEXT: packuswb %xmm12, %xmm2
2263; SSSE3-NEXT: packuswb %xmm6, %xmm2
2264; SSSE3-NEXT: packuswb %xmm5, %xmm2
2265; SSSE3-NEXT: movdqa %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002266; SSSE3-NEXT: retq
2267;
2268; SSE41-LABEL: trunc_packus_v16i64_v16i8:
2269; SSE41: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00002270; SSE41-NEXT: movdqa %xmm0, %xmm8
2271; SSE41-NEXT: movapd {{.*#+}} xmm9 = [255,255]
2272; SSE41-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002273; SSE41-NEXT: movdqa %xmm7, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002274; SSE41-NEXT: pxor %xmm10, %xmm0
2275; SSE41-NEXT: movdqa {{.*#+}} xmm12 = [2147483903,2147483903]
2276; SSE41-NEXT: movdqa %xmm12, %xmm11
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002277; SSE41-NEXT: pcmpgtd %xmm0, %xmm11
Simon Pilgrim0be55672018-02-11 10:52:37 +00002278; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm11[0,0,2,2]
2279; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002280; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm0[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002281; SSE41-NEXT: pand %xmm13, %xmm14
2282; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm11[1,1,3,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002283; SSE41-NEXT: por %xmm14, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002284; SSE41-NEXT: movapd %xmm9, %xmm11
2285; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm11
2286; SSE41-NEXT: movdqa %xmm6, %xmm0
2287; SSE41-NEXT: pxor %xmm10, %xmm0
2288; SSE41-NEXT: movdqa %xmm12, %xmm7
2289; SSE41-NEXT: pcmpgtd %xmm0, %xmm7
2290; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm7[0,0,2,2]
2291; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2292; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm0[1,1,3,3]
2293; SSE41-NEXT: pand %xmm13, %xmm14
2294; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
2295; SSE41-NEXT: por %xmm14, %xmm0
2296; SSE41-NEXT: movapd %xmm9, %xmm13
2297; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm13
2298; SSE41-NEXT: movdqa %xmm5, %xmm0
2299; SSE41-NEXT: pxor %xmm10, %xmm0
2300; SSE41-NEXT: movdqa %xmm12, %xmm6
2301; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
2302; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm6[0,0,2,2]
2303; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2304; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2305; SSE41-NEXT: pand %xmm14, %xmm7
2306; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
2307; SSE41-NEXT: por %xmm7, %xmm0
2308; SSE41-NEXT: movapd %xmm9, %xmm14
2309; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm14
2310; SSE41-NEXT: movdqa %xmm4, %xmm0
2311; SSE41-NEXT: pxor %xmm10, %xmm0
2312; SSE41-NEXT: movdqa %xmm12, %xmm5
2313; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
2314; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
2315; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2316; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2317; SSE41-NEXT: pand %xmm6, %xmm7
2318; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
2319; SSE41-NEXT: por %xmm7, %xmm0
2320; SSE41-NEXT: movapd %xmm9, %xmm15
2321; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm15
2322; SSE41-NEXT: movdqa %xmm3, %xmm0
2323; SSE41-NEXT: pxor %xmm10, %xmm0
2324; SSE41-NEXT: movdqa %xmm12, %xmm4
2325; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
2326; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
2327; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2328; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2329; SSE41-NEXT: pand %xmm6, %xmm7
2330; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
2331; SSE41-NEXT: por %xmm7, %xmm0
2332; SSE41-NEXT: movapd %xmm9, %xmm4
2333; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm4
2334; SSE41-NEXT: movdqa %xmm2, %xmm0
2335; SSE41-NEXT: pxor %xmm10, %xmm0
2336; SSE41-NEXT: movdqa %xmm12, %xmm3
2337; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
2338; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
2339; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2340; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2341; SSE41-NEXT: pand %xmm6, %xmm7
2342; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2343; SSE41-NEXT: por %xmm7, %xmm0
2344; SSE41-NEXT: movapd %xmm9, %xmm6
2345; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002346; SSE41-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002347; SSE41-NEXT: pxor %xmm10, %xmm0
2348; SSE41-NEXT: movdqa %xmm12, %xmm2
2349; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
2350; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
2351; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2352; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2353; SSE41-NEXT: pand %xmm3, %xmm7
2354; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
2355; SSE41-NEXT: por %xmm7, %xmm0
2356; SSE41-NEXT: movapd %xmm9, %xmm7
2357; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm7
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002358; SSE41-NEXT: movdqa %xmm8, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002359; SSE41-NEXT: pxor %xmm10, %xmm0
2360; SSE41-NEXT: movdqa %xmm12, %xmm1
2361; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
2362; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
2363; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2364; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2365; SSE41-NEXT: pand %xmm2, %xmm3
2366; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2367; SSE41-NEXT: por %xmm3, %xmm0
2368; SSE41-NEXT: movapd %xmm9, %xmm2
2369; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002370; SSE41-NEXT: movapd %xmm2, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002371; SSE41-NEXT: xorpd %xmm10, %xmm0
2372; SSE41-NEXT: movapd %xmm0, %xmm1
2373; SSE41-NEXT: pcmpgtd %xmm10, %xmm1
2374; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
2375; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2376; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
2377; SSE41-NEXT: pand %xmm3, %xmm5
2378; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2379; SSE41-NEXT: por %xmm5, %xmm0
2380; SSE41-NEXT: xorpd %xmm8, %xmm8
2381; SSE41-NEXT: pxor %xmm1, %xmm1
2382; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
2383; SSE41-NEXT: movapd %xmm7, %xmm0
2384; SSE41-NEXT: xorpd %xmm10, %xmm0
2385; SSE41-NEXT: movapd %xmm0, %xmm2
2386; SSE41-NEXT: pcmpgtd %xmm10, %xmm2
2387; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2]
2388; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2389; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2390; SSE41-NEXT: pand %xmm5, %xmm3
2391; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
2392; SSE41-NEXT: por %xmm3, %xmm0
2393; SSE41-NEXT: pxor %xmm12, %xmm12
2394; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm12
2395; SSE41-NEXT: movapd %xmm6, %xmm0
2396; SSE41-NEXT: xorpd %xmm10, %xmm0
2397; SSE41-NEXT: movapd %xmm0, %xmm3
2398; SSE41-NEXT: pcmpgtd %xmm10, %xmm3
2399; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
2400; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2401; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2402; SSE41-NEXT: pand %xmm5, %xmm7
2403; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2404; SSE41-NEXT: por %xmm7, %xmm0
2405; SSE41-NEXT: pxor %xmm7, %xmm7
2406; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm7
2407; SSE41-NEXT: movapd %xmm4, %xmm0
2408; SSE41-NEXT: xorpd %xmm10, %xmm0
2409; SSE41-NEXT: movapd %xmm0, %xmm3
2410; SSE41-NEXT: pcmpgtd %xmm10, %xmm3
2411; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
2412; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2413; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
2414; SSE41-NEXT: pand %xmm5, %xmm6
2415; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2416; SSE41-NEXT: por %xmm6, %xmm0
2417; SSE41-NEXT: pxor %xmm6, %xmm6
2418; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002419; SSE41-NEXT: movapd %xmm15, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002420; SSE41-NEXT: xorpd %xmm10, %xmm0
2421; SSE41-NEXT: movapd %xmm0, %xmm3
2422; SSE41-NEXT: pcmpgtd %xmm10, %xmm3
2423; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2424; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2425; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
2426; SSE41-NEXT: pand %xmm4, %xmm5
2427; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2428; SSE41-NEXT: por %xmm5, %xmm0
2429; SSE41-NEXT: pxor %xmm4, %xmm4
2430; SSE41-NEXT: blendvpd %xmm0, %xmm15, %xmm4
2431; SSE41-NEXT: movapd %xmm14, %xmm0
2432; SSE41-NEXT: xorpd %xmm10, %xmm0
2433; SSE41-NEXT: movapd %xmm0, %xmm3
2434; SSE41-NEXT: pcmpgtd %xmm10, %xmm3
2435; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
2436; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2437; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
2438; SSE41-NEXT: pand %xmm5, %xmm2
2439; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2440; SSE41-NEXT: por %xmm2, %xmm0
2441; SSE41-NEXT: xorpd %xmm15, %xmm15
2442; SSE41-NEXT: blendvpd %xmm0, %xmm14, %xmm15
2443; SSE41-NEXT: movapd %xmm13, %xmm0
2444; SSE41-NEXT: xorpd %xmm10, %xmm0
2445; SSE41-NEXT: movapd %xmm0, %xmm2
2446; SSE41-NEXT: pcmpgtd %xmm10, %xmm2
2447; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm2[0,0,2,2]
2448; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2449; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2450; SSE41-NEXT: pand %xmm14, %xmm3
2451; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
2452; SSE41-NEXT: por %xmm3, %xmm0
2453; SSE41-NEXT: pxor %xmm2, %xmm2
2454; SSE41-NEXT: blendvpd %xmm0, %xmm13, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002455; SSE41-NEXT: movapd %xmm11, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002456; SSE41-NEXT: xorpd %xmm10, %xmm0
2457; SSE41-NEXT: movapd %xmm0, %xmm3
2458; SSE41-NEXT: pcmpgtd %xmm10, %xmm3
2459; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm3[0,0,2,2]
2460; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2461; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
2462; SSE41-NEXT: pand %xmm13, %xmm5
2463; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2464; SSE41-NEXT: por %xmm5, %xmm0
2465; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm8
2466; SSE41-NEXT: andpd %xmm9, %xmm8
2467; SSE41-NEXT: andpd %xmm9, %xmm2
2468; SSE41-NEXT: packuswb %xmm8, %xmm2
2469; SSE41-NEXT: andpd %xmm9, %xmm15
2470; SSE41-NEXT: andpd %xmm9, %xmm4
2471; SSE41-NEXT: packuswb %xmm15, %xmm4
2472; SSE41-NEXT: packuswb %xmm2, %xmm4
2473; SSE41-NEXT: andpd %xmm9, %xmm6
2474; SSE41-NEXT: andpd %xmm9, %xmm7
2475; SSE41-NEXT: packuswb %xmm6, %xmm7
2476; SSE41-NEXT: andpd %xmm9, %xmm12
2477; SSE41-NEXT: andpd %xmm9, %xmm1
2478; SSE41-NEXT: packuswb %xmm12, %xmm1
2479; SSE41-NEXT: packuswb %xmm7, %xmm1
2480; SSE41-NEXT: packuswb %xmm4, %xmm1
2481; SSE41-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002482; SSE41-NEXT: retq
2483;
2484; AVX1-LABEL: trunc_packus_v16i64_v16i8:
2485; AVX1: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00002486; AVX1-NEXT: vmovapd {{.*#+}} ymm5 = [255,255,255,255]
2487; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002488; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002489; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm6
2490; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm7
2491; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm7, %ymm6
2492; AVX1-NEXT: vblendvpd %ymm6, %ymm3, %ymm5, %ymm3
2493; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
2494; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm6
2495; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm7
2496; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm7, %ymm6
2497; AVX1-NEXT: vblendvpd %ymm6, %ymm2, %ymm5, %ymm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002498; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
2499; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm6
2500; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm7
2501; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm7, %ymm6
Simon Pilgrim0be55672018-02-11 10:52:37 +00002502; AVX1-NEXT: vblendvpd %ymm6, %ymm1, %ymm5, %ymm1
2503; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
2504; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm6
2505; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm7
2506; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm7, %ymm6
2507; AVX1-NEXT: vblendvpd %ymm6, %ymm0, %ymm5, %ymm0
2508; AVX1-NEXT: vxorpd %xmm5, %xmm5, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002509; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm6
Simon Pilgrim0be55672018-02-11 10:52:37 +00002510; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm7
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002511; AVX1-NEXT: vpand %xmm4, %xmm6, %xmm6
2512; AVX1-NEXT: vpand %xmm6, %xmm7, %xmm6
Simon Pilgrim0be55672018-02-11 10:52:37 +00002513; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm7
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002514; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
2515; AVX1-NEXT: vpand %xmm3, %xmm7, %xmm3
2516; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7
2517; AVX1-NEXT: vpackuswb %xmm6, %xmm3, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00002518; AVX1-NEXT: vpcmpgtq %xmm5, %xmm7, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002519; AVX1-NEXT: vpand %xmm4, %xmm7, %xmm7
2520; AVX1-NEXT: vpand %xmm7, %xmm6, %xmm6
Simon Pilgrim0be55672018-02-11 10:52:37 +00002521; AVX1-NEXT: vpcmpgtq %xmm5, %xmm2, %xmm7
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002522; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
2523; AVX1-NEXT: vpand %xmm2, %xmm7, %xmm2
2524; AVX1-NEXT: vpackuswb %xmm6, %xmm2, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00002525; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002526; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00002527; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002528; AVX1-NEXT: vpand %xmm4, %xmm6, %xmm6
2529; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00002530; AVX1-NEXT: vpcmpgtq %xmm5, %xmm1, %xmm6
2531; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
2532; AVX1-NEXT: vpand %xmm1, %xmm6, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002533; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
Simon Pilgrim0be55672018-02-11 10:52:37 +00002534; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
2535; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002536; AVX1-NEXT: vpand %xmm4, %xmm6, %xmm6
Simon Pilgrim0be55672018-02-11 10:52:37 +00002537; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
2538; AVX1-NEXT: vpcmpgtq %xmm5, %xmm0, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002539; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002540; AVX1-NEXT: vpand %xmm0, %xmm5, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002541; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002542; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002543; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
2544; AVX1-NEXT: vzeroupper
2545; AVX1-NEXT: retq
2546;
2547; AVX2-SLOW-LABEL: trunc_packus_v16i64_v16i8:
2548; AVX2-SLOW: # %bb.0:
2549; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,255,255,255]
2550; AVX2-SLOW-NEXT: vpcmpgtq %ymm2, %ymm4, %ymm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002551; AVX2-SLOW-NEXT: vblendvpd %ymm5, %ymm2, %ymm4, %ymm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00002552; AVX2-SLOW-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm5
2553; AVX2-SLOW-NEXT: vblendvpd %ymm5, %ymm3, %ymm4, %ymm3
2554; AVX2-SLOW-NEXT: vpcmpgtq %ymm0, %ymm4, %ymm5
2555; AVX2-SLOW-NEXT: vblendvpd %ymm5, %ymm0, %ymm4, %ymm0
2556; AVX2-SLOW-NEXT: vpcmpgtq %ymm1, %ymm4, %ymm5
2557; AVX2-SLOW-NEXT: vblendvpd %ymm5, %ymm1, %ymm4, %ymm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002558; AVX2-SLOW-NEXT: vpxor %xmm4, %xmm4, %xmm4
Simon Pilgrim0be55672018-02-11 10:52:37 +00002559; AVX2-SLOW-NEXT: vpcmpgtq %ymm4, %ymm1, %ymm5
2560; AVX2-SLOW-NEXT: vpand %ymm1, %ymm5, %ymm1
2561; AVX2-SLOW-NEXT: vpcmpgtq %ymm4, %ymm0, %ymm5
2562; AVX2-SLOW-NEXT: vpand %ymm0, %ymm5, %ymm0
2563; AVX2-SLOW-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm5
2564; AVX2-SLOW-NEXT: vpand %ymm3, %ymm5, %ymm3
2565; AVX2-SLOW-NEXT: vpcmpgtq %ymm4, %ymm2, %ymm4
2566; AVX2-SLOW-NEXT: vpand %ymm2, %ymm4, %ymm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002567; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
2568; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
2569; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
2570; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
2571; AVX2-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
2572; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
2573; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm2, %ymm2
2574; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
2575; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
2576; AVX2-SLOW-NEXT: vpshufb %xmm4, %xmm2, %xmm2
2577; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
2578; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2579; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
2580; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
2581; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2582; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm0, %ymm0
2583; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2584; AVX2-SLOW-NEXT: vpshufb %xmm4, %xmm0, %xmm0
2585; AVX2-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2586; AVX2-SLOW-NEXT: vzeroupper
2587; AVX2-SLOW-NEXT: retq
2588;
2589; AVX2-FAST-LABEL: trunc_packus_v16i64_v16i8:
2590; AVX2-FAST: # %bb.0:
2591; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,255,255,255]
2592; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm4, %ymm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002593; AVX2-FAST-NEXT: vblendvpd %ymm5, %ymm2, %ymm4, %ymm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00002594; AVX2-FAST-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm5
2595; AVX2-FAST-NEXT: vblendvpd %ymm5, %ymm3, %ymm4, %ymm3
2596; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm4, %ymm5
2597; AVX2-FAST-NEXT: vblendvpd %ymm5, %ymm0, %ymm4, %ymm0
2598; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm4, %ymm5
2599; AVX2-FAST-NEXT: vblendvpd %ymm5, %ymm1, %ymm4, %ymm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002600; AVX2-FAST-NEXT: vpxor %xmm4, %xmm4, %xmm4
Simon Pilgrim0be55672018-02-11 10:52:37 +00002601; AVX2-FAST-NEXT: vpcmpgtq %ymm4, %ymm1, %ymm5
2602; AVX2-FAST-NEXT: vpand %ymm1, %ymm5, %ymm1
2603; AVX2-FAST-NEXT: vpcmpgtq %ymm4, %ymm0, %ymm5
2604; AVX2-FAST-NEXT: vpand %ymm0, %ymm5, %ymm0
2605; AVX2-FAST-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm5
2606; AVX2-FAST-NEXT: vpand %ymm3, %ymm5, %ymm3
2607; AVX2-FAST-NEXT: vpcmpgtq %ymm4, %ymm2, %ymm4
2608; AVX2-FAST-NEXT: vpand %ymm2, %ymm4, %ymm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002609; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7]
2610; AVX2-FAST-NEXT: vpermd %ymm2, %ymm4, %ymm2
2611; AVX2-FAST-NEXT: vpermd %ymm3, %ymm4, %ymm3
2612; AVX2-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
2613; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
2614; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm2, %ymm2
2615; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
2616; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
2617; AVX2-FAST-NEXT: vpshufb %xmm5, %xmm2, %xmm2
2618; AVX2-FAST-NEXT: vpermd %ymm0, %ymm4, %ymm0
2619; AVX2-FAST-NEXT: vpermd %ymm1, %ymm4, %ymm1
2620; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2621; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm0, %ymm0
2622; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2623; AVX2-FAST-NEXT: vpshufb %xmm5, %xmm0, %xmm0
2624; AVX2-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2625; AVX2-FAST-NEXT: vzeroupper
2626; AVX2-FAST-NEXT: retq
2627;
2628; AVX512-LABEL: trunc_packus_v16i64_v16i8:
2629; AVX512: # %bb.0:
2630; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255]
2631; AVX512-NEXT: vpminsq %zmm2, %zmm0, %zmm0
2632; AVX512-NEXT: vpminsq %zmm2, %zmm1, %zmm1
2633; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
2634; AVX512-NEXT: vpmaxsq %zmm2, %zmm1, %zmm1
2635; AVX512-NEXT: vpmaxsq %zmm2, %zmm0, %zmm0
2636; AVX512-NEXT: vpmovqd %zmm0, %ymm0
2637; AVX512-NEXT: vpmovqd %zmm1, %ymm1
2638; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
2639; AVX512-NEXT: vpmovdb %zmm0, %xmm0
2640; AVX512-NEXT: vzeroupper
2641; AVX512-NEXT: retq
2642 %1 = icmp slt <16 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
2643 %2 = select <16 x i1> %1, <16 x i64> %a0, <16 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
2644 %3 = icmp sgt <16 x i64> %2, zeroinitializer
2645 %4 = select <16 x i1> %3, <16 x i64> %2, <16 x i64> zeroinitializer
2646 %5 = trunc <16 x i64> %4 to <16 x i8>
2647 ret <16 x i8> %5
2648}
2649
; Unsigned-saturating truncation of <8 x i32> to <8 x i8>, spelled out in IR
; as a signed clamp to [0, 255] followed by a plain trunc.
; The expectations below are autogenerated and order-sensitive; regenerate them
; with the update script rather than editing by hand.
; In the recorded output the SSE2 and SSSE3 runs build the clamp from pcmpgtd
; with pand/pandn/por, while the SSE4.1 and every AVX/AVX512 run use
; pminsd/pmaxsd (or their VEX/EVEX forms).
2650define <8 x i8> @trunc_packus_v8i32_v8i8(<8 x i32> %a0) {
2651; SSE2-LABEL: trunc_packus_v8i32_v8i8:
2652; SSE2: # %bb.0:
2653; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255]
2654; SSE2-NEXT: movdqa %xmm2, %xmm3
2655; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002656; SSE2-NEXT: pand %xmm3, %xmm1
2657; SSE2-NEXT: pandn %xmm2, %xmm3
2658; SSE2-NEXT: por %xmm1, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00002659; SSE2-NEXT: movdqa %xmm2, %xmm1
2660; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
2661; SSE2-NEXT: pand %xmm1, %xmm0
2662; SSE2-NEXT: pandn %xmm2, %xmm1
2663; SSE2-NEXT: por %xmm0, %xmm1
2664; SSE2-NEXT: pxor %xmm2, %xmm2
2665; SSE2-NEXT: movdqa %xmm1, %xmm0
2666; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
2667; SSE2-NEXT: pand %xmm1, %xmm0
2668; SSE2-NEXT: movdqa %xmm3, %xmm1
2669; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
2670; SSE2-NEXT: pand %xmm3, %xmm1
2671; SSE2-NEXT: pslld $16, %xmm1
2672; SSE2-NEXT: psrad $16, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002673; SSE2-NEXT: pslld $16, %xmm0
2674; SSE2-NEXT: psrad $16, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002675; SSE2-NEXT: packssdw %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002676; SSE2-NEXT: retq
2677;
2678; SSSE3-LABEL: trunc_packus_v8i32_v8i8:
2679; SSSE3: # %bb.0:
2680; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255]
2681; SSSE3-NEXT: movdqa %xmm2, %xmm3
2682; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002683; SSSE3-NEXT: pand %xmm3, %xmm1
2684; SSSE3-NEXT: pandn %xmm2, %xmm3
2685; SSSE3-NEXT: por %xmm1, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00002686; SSSE3-NEXT: movdqa %xmm2, %xmm1
2687; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
2688; SSSE3-NEXT: pand %xmm1, %xmm0
2689; SSSE3-NEXT: pandn %xmm2, %xmm1
2690; SSSE3-NEXT: por %xmm0, %xmm1
2691; SSSE3-NEXT: pxor %xmm2, %xmm2
2692; SSSE3-NEXT: movdqa %xmm1, %xmm0
2693; SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
2694; SSSE3-NEXT: pand %xmm1, %xmm0
2695; SSSE3-NEXT: movdqa %xmm3, %xmm1
2696; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
2697; SSSE3-NEXT: pand %xmm3, %xmm1
2698; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
2699; SSSE3-NEXT: pshufb %xmm2, %xmm1
2700; SSSE3-NEXT: pshufb %xmm2, %xmm0
2701; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002702; SSSE3-NEXT: retq
2703;
2704; SSE41-LABEL: trunc_packus_v8i32_v8i8:
2705; SSE41: # %bb.0:
2706; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255]
2707; SSE41-NEXT: pminsd %xmm2, %xmm1
2708; SSE41-NEXT: pminsd %xmm2, %xmm0
2709; SSE41-NEXT: pxor %xmm2, %xmm2
2710; SSE41-NEXT: pmaxsd %xmm2, %xmm0
2711; SSE41-NEXT: pmaxsd %xmm2, %xmm1
2712; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
2713; SSE41-NEXT: pshufb %xmm2, %xmm1
2714; SSE41-NEXT: pshufb %xmm2, %xmm0
2715; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2716; SSE41-NEXT: retq
2717;
2718; AVX1-LABEL: trunc_packus_v8i32_v8i8:
2719; AVX1: # %bb.0:
2720; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2721; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255]
2722; AVX1-NEXT: vpminsd %xmm2, %xmm1, %xmm1
2723; AVX1-NEXT: vpminsd %xmm2, %xmm0, %xmm0
2724; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
2725; AVX1-NEXT: vpmaxsd %xmm2, %xmm0, %xmm0
2726; AVX1-NEXT: vpmaxsd %xmm2, %xmm1, %xmm1
2727; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
2728; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2729; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2730; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2731; AVX1-NEXT: vzeroupper
2732; AVX1-NEXT: retq
2733;
2734; AVX2-LABEL: trunc_packus_v8i32_v8i8:
2735; AVX2: # %bb.0:
2736; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
2737; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
2738; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
2739; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
2740; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
2741; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Puyan Lotfi43e94b12018-01-31 22:04:26 +00002742; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002743; AVX2-NEXT: vzeroupper
2744; AVX2-NEXT: retq
2745;
2746; AVX512F-LABEL: trunc_packus_v8i32_v8i8:
2747; AVX512F: # %bb.0:
2748; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
2749; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
2750; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
2751; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
2752; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +00002753; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002754; AVX512F-NEXT: vzeroupper
2755; AVX512F-NEXT: retq
2756;
2757; AVX512VL-LABEL: trunc_packus_v8i32_v8i8:
2758; AVX512VL: # %bb.0:
2759; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
2760; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
2761; AVX512VL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
2762; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
2763; AVX512VL-NEXT: vzeroupper
2764; AVX512VL-NEXT: retq
2765;
2766; AVX512BW-LABEL: trunc_packus_v8i32_v8i8:
2767; AVX512BW: # %bb.0:
2768; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
2769; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0
2770; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
2771; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
2772; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +00002773; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002774; AVX512BW-NEXT: vzeroupper
2775; AVX512BW-NEXT: retq
2776;
2777; AVX512BWVL-LABEL: trunc_packus_v8i32_v8i8:
2778; AVX512BWVL: # %bb.0:
2779; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
2780; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
2781; AVX512BWVL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
2782; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
2783; AVX512BWVL-NEXT: vzeroupper
2784; AVX512BWVL-NEXT: retq
; Upper clamp - signed min(%a0, 255) written as icmp slt + select.
2785 %1 = icmp slt <8 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
2786 %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
; Lower clamp - signed max(%2, 0) written as icmp sgt + select.
2787 %3 = icmp sgt <8 x i32> %2, zeroinitializer
2788 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
; Every lane is now within [0, 255], so the trunc drops only zero bits.
2789 %5 = trunc <8 x i32> %4 to <8 x i8>
2790 ret <8 x i8> %5
2791}
2792
; Unsigned-saturating truncation of <16 x i32> to <16 x i8>, spelled out in IR
; as a signed clamp to [0, 255] followed by a plain trunc.
; The expectations below are autogenerated and order-sensitive; regenerate them
; with the update script rather than editing by hand.
; In the recorded output the SSE2 and SSSE3 runs build the clamp from pcmpgtd
; with pand/pandn/por, the SSE4.1, AVX1 and AVX2 runs use pminsd/pmaxsd, and
; the shared AVX512 expectation is vpminsd (broadcast memory operand), vpmaxsd
; and a single vpmovdb.
2793define <16 x i8> @trunc_packus_v16i32_v16i8(<16 x i32> %a0) {
2794; SSE2-LABEL: trunc_packus_v16i32_v16i8:
2795; SSE2: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00002796; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [255,255,255,255]
2797; SSE2-NEXT: movdqa %xmm9, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002798; SSE2-NEXT: pcmpgtd %xmm3, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002799; SSE2-NEXT: pand %xmm6, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00002800; SSE2-NEXT: pandn %xmm9, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002801; SSE2-NEXT: por %xmm3, %xmm6
Simon Pilgrim0be55672018-02-11 10:52:37 +00002802; SSE2-NEXT: movdqa %xmm9, %xmm4
2803; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
2804; SSE2-NEXT: pand %xmm4, %xmm2
2805; SSE2-NEXT: pandn %xmm9, %xmm4
2806; SSE2-NEXT: por %xmm2, %xmm4
2807; SSE2-NEXT: movdqa %xmm9, %xmm2
2808; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
2809; SSE2-NEXT: pand %xmm2, %xmm1
2810; SSE2-NEXT: pandn %xmm9, %xmm2
2811; SSE2-NEXT: por %xmm1, %xmm2
2812; SSE2-NEXT: movdqa %xmm9, %xmm1
2813; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
2814; SSE2-NEXT: pand %xmm1, %xmm0
2815; SSE2-NEXT: pandn %xmm9, %xmm1
2816; SSE2-NEXT: por %xmm0, %xmm1
2817; SSE2-NEXT: pxor %xmm8, %xmm8
2818; SSE2-NEXT: movdqa %xmm1, %xmm0
2819; SSE2-NEXT: pcmpgtd %xmm8, %xmm0
2820; SSE2-NEXT: movdqa %xmm2, %xmm7
2821; SSE2-NEXT: pcmpgtd %xmm8, %xmm7
2822; SSE2-NEXT: movdqa %xmm4, %xmm3
2823; SSE2-NEXT: pcmpgtd %xmm8, %xmm3
2824; SSE2-NEXT: movdqa %xmm6, %xmm5
2825; SSE2-NEXT: pcmpgtd %xmm8, %xmm5
2826; SSE2-NEXT: pand %xmm9, %xmm5
2827; SSE2-NEXT: pand %xmm6, %xmm5
2828; SSE2-NEXT: pand %xmm9, %xmm3
2829; SSE2-NEXT: pand %xmm4, %xmm3
2830; SSE2-NEXT: packuswb %xmm5, %xmm3
2831; SSE2-NEXT: pand %xmm9, %xmm7
2832; SSE2-NEXT: pand %xmm2, %xmm7
2833; SSE2-NEXT: pand %xmm9, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002834; SSE2-NEXT: pand %xmm1, %xmm0
2835; SSE2-NEXT: packuswb %xmm7, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002836; SSE2-NEXT: packuswb %xmm3, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002837; SSE2-NEXT: retq
2838;
2839; SSSE3-LABEL: trunc_packus_v16i32_v16i8:
2840; SSSE3: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00002841; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [255,255,255,255]
2842; SSSE3-NEXT: movdqa %xmm9, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002843; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002844; SSSE3-NEXT: pand %xmm6, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00002845; SSSE3-NEXT: pandn %xmm9, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002846; SSSE3-NEXT: por %xmm3, %xmm6
Simon Pilgrim0be55672018-02-11 10:52:37 +00002847; SSSE3-NEXT: movdqa %xmm9, %xmm4
2848; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4
2849; SSSE3-NEXT: pand %xmm4, %xmm2
2850; SSSE3-NEXT: pandn %xmm9, %xmm4
2851; SSSE3-NEXT: por %xmm2, %xmm4
2852; SSSE3-NEXT: movdqa %xmm9, %xmm2
2853; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
2854; SSSE3-NEXT: pand %xmm2, %xmm1
2855; SSSE3-NEXT: pandn %xmm9, %xmm2
2856; SSSE3-NEXT: por %xmm1, %xmm2
2857; SSSE3-NEXT: movdqa %xmm9, %xmm1
2858; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
2859; SSSE3-NEXT: pand %xmm1, %xmm0
2860; SSSE3-NEXT: pandn %xmm9, %xmm1
2861; SSSE3-NEXT: por %xmm0, %xmm1
2862; SSSE3-NEXT: pxor %xmm8, %xmm8
2863; SSSE3-NEXT: movdqa %xmm1, %xmm0
2864; SSSE3-NEXT: pcmpgtd %xmm8, %xmm0
2865; SSSE3-NEXT: movdqa %xmm2, %xmm7
2866; SSSE3-NEXT: pcmpgtd %xmm8, %xmm7
2867; SSSE3-NEXT: movdqa %xmm4, %xmm3
2868; SSSE3-NEXT: pcmpgtd %xmm8, %xmm3
2869; SSSE3-NEXT: movdqa %xmm6, %xmm5
2870; SSSE3-NEXT: pcmpgtd %xmm8, %xmm5
2871; SSSE3-NEXT: pand %xmm9, %xmm5
2872; SSSE3-NEXT: pand %xmm6, %xmm5
2873; SSSE3-NEXT: pand %xmm9, %xmm3
2874; SSSE3-NEXT: pand %xmm4, %xmm3
2875; SSSE3-NEXT: packuswb %xmm5, %xmm3
2876; SSSE3-NEXT: pand %xmm9, %xmm7
2877; SSSE3-NEXT: pand %xmm2, %xmm7
2878; SSSE3-NEXT: pand %xmm9, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002879; SSSE3-NEXT: pand %xmm1, %xmm0
2880; SSSE3-NEXT: packuswb %xmm7, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002881; SSSE3-NEXT: packuswb %xmm3, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002882; SSSE3-NEXT: retq
2883;
2884; SSE41-LABEL: trunc_packus_v16i32_v16i8:
2885; SSE41: # %bb.0:
2886; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255]
2887; SSE41-NEXT: pminsd %xmm4, %xmm3
2888; SSE41-NEXT: pminsd %xmm4, %xmm2
2889; SSE41-NEXT: pminsd %xmm4, %xmm1
2890; SSE41-NEXT: pminsd %xmm4, %xmm0
2891; SSE41-NEXT: pxor %xmm5, %xmm5
2892; SSE41-NEXT: pmaxsd %xmm5, %xmm0
2893; SSE41-NEXT: pmaxsd %xmm5, %xmm1
2894; SSE41-NEXT: pmaxsd %xmm5, %xmm2
2895; SSE41-NEXT: pmaxsd %xmm5, %xmm3
2896; SSE41-NEXT: pand %xmm4, %xmm3
2897; SSE41-NEXT: pand %xmm4, %xmm2
2898; SSE41-NEXT: packuswb %xmm3, %xmm2
2899; SSE41-NEXT: pand %xmm4, %xmm1
2900; SSE41-NEXT: pand %xmm4, %xmm0
2901; SSE41-NEXT: packuswb %xmm1, %xmm0
2902; SSE41-NEXT: packuswb %xmm2, %xmm0
2903; SSE41-NEXT: retq
2904;
2905; AVX1-LABEL: trunc_packus_v16i32_v16i8:
2906; AVX1: # %bb.0:
2907; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2908; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255]
2909; AVX1-NEXT: vpminsd %xmm3, %xmm2, %xmm2
2910; AVX1-NEXT: vpminsd %xmm3, %xmm1, %xmm1
2911; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
2912; AVX1-NEXT: vpminsd %xmm3, %xmm4, %xmm4
2913; AVX1-NEXT: vpminsd %xmm3, %xmm0, %xmm0
2914; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
2915; AVX1-NEXT: vpmaxsd %xmm5, %xmm0, %xmm0
2916; AVX1-NEXT: vpmaxsd %xmm5, %xmm4, %xmm4
2917; AVX1-NEXT: vpmaxsd %xmm5, %xmm1, %xmm1
2918; AVX1-NEXT: vpmaxsd %xmm5, %xmm2, %xmm2
2919; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
2920; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
2921; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
2922; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm2
2923; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
2924; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
2925; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2926; AVX1-NEXT: vzeroupper
2927; AVX1-NEXT: retq
2928;
2929; AVX2-LABEL: trunc_packus_v16i32_v16i8:
2930; AVX2: # %bb.0:
2931; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
2932; AVX2-NEXT: vpminsd %ymm2, %ymm1, %ymm1
2933; AVX2-NEXT: vpminsd %ymm2, %ymm0, %ymm0
2934; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
2935; AVX2-NEXT: vpmaxsd %ymm2, %ymm0, %ymm0
2936; AVX2-NEXT: vpmaxsd %ymm2, %ymm1, %ymm1
2937; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
2938; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
2939; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
2940; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
2941; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
2942; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
2943; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2944; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
2945; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2946; AVX2-NEXT: vzeroupper
2947; AVX2-NEXT: retq
2948;
2949; AVX512-LABEL: trunc_packus_v16i32_v16i8:
2950; AVX512: # %bb.0:
2951; AVX512-NEXT: vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
2952; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
2953; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
2954; AVX512-NEXT: vpmovdb %zmm0, %xmm0
2955; AVX512-NEXT: vzeroupper
2956; AVX512-NEXT: retq
; Upper clamp - signed min(%a0, 255) written as icmp slt + select.
2957 %1 = icmp slt <16 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
2958 %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
; Lower clamp - signed max(%2, 0) written as icmp sgt + select.
2959 %3 = icmp sgt <16 x i32> %2, zeroinitializer
2960 %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
; Every lane is now within [0, 255], so the trunc drops only zero bits.
2961 %5 = trunc <16 x i32> %4 to <16 x i8>
2962 ret <16 x i8> %5
2963}
2964
; Unsigned-saturating truncation of <16 x i16> to <16 x i8>, spelled out in IR
; as a signed clamp to [0, 255] followed by a plain trunc.
; The expectations below are autogenerated and order-sensitive; regenerate them
; with the update script rather than editing by hand.
; In the recorded output the shared SSE expectation is a lone packuswb, and the
; AVX1 and AVX2 runs need only a 128-bit extract plus vpackuswb. All four
; AVX512 runs still emit an explicit vpminsw/vpmaxsw clamp before the
; truncating move.
2965define <16 x i8> @trunc_packus_v16i16_v16i8(<16 x i16> %a0) {
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00002966; SSE-LABEL: trunc_packus_v16i16_v16i8:
2967; SSE: # %bb.0:
2968; SSE-NEXT: packuswb %xmm1, %xmm0
2969; SSE-NEXT: retq
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002970;
2971; AVX1-LABEL: trunc_packus_v16i16_v16i8:
2972; AVX1: # %bb.0:
2973; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Simon Pilgrimae00a712018-02-06 14:07:46 +00002974; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002975; AVX1-NEXT: vzeroupper
2976; AVX1-NEXT: retq
2977;
2978; AVX2-LABEL: trunc_packus_v16i16_v16i8:
2979; AVX2: # %bb.0:
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002980; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimae00a712018-02-06 14:07:46 +00002981; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002982; AVX2-NEXT: vzeroupper
2983; AVX2-NEXT: retq
2984;
2985; AVX512F-LABEL: trunc_packus_v16i16_v16i8:
2986; AVX512F: # %bb.0:
2987; AVX512F-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
2988; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
2989; AVX512F-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
2990; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
2991; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
2992; AVX512F-NEXT: vzeroupper
2993; AVX512F-NEXT: retq
2994;
2995; AVX512VL-LABEL: trunc_packus_v16i16_v16i8:
2996; AVX512VL: # %bb.0:
2997; AVX512VL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
2998; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
2999; AVX512VL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
3000; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0
3001; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
3002; AVX512VL-NEXT: vzeroupper
3003; AVX512VL-NEXT: retq
3004;
3005; AVX512BW-LABEL: trunc_packus_v16i16_v16i8:
3006; AVX512BW: # %bb.0:
3007; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
3008; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
3009; AVX512BW-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
3010; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +00003011; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003012; AVX512BW-NEXT: vzeroupper
3013; AVX512BW-NEXT: retq
3014;
3015; AVX512BWVL-LABEL: trunc_packus_v16i16_v16i8:
3016; AVX512BWVL: # %bb.0:
3017; AVX512BWVL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
3018; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
3019; AVX512BWVL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
3020; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
3021; AVX512BWVL-NEXT: vzeroupper
3022; AVX512BWVL-NEXT: retq
; Upper clamp - signed min(%a0, 255) written as icmp slt + select.
3023 %1 = icmp slt <16 x i16> %a0, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
3024 %2 = select <16 x i1> %1, <16 x i16> %a0, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
; Lower clamp - signed max(%2, 0) written as icmp sgt + select.
3025 %3 = icmp sgt <16 x i16> %2, zeroinitializer
3026 %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer
; Every lane is now within [0, 255], so the trunc drops only zero bits.
3027 %5 = trunc <16 x i16> %4 to <16 x i8>
3028 ret <16 x i8> %5
3029}
3030
; Unsigned-saturating truncation of <32 x i16> to <32 x i8>, spelled out in IR
; as a signed clamp to [0, 255] followed by a plain trunc.
; The expectations below are autogenerated and order-sensitive; regenerate them
; with the update script rather than editing by hand.
; In the recorded output the shared SSE expectation is two packuswb, the AVX1
; run is two vpackuswb joined by vinsertf128, and the AVX2 run is one ymm
; vpackuswb followed by a vpermq lane fix-up. The AVX512F and AVX512VL runs
; clamp each ymm half with vpminsw/vpmaxsw and truncate through
; vpmovsxwd + vpmovdb; the AVX512BW and AVX512BWVL runs clamp the whole zmm and
; finish with one vpmovwb.
3031define <32 x i8> @trunc_packus_v32i16_v32i8(<32 x i16> %a0) {
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00003032; SSE-LABEL: trunc_packus_v32i16_v32i8:
3033; SSE: # %bb.0:
3034; SSE-NEXT: packuswb %xmm1, %xmm0
3035; SSE-NEXT: packuswb %xmm3, %xmm2
3036; SSE-NEXT: movdqa %xmm2, %xmm1
3037; SSE-NEXT: retq
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003038;
3039; AVX1-LABEL: trunc_packus_v32i16_v32i8:
3040; AVX1: # %bb.0:
3041; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
Simon Pilgrimae00a712018-02-06 14:07:46 +00003042; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
3043; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3044; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003045; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3046; AVX1-NEXT: retq
3047;
3048; AVX2-LABEL: trunc_packus_v32i16_v32i8:
3049; AVX2: # %bb.0:
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00003050; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
3051; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003052; AVX2-NEXT: retq
3053;
3054; AVX512F-LABEL: trunc_packus_v32i16_v32i8:
3055; AVX512F: # %bb.0:
3056; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
3057; AVX512F-NEXT: vpminsw %ymm2, %ymm0, %ymm0
3058; AVX512F-NEXT: vpminsw %ymm2, %ymm1, %ymm1
3059; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
3060; AVX512F-NEXT: vpmaxsw %ymm2, %ymm1, %ymm1
3061; AVX512F-NEXT: vpmaxsw %ymm2, %ymm0, %ymm0
3062; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
3063; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
3064; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
3065; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
3066; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3067; AVX512F-NEXT: retq
3068;
3069; AVX512VL-LABEL: trunc_packus_v32i16_v32i8:
3070; AVX512VL: # %bb.0:
3071; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
3072; AVX512VL-NEXT: vpminsw %ymm2, %ymm0, %ymm0
3073; AVX512VL-NEXT: vpminsw %ymm2, %ymm1, %ymm1
3074; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
3075; AVX512VL-NEXT: vpmaxsw %ymm2, %ymm1, %ymm1
3076; AVX512VL-NEXT: vpmaxsw %ymm2, %ymm0, %ymm0
3077; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0
3078; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
3079; AVX512VL-NEXT: vpmovsxwd %ymm1, %zmm1
3080; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
3081; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3082; AVX512VL-NEXT: retq
3083;
3084; AVX512BW-LABEL: trunc_packus_v32i16_v32i8:
3085; AVX512BW: # %bb.0:
3086; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %zmm0, %zmm0
3087; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
3088; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
3089; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
3090; AVX512BW-NEXT: retq
3091;
3092; AVX512BWVL-LABEL: trunc_packus_v32i16_v32i8:
3093; AVX512BWVL: # %bb.0:
3094; AVX512BWVL-NEXT: vpminsw {{.*}}(%rip), %zmm0, %zmm0
3095; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
3096; AVX512BWVL-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
3097; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
3098; AVX512BWVL-NEXT: retq
; Upper clamp - signed min(%a0, 255) written as icmp slt + select.
3099 %1 = icmp slt <32 x i16> %a0, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
3100 %2 = select <32 x i1> %1, <32 x i16> %a0, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
; Lower clamp - signed max(%2, 0) written as icmp sgt + select.
3101 %3 = icmp sgt <32 x i16> %2, zeroinitializer
3102 %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
; Every lane is now within [0, 255], so the trunc drops only zero bits.
3103 %5 = trunc <32 x i16> %4 to <32 x i8>
3104 ret <32 x i8> %5
3105}