blob: f47425b92c4cdb40f0c12f50e73ebb54149f3d72 [file] [log] [blame]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
12
13;
14; PACKUS saturation truncation to vXi32
15;
16
; Test function: clamps every i64 lane of %a0 to the range [0, 4294967295] with two signed
; compare+select pairs and then truncates the lanes to i32 (see the IR body after the check lines).
; NOTE(review): the leading digits and the name/hash/date fragments fused onto the lines below look
; like line-number and blame residue from a web view rather than test content - confirm upstream.
17define <4 x i32> @trunc_packus_v4i64_v4i32(<4 x i64> %a0) {
18; SSE2-LABEL: trunc_packus_v4i64_v4i32:
19; SSE2: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +000020; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [4294967295,4294967295]
Simon Pilgrim65ec9232018-01-26 14:58:50 +000021; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
22; SSE2-NEXT: movdqa %xmm0, %xmm3
23; SSE2-NEXT: pxor %xmm2, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +000024; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483647,2147483647]
25; SSE2-NEXT: movdqa %xmm5, %xmm6
26; SSE2-NEXT: pcmpgtd %xmm3, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +000027; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +000028; SSE2-NEXT: pcmpeqd %xmm5, %xmm3
29; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
30; SSE2-NEXT: pand %xmm7, %xmm4
31; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
32; SSE2-NEXT: por %xmm4, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +000033; SSE2-NEXT: pand %xmm3, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +000034; SSE2-NEXT: pandn %xmm8, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +000035; SSE2-NEXT: por %xmm0, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +000036; SSE2-NEXT: movdqa %xmm1, %xmm0
37; SSE2-NEXT: pxor %xmm2, %xmm0
38; SSE2-NEXT: movdqa %xmm5, %xmm4
39; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
40; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
41; SSE2-NEXT: pcmpeqd %xmm5, %xmm0
42; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
43; SSE2-NEXT: pand %xmm6, %xmm0
44; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
45; SSE2-NEXT: por %xmm0, %xmm4
46; SSE2-NEXT: pand %xmm4, %xmm1
47; SSE2-NEXT: pandn %xmm8, %xmm4
48; SSE2-NEXT: por %xmm1, %xmm4
49; SSE2-NEXT: movdqa %xmm4, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +000050; SSE2-NEXT: pxor %xmm2, %xmm0
51; SSE2-NEXT: movdqa %xmm0, %xmm1
52; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
53; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
54; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +000055; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
56; SSE2-NEXT: pand %xmm5, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +000057; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +000058; SSE2-NEXT: por %xmm0, %xmm1
59; SSE2-NEXT: pand %xmm4, %xmm1
60; SSE2-NEXT: movdqa %xmm3, %xmm0
61; SSE2-NEXT: pxor %xmm2, %xmm0
62; SSE2-NEXT: movdqa %xmm0, %xmm4
63; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
64; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
65; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
66; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
67; SSE2-NEXT: pand %xmm5, %xmm2
68; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
69; SSE2-NEXT: por %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +000070; SSE2-NEXT: pand %xmm3, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +000071; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +000072; SSE2-NEXT: retq
73;
74; SSSE3-LABEL: trunc_packus_v4i64_v4i32:
75; SSSE3: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +000076; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [4294967295,4294967295]
Simon Pilgrim65ec9232018-01-26 14:58:50 +000077; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
78; SSSE3-NEXT: movdqa %xmm0, %xmm3
79; SSSE3-NEXT: pxor %xmm2, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +000080; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147483647,2147483647]
81; SSSE3-NEXT: movdqa %xmm5, %xmm6
82; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +000083; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +000084; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3
85; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
86; SSSE3-NEXT: pand %xmm7, %xmm4
87; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
88; SSSE3-NEXT: por %xmm4, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +000089; SSSE3-NEXT: pand %xmm3, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +000090; SSSE3-NEXT: pandn %xmm8, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +000091; SSSE3-NEXT: por %xmm0, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +000092; SSSE3-NEXT: movdqa %xmm1, %xmm0
93; SSSE3-NEXT: pxor %xmm2, %xmm0
94; SSSE3-NEXT: movdqa %xmm5, %xmm4
95; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4
96; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
97; SSSE3-NEXT: pcmpeqd %xmm5, %xmm0
98; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
99; SSSE3-NEXT: pand %xmm6, %xmm0
100; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
101; SSSE3-NEXT: por %xmm0, %xmm4
102; SSSE3-NEXT: pand %xmm4, %xmm1
103; SSSE3-NEXT: pandn %xmm8, %xmm4
104; SSSE3-NEXT: por %xmm1, %xmm4
105; SSSE3-NEXT: movdqa %xmm4, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000106; SSSE3-NEXT: pxor %xmm2, %xmm0
107; SSSE3-NEXT: movdqa %xmm0, %xmm1
108; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
109; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
110; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +0000111; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
112; SSSE3-NEXT: pand %xmm5, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000113; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000114; SSSE3-NEXT: por %xmm0, %xmm1
115; SSSE3-NEXT: pand %xmm4, %xmm1
116; SSSE3-NEXT: movdqa %xmm3, %xmm0
117; SSSE3-NEXT: pxor %xmm2, %xmm0
118; SSSE3-NEXT: movdqa %xmm0, %xmm4
119; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4
120; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
121; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
122; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
123; SSSE3-NEXT: pand %xmm5, %xmm2
124; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
125; SSSE3-NEXT: por %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000126; SSSE3-NEXT: pand %xmm3, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +0000127; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000128; SSSE3-NEXT: retq
129;
130; SSE41-LABEL: trunc_packus_v4i64_v4i32:
131; SSE41: # %bb.0:
132; SSE41-NEXT: movdqa %xmm0, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000133; SSE41-NEXT: movapd {{.*#+}} xmm4 = [4294967295,4294967295]
134; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0]
135; SSE41-NEXT: pxor %xmm8, %xmm0
136; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147483647,2147483647]
137; SSE41-NEXT: movdqa %xmm6, %xmm5
138; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
139; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
140; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000141; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000142; SSE41-NEXT: pand %xmm7, %xmm3
143; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000144; SSE41-NEXT: por %xmm3, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +0000145; SSE41-NEXT: movapd %xmm4, %xmm5
146; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5
147; SSE41-NEXT: movdqa %xmm1, %xmm0
148; SSE41-NEXT: pxor %xmm8, %xmm0
149; SSE41-NEXT: movdqa %xmm6, %xmm2
150; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000151; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000152; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
153; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
154; SSE41-NEXT: pand %xmm3, %xmm6
155; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
156; SSE41-NEXT: por %xmm6, %xmm0
157; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4
158; SSE41-NEXT: xorpd %xmm1, %xmm1
159; SSE41-NEXT: movapd %xmm4, %xmm0
160; SSE41-NEXT: xorpd %xmm8, %xmm0
161; SSE41-NEXT: movapd %xmm0, %xmm2
162; SSE41-NEXT: pcmpgtd %xmm8, %xmm2
163; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
164; SSE41-NEXT: pcmpeqd %xmm8, %xmm0
165; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
166; SSE41-NEXT: pand %xmm3, %xmm6
167; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
168; SSE41-NEXT: por %xmm6, %xmm0
169; SSE41-NEXT: pxor %xmm2, %xmm2
170; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
171; SSE41-NEXT: movapd %xmm5, %xmm0
172; SSE41-NEXT: xorpd %xmm8, %xmm0
173; SSE41-NEXT: movapd %xmm0, %xmm3
174; SSE41-NEXT: pcmpgtd %xmm8, %xmm3
175; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
176; SSE41-NEXT: pcmpeqd %xmm8, %xmm0
177; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
178; SSE41-NEXT: pand %xmm4, %xmm6
179; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
180; SSE41-NEXT: por %xmm6, %xmm0
181; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1
182; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
183; SSE41-NEXT: movaps %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000184; SSE41-NEXT: retq
185;
186; AVX1-LABEL: trunc_packus_v4i64_v4i32:
187; AVX1: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000188; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
189; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
190; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [4294967295,4294967295]
191; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
192; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
193; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
194; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000195; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
196; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
197; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
198; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
199; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
200; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
201; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
202; AVX1-NEXT: vzeroupper
203; AVX1-NEXT: retq
204;
205; AVX2-SLOW-LABEL: trunc_packus_v4i64_v4i32:
206; AVX2-SLOW: # %bb.0:
207; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
208; AVX2-SLOW-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
209; AVX2-SLOW-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
210; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
211; AVX2-SLOW-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1
212; AVX2-SLOW-NEXT: vpand %ymm0, %ymm1, %ymm0
213; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
214; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000215; AVX2-SLOW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000216; AVX2-SLOW-NEXT: vzeroupper
217; AVX2-SLOW-NEXT: retq
218;
219; AVX2-FAST-LABEL: trunc_packus_v4i64_v4i32:
220; AVX2-FAST: # %bb.0:
221; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
222; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
223; AVX2-FAST-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
224; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1
225; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1
226; AVX2-FAST-NEXT: vpand %ymm0, %ymm1, %ymm0
227; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,2,4,6,4,6,6,7]
228; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000229; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000230; AVX2-FAST-NEXT: vzeroupper
231; AVX2-FAST-NEXT: retq
232;
233; AVX512F-LABEL: trunc_packus_v4i64_v4i32:
234; AVX512F: # %bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000235; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000236; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
237; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
238; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
239; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
240; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000241; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000242; AVX512F-NEXT: vzeroupper
243; AVX512F-NEXT: retq
244;
245; AVX512VL-LABEL: trunc_packus_v4i64_v4i32:
246; AVX512VL: # %bb.0:
247; AVX512VL-NEXT: vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
248; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
249; AVX512VL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
250; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
251; AVX512VL-NEXT: vzeroupper
252; AVX512VL-NEXT: retq
253;
254; AVX512BW-LABEL: trunc_packus_v4i64_v4i32:
255; AVX512BW: # %bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000256; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000257; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
258; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
259; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
260; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
261; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000262; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000263; AVX512BW-NEXT: vzeroupper
264; AVX512BW-NEXT: retq
265;
266; AVX512BWVL-LABEL: trunc_packus_v4i64_v4i32:
267; AVX512BWVL: # %bb.0:
268; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
269; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
270; AVX512BWVL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
271; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
272; AVX512BWVL-NEXT: vzeroupper
273; AVX512BWVL-NEXT: retq
; IR under test. The check lines above are tool output: the NOTE at the top of the file says the
; assertions were autogenerated by utils/update_llc_test_checks.py.
; %1/%2 form a signed minimum: lanes that are not below 4294967295 are replaced by 4294967295.
274 %1 = icmp slt <4 x i64> %a0, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
275 %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
; %3/%4 form a signed maximum: lanes that are not above zero are replaced by zero.
276 %3 = icmp sgt <4 x i64> %2, zeroinitializer
277 %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> zeroinitializer
; Each lane of %4 is now within [0, 4294967295], so the truncation discards only zero bits.
278 %5 = trunc <4 x i64> %4 to <4 x i32>
279 ret <4 x i32> %5
280}
281
282
283define <8 x i32> @trunc_packus_v8i64_v8i32(<8 x i64> %a0) {
284; SSE2-LABEL: trunc_packus_v8i64_v8i32:
285; SSE2: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000286; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [4294967295,4294967295]
287; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
288; SSE2-NEXT: movdqa %xmm0, %xmm5
289; SSE2-NEXT: pxor %xmm10, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000290; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483647,2147483647]
291; SSE2-NEXT: movdqa %xmm9, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000292; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
293; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
294; SSE2-NEXT: pcmpeqd %xmm9, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000295; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
296; SSE2-NEXT: pand %xmm7, %xmm4
297; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000298; SSE2-NEXT: por %xmm4, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000299; SSE2-NEXT: pand %xmm5, %xmm0
300; SSE2-NEXT: pandn %xmm8, %xmm5
301; SSE2-NEXT: por %xmm0, %xmm5
302; SSE2-NEXT: movdqa %xmm1, %xmm0
303; SSE2-NEXT: pxor %xmm10, %xmm0
304; SSE2-NEXT: movdqa %xmm9, %xmm4
305; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
306; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
307; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
308; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
309; SSE2-NEXT: pand %xmm6, %xmm7
310; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
311; SSE2-NEXT: por %xmm7, %xmm0
312; SSE2-NEXT: pand %xmm0, %xmm1
313; SSE2-NEXT: pandn %xmm8, %xmm0
314; SSE2-NEXT: por %xmm1, %xmm0
315; SSE2-NEXT: movdqa %xmm2, %xmm1
316; SSE2-NEXT: pxor %xmm10, %xmm1
317; SSE2-NEXT: movdqa %xmm9, %xmm4
318; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
319; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
320; SSE2-NEXT: pcmpeqd %xmm9, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000321; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000322; SSE2-NEXT: pand %xmm6, %xmm1
323; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
324; SSE2-NEXT: por %xmm1, %xmm6
325; SSE2-NEXT: pand %xmm6, %xmm2
326; SSE2-NEXT: pandn %xmm8, %xmm6
327; SSE2-NEXT: por %xmm2, %xmm6
328; SSE2-NEXT: movdqa %xmm3, %xmm1
329; SSE2-NEXT: pxor %xmm10, %xmm1
330; SSE2-NEXT: movdqa %xmm9, %xmm2
331; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
332; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
333; SSE2-NEXT: pcmpeqd %xmm9, %xmm1
334; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
335; SSE2-NEXT: pand %xmm4, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000336; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
337; SSE2-NEXT: por %xmm1, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000338; SSE2-NEXT: pand %xmm2, %xmm3
339; SSE2-NEXT: pandn %xmm8, %xmm2
340; SSE2-NEXT: por %xmm3, %xmm2
341; SSE2-NEXT: movdqa %xmm2, %xmm1
342; SSE2-NEXT: pxor %xmm10, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000343; SSE2-NEXT: movdqa %xmm1, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +0000344; SSE2-NEXT: pcmpgtd %xmm10, %xmm3
345; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
346; SSE2-NEXT: pcmpeqd %xmm10, %xmm1
347; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
348; SSE2-NEXT: pand %xmm4, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000349; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000350; SSE2-NEXT: por %xmm1, %xmm3
351; SSE2-NEXT: pand %xmm2, %xmm3
352; SSE2-NEXT: movdqa %xmm6, %xmm1
353; SSE2-NEXT: pxor %xmm10, %xmm1
354; SSE2-NEXT: movdqa %xmm1, %xmm2
355; SSE2-NEXT: pcmpgtd %xmm10, %xmm2
356; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
357; SSE2-NEXT: pcmpeqd %xmm10, %xmm1
358; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
359; SSE2-NEXT: pand %xmm4, %xmm7
360; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
361; SSE2-NEXT: por %xmm7, %xmm1
362; SSE2-NEXT: pand %xmm6, %xmm1
363; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
364; SSE2-NEXT: movdqa %xmm0, %xmm2
365; SSE2-NEXT: pxor %xmm10, %xmm2
366; SSE2-NEXT: movdqa %xmm2, %xmm3
367; SSE2-NEXT: pcmpgtd %xmm10, %xmm3
368; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
369; SSE2-NEXT: pcmpeqd %xmm10, %xmm2
370; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
371; SSE2-NEXT: pand %xmm4, %xmm2
372; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
373; SSE2-NEXT: por %xmm2, %xmm3
374; SSE2-NEXT: pand %xmm0, %xmm3
375; SSE2-NEXT: movdqa %xmm5, %xmm0
376; SSE2-NEXT: pxor %xmm10, %xmm0
377; SSE2-NEXT: movdqa %xmm0, %xmm2
378; SSE2-NEXT: pcmpgtd %xmm10, %xmm2
379; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
380; SSE2-NEXT: pcmpeqd %xmm10, %xmm0
381; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
382; SSE2-NEXT: pand %xmm4, %xmm6
383; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
384; SSE2-NEXT: por %xmm6, %xmm0
385; SSE2-NEXT: pand %xmm5, %xmm0
386; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000387; SSE2-NEXT: retq
388;
389; SSSE3-LABEL: trunc_packus_v8i64_v8i32:
390; SSSE3: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000391; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [4294967295,4294967295]
392; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
393; SSSE3-NEXT: movdqa %xmm0, %xmm5
394; SSSE3-NEXT: pxor %xmm10, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000395; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483647,2147483647]
396; SSSE3-NEXT: movdqa %xmm9, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000397; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6
398; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
399; SSSE3-NEXT: pcmpeqd %xmm9, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000400; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
401; SSSE3-NEXT: pand %xmm7, %xmm4
402; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000403; SSSE3-NEXT: por %xmm4, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000404; SSSE3-NEXT: pand %xmm5, %xmm0
405; SSSE3-NEXT: pandn %xmm8, %xmm5
406; SSSE3-NEXT: por %xmm0, %xmm5
407; SSSE3-NEXT: movdqa %xmm1, %xmm0
408; SSSE3-NEXT: pxor %xmm10, %xmm0
409; SSSE3-NEXT: movdqa %xmm9, %xmm4
410; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4
411; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
412; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
413; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
414; SSSE3-NEXT: pand %xmm6, %xmm7
415; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
416; SSSE3-NEXT: por %xmm7, %xmm0
417; SSSE3-NEXT: pand %xmm0, %xmm1
418; SSSE3-NEXT: pandn %xmm8, %xmm0
419; SSSE3-NEXT: por %xmm1, %xmm0
420; SSSE3-NEXT: movdqa %xmm2, %xmm1
421; SSSE3-NEXT: pxor %xmm10, %xmm1
422; SSSE3-NEXT: movdqa %xmm9, %xmm4
423; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
424; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
425; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000426; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000427; SSSE3-NEXT: pand %xmm6, %xmm1
428; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
429; SSSE3-NEXT: por %xmm1, %xmm6
430; SSSE3-NEXT: pand %xmm6, %xmm2
431; SSSE3-NEXT: pandn %xmm8, %xmm6
432; SSSE3-NEXT: por %xmm2, %xmm6
433; SSSE3-NEXT: movdqa %xmm3, %xmm1
434; SSSE3-NEXT: pxor %xmm10, %xmm1
435; SSSE3-NEXT: movdqa %xmm9, %xmm2
436; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
437; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
438; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1
439; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
440; SSSE3-NEXT: pand %xmm4, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000441; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
442; SSSE3-NEXT: por %xmm1, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000443; SSSE3-NEXT: pand %xmm2, %xmm3
444; SSSE3-NEXT: pandn %xmm8, %xmm2
445; SSSE3-NEXT: por %xmm3, %xmm2
446; SSSE3-NEXT: movdqa %xmm2, %xmm1
447; SSSE3-NEXT: pxor %xmm10, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000448; SSSE3-NEXT: movdqa %xmm1, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +0000449; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3
450; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
451; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1
452; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
453; SSSE3-NEXT: pand %xmm4, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000454; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000455; SSSE3-NEXT: por %xmm1, %xmm3
456; SSSE3-NEXT: pand %xmm2, %xmm3
457; SSSE3-NEXT: movdqa %xmm6, %xmm1
458; SSSE3-NEXT: pxor %xmm10, %xmm1
459; SSSE3-NEXT: movdqa %xmm1, %xmm2
460; SSSE3-NEXT: pcmpgtd %xmm10, %xmm2
461; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
462; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1
463; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
464; SSSE3-NEXT: pand %xmm4, %xmm7
465; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
466; SSSE3-NEXT: por %xmm7, %xmm1
467; SSSE3-NEXT: pand %xmm6, %xmm1
468; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
469; SSSE3-NEXT: movdqa %xmm0, %xmm2
470; SSSE3-NEXT: pxor %xmm10, %xmm2
471; SSSE3-NEXT: movdqa %xmm2, %xmm3
472; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3
473; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
474; SSSE3-NEXT: pcmpeqd %xmm10, %xmm2
475; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
476; SSSE3-NEXT: pand %xmm4, %xmm2
477; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
478; SSSE3-NEXT: por %xmm2, %xmm3
479; SSSE3-NEXT: pand %xmm0, %xmm3
480; SSSE3-NEXT: movdqa %xmm5, %xmm0
481; SSSE3-NEXT: pxor %xmm10, %xmm0
482; SSSE3-NEXT: movdqa %xmm0, %xmm2
483; SSSE3-NEXT: pcmpgtd %xmm10, %xmm2
484; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
485; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0
486; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
487; SSSE3-NEXT: pand %xmm4, %xmm6
488; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
489; SSSE3-NEXT: por %xmm6, %xmm0
490; SSSE3-NEXT: pand %xmm5, %xmm0
491; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000492; SSSE3-NEXT: retq
493;
494; SSE41-LABEL: trunc_packus_v8i64_v8i32:
495; SSE41: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000496; SSE41-NEXT: movdqa %xmm0, %xmm4
497; SSE41-NEXT: movapd {{.*#+}} xmm7 = [4294967295,4294967295]
498; SSE41-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
499; SSE41-NEXT: pxor %xmm10, %xmm0
500; SSE41-NEXT: movdqa {{.*#+}} xmm11 = [2147483647,2147483647]
501; SSE41-NEXT: movdqa %xmm11, %xmm6
502; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
503; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
504; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
505; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
506; SSE41-NEXT: pand %xmm8, %xmm5
507; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
508; SSE41-NEXT: por %xmm5, %xmm0
509; SSE41-NEXT: movapd %xmm7, %xmm8
510; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm8
511; SSE41-NEXT: movdqa %xmm1, %xmm0
512; SSE41-NEXT: pxor %xmm10, %xmm0
513; SSE41-NEXT: movdqa %xmm11, %xmm4
514; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
515; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000516; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
517; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000518; SSE41-NEXT: pand %xmm5, %xmm6
519; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
520; SSE41-NEXT: por %xmm6, %xmm0
521; SSE41-NEXT: movapd %xmm7, %xmm9
522; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm9
523; SSE41-NEXT: movdqa %xmm2, %xmm0
524; SSE41-NEXT: pxor %xmm10, %xmm0
525; SSE41-NEXT: movdqa %xmm11, %xmm1
526; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
527; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
528; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
529; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
530; SSE41-NEXT: pand %xmm4, %xmm5
531; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
532; SSE41-NEXT: por %xmm5, %xmm0
533; SSE41-NEXT: movapd %xmm7, %xmm4
534; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4
535; SSE41-NEXT: movdqa %xmm3, %xmm0
536; SSE41-NEXT: pxor %xmm10, %xmm0
537; SSE41-NEXT: movdqa %xmm11, %xmm1
538; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
539; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
540; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
541; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
542; SSE41-NEXT: pand %xmm2, %xmm5
543; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
544; SSE41-NEXT: por %xmm5, %xmm0
545; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm7
546; SSE41-NEXT: pxor %xmm2, %xmm2
547; SSE41-NEXT: movapd %xmm7, %xmm0
548; SSE41-NEXT: xorpd %xmm10, %xmm0
549; SSE41-NEXT: movapd %xmm0, %xmm1
550; SSE41-NEXT: pcmpgtd %xmm10, %xmm1
551; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
552; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
553; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
554; SSE41-NEXT: pand %xmm3, %xmm5
555; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
556; SSE41-NEXT: por %xmm5, %xmm0
557; SSE41-NEXT: pxor %xmm3, %xmm3
558; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm3
559; SSE41-NEXT: movapd %xmm4, %xmm0
560; SSE41-NEXT: xorpd %xmm10, %xmm0
561; SSE41-NEXT: movapd %xmm0, %xmm1
562; SSE41-NEXT: pcmpgtd %xmm10, %xmm1
563; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
564; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
565; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
566; SSE41-NEXT: pand %xmm5, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000567; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
568; SSE41-NEXT: por %xmm6, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +0000569; SSE41-NEXT: pxor %xmm1, %xmm1
570; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1
571; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
572; SSE41-NEXT: movapd %xmm9, %xmm0
573; SSE41-NEXT: xorpd %xmm10, %xmm0
574; SSE41-NEXT: movapd %xmm0, %xmm3
575; SSE41-NEXT: pcmpgtd %xmm10, %xmm3
576; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
577; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
578; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
579; SSE41-NEXT: pand %xmm4, %xmm5
580; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
581; SSE41-NEXT: por %xmm5, %xmm0
582; SSE41-NEXT: pxor %xmm3, %xmm3
583; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm3
584; SSE41-NEXT: movapd %xmm8, %xmm0
585; SSE41-NEXT: xorpd %xmm10, %xmm0
586; SSE41-NEXT: movapd %xmm0, %xmm4
587; SSE41-NEXT: pcmpgtd %xmm10, %xmm4
588; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
589; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
590; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
591; SSE41-NEXT: pand %xmm5, %xmm6
592; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
593; SSE41-NEXT: por %xmm6, %xmm0
594; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm2
595; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
596; SSE41-NEXT: movaps %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000597; SSE41-NEXT: retq
598;
599; AVX1-LABEL: trunc_packus_v8i64_v8i32:
600; AVX1: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000601; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,4294967295]
602; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
603; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [4294967295,4294967295]
604; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
605; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm5
606; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
607; AVX1-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
608; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
609; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
610; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm4
611; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
612; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000613; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
614; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm3
615; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
616; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm5
617; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm6
618; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7
619; AVX1-NEXT: vpcmpgtq %xmm2, %xmm7, %xmm2
620; AVX1-NEXT: vpand %xmm7, %xmm2, %xmm2
621; AVX1-NEXT: vpand %xmm1, %xmm6, %xmm1
622; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
623; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm2
624; AVX1-NEXT: vpand %xmm0, %xmm3, %xmm0
625; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
626; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
627; AVX1-NEXT: retq
628;
629; AVX2-SLOW-LABEL: trunc_packus_v8i64_v8i32:
630; AVX2-SLOW: # %bb.0:
631; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,4294967295]
632; AVX2-SLOW-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000633; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim0be55672018-02-11 10:52:37 +0000634; AVX2-SLOW-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
635; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000636; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000637; AVX2-SLOW-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
638; AVX2-SLOW-NEXT: vpand %ymm1, %ymm3, %ymm1
639; AVX2-SLOW-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2
640; AVX2-SLOW-NEXT: vpand %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000641; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
642; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
643; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
644; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
645; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
646; AVX2-SLOW-NEXT: retq
647;
648; AVX2-FAST-LABEL: trunc_packus_v8i64_v8i32:
649; AVX2-FAST: # %bb.0:
650; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,4294967295]
651; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000652; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim0be55672018-02-11 10:52:37 +0000653; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
654; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000655; AVX2-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000656; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
657; AVX2-FAST-NEXT: vpand %ymm1, %ymm3, %ymm1
658; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2
659; AVX2-FAST-NEXT: vpand %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000660; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
661; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
662; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
663; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
664; AVX2-FAST-NEXT: retq
665;
666; AVX512-LABEL: trunc_packus_v8i64_v8i32:
667; AVX512: # %bb.0:
668; AVX512-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
669; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
670; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
671; AVX512-NEXT: vpmovqd %zmm0, %ymm0
672; AVX512-NEXT: retq
673 %1 = icmp slt <8 x i64> %a0, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
674 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
675 %3 = icmp sgt <8 x i64> %2, zeroinitializer
676 %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
677 %5 = trunc <8 x i64> %4 to <8 x i32>
678 ret <8 x i32> %5
679}
680
681;
682; PACKUS saturation truncation to vXi16
683;
684
; Saturating truncation of <8 x i64> to <8 x i16>: every lane is clamped with
; a signed min against 65535, then a signed max against 0, and the clamped
; value is truncated to i16 (see the IR at the end of the function).
; The per-target expectations below are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script rather than editing them by hand.
685define <8 x i16> @trunc_packus_v8i64_v8i16(<8 x i64> %a0) {
686; SSE2-LABEL: trunc_packus_v8i64_v8i16:
687; SSE2: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000688; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [65535,65535]
689; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
690; SSE2-NEXT: movdqa %xmm1, %xmm5
691; SSE2-NEXT: pxor %xmm10, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000692; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147549183,2147549183]
693; SSE2-NEXT: movdqa %xmm9, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000694; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
695; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
696; SSE2-NEXT: pcmpeqd %xmm9, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000697; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
698; SSE2-NEXT: pand %xmm7, %xmm4
699; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
700; SSE2-NEXT: por %xmm4, %xmm5
701; SSE2-NEXT: pand %xmm5, %xmm1
702; SSE2-NEXT: pandn %xmm8, %xmm5
703; SSE2-NEXT: por %xmm1, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000704; SSE2-NEXT: movdqa %xmm0, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +0000705; SSE2-NEXT: pxor %xmm10, %xmm1
706; SSE2-NEXT: movdqa %xmm9, %xmm4
707; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000708; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000709; SSE2-NEXT: pcmpeqd %xmm9, %xmm1
710; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
711; SSE2-NEXT: pand %xmm6, %xmm7
712; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
713; SSE2-NEXT: por %xmm7, %xmm1
714; SSE2-NEXT: pand %xmm1, %xmm0
715; SSE2-NEXT: pandn %xmm8, %xmm1
716; SSE2-NEXT: por %xmm0, %xmm1
717; SSE2-NEXT: movdqa %xmm3, %xmm0
718; SSE2-NEXT: pxor %xmm10, %xmm0
719; SSE2-NEXT: movdqa %xmm9, %xmm4
720; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
721; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
722; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
723; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
724; SSE2-NEXT: pand %xmm6, %xmm0
725; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
726; SSE2-NEXT: por %xmm0, %xmm6
727; SSE2-NEXT: pand %xmm6, %xmm3
728; SSE2-NEXT: pandn %xmm8, %xmm6
729; SSE2-NEXT: por %xmm3, %xmm6
730; SSE2-NEXT: movdqa %xmm2, %xmm0
731; SSE2-NEXT: pxor %xmm10, %xmm0
732; SSE2-NEXT: movdqa %xmm9, %xmm3
733; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
734; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
735; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
736; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
737; SSE2-NEXT: pand %xmm4, %xmm0
738; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
739; SSE2-NEXT: por %xmm0, %xmm3
740; SSE2-NEXT: pand %xmm3, %xmm2
741; SSE2-NEXT: pandn %xmm8, %xmm3
742; SSE2-NEXT: por %xmm2, %xmm3
743; SSE2-NEXT: movdqa %xmm3, %xmm0
744; SSE2-NEXT: pxor %xmm10, %xmm0
745; SSE2-NEXT: movdqa %xmm0, %xmm2
746; SSE2-NEXT: pcmpgtd %xmm10, %xmm2
747; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
748; SSE2-NEXT: pcmpeqd %xmm10, %xmm0
749; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
750; SSE2-NEXT: pand %xmm4, %xmm7
751; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
752; SSE2-NEXT: por %xmm7, %xmm0
753; SSE2-NEXT: pand %xmm3, %xmm0
754; SSE2-NEXT: movdqa %xmm6, %xmm2
755; SSE2-NEXT: pxor %xmm10, %xmm2
756; SSE2-NEXT: movdqa %xmm2, %xmm3
757; SSE2-NEXT: pcmpgtd %xmm10, %xmm3
758; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
759; SSE2-NEXT: pcmpeqd %xmm10, %xmm2
760; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
761; SSE2-NEXT: pand %xmm4, %xmm7
762; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
763; SSE2-NEXT: por %xmm7, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000764; SSE2-NEXT: pand %xmm6, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000765; SSE2-NEXT: movdqa %xmm1, %xmm3
766; SSE2-NEXT: pxor %xmm10, %xmm3
767; SSE2-NEXT: movdqa %xmm3, %xmm4
768; SSE2-NEXT: pcmpgtd %xmm10, %xmm4
769; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
770; SSE2-NEXT: pcmpeqd %xmm10, %xmm3
771; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
772; SSE2-NEXT: pand %xmm6, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000773; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000774; SSE2-NEXT: por %xmm3, %xmm4
775; SSE2-NEXT: pand %xmm1, %xmm4
776; SSE2-NEXT: movdqa %xmm5, %xmm1
777; SSE2-NEXT: pxor %xmm10, %xmm1
778; SSE2-NEXT: movdqa %xmm1, %xmm3
779; SSE2-NEXT: pcmpgtd %xmm10, %xmm3
780; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
781; SSE2-NEXT: pcmpeqd %xmm10, %xmm1
782; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
783; SSE2-NEXT: pand %xmm6, %xmm1
784; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
785; SSE2-NEXT: por %xmm1, %xmm3
786; SSE2-NEXT: pand %xmm5, %xmm3
787; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000788; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000789; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
790; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
791; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
792; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
793; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,0,2,4,5,6,7]
794; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000795; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000796; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
797; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000798; SSE2-NEXT: retq
799;
800; SSSE3-LABEL: trunc_packus_v8i64_v8i16:
801; SSSE3: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000802; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [65535,65535]
803; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
804; SSSE3-NEXT: movdqa %xmm1, %xmm5
805; SSSE3-NEXT: pxor %xmm10, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000806; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147549183,2147549183]
807; SSSE3-NEXT: movdqa %xmm9, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000808; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6
809; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
810; SSSE3-NEXT: pcmpeqd %xmm9, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000811; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
812; SSSE3-NEXT: pand %xmm7, %xmm4
813; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
814; SSSE3-NEXT: por %xmm4, %xmm5
815; SSSE3-NEXT: pand %xmm5, %xmm1
816; SSSE3-NEXT: pandn %xmm8, %xmm5
817; SSSE3-NEXT: por %xmm1, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000818; SSSE3-NEXT: movdqa %xmm0, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +0000819; SSSE3-NEXT: pxor %xmm10, %xmm1
820; SSSE3-NEXT: movdqa %xmm9, %xmm4
821; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000822; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000823; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1
824; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
825; SSSE3-NEXT: pand %xmm6, %xmm7
826; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
827; SSSE3-NEXT: por %xmm7, %xmm1
828; SSSE3-NEXT: pand %xmm1, %xmm0
829; SSSE3-NEXT: pandn %xmm8, %xmm1
830; SSSE3-NEXT: por %xmm0, %xmm1
831; SSSE3-NEXT: movdqa %xmm3, %xmm0
832; SSSE3-NEXT: pxor %xmm10, %xmm0
833; SSSE3-NEXT: movdqa %xmm9, %xmm4
834; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4
835; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
836; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
837; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
838; SSSE3-NEXT: pand %xmm6, %xmm0
839; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
840; SSSE3-NEXT: por %xmm0, %xmm6
841; SSSE3-NEXT: pand %xmm6, %xmm3
842; SSSE3-NEXT: pandn %xmm8, %xmm6
843; SSSE3-NEXT: por %xmm3, %xmm6
844; SSSE3-NEXT: movdqa %xmm2, %xmm0
845; SSSE3-NEXT: pxor %xmm10, %xmm0
846; SSSE3-NEXT: movdqa %xmm9, %xmm3
847; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3
848; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
849; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
850; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
851; SSSE3-NEXT: pand %xmm4, %xmm0
852; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
853; SSSE3-NEXT: por %xmm0, %xmm3
854; SSSE3-NEXT: pand %xmm3, %xmm2
855; SSSE3-NEXT: pandn %xmm8, %xmm3
856; SSSE3-NEXT: por %xmm2, %xmm3
857; SSSE3-NEXT: movdqa %xmm3, %xmm0
858; SSSE3-NEXT: pxor %xmm10, %xmm0
859; SSSE3-NEXT: movdqa %xmm0, %xmm2
860; SSSE3-NEXT: pcmpgtd %xmm10, %xmm2
861; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
862; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0
863; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
864; SSSE3-NEXT: pand %xmm4, %xmm7
865; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
866; SSSE3-NEXT: por %xmm7, %xmm0
867; SSSE3-NEXT: pand %xmm3, %xmm0
868; SSSE3-NEXT: movdqa %xmm6, %xmm2
869; SSSE3-NEXT: pxor %xmm10, %xmm2
870; SSSE3-NEXT: movdqa %xmm2, %xmm3
871; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3
872; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
873; SSSE3-NEXT: pcmpeqd %xmm10, %xmm2
874; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
875; SSSE3-NEXT: pand %xmm4, %xmm7
876; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
877; SSSE3-NEXT: por %xmm7, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000878; SSSE3-NEXT: pand %xmm6, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000879; SSSE3-NEXT: movdqa %xmm1, %xmm3
880; SSSE3-NEXT: pxor %xmm10, %xmm3
881; SSSE3-NEXT: movdqa %xmm3, %xmm4
882; SSSE3-NEXT: pcmpgtd %xmm10, %xmm4
883; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
884; SSSE3-NEXT: pcmpeqd %xmm10, %xmm3
885; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
886; SSSE3-NEXT: pand %xmm6, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000887; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000888; SSSE3-NEXT: por %xmm3, %xmm4
889; SSSE3-NEXT: pand %xmm1, %xmm4
890; SSSE3-NEXT: movdqa %xmm5, %xmm1
891; SSSE3-NEXT: pxor %xmm10, %xmm1
892; SSSE3-NEXT: movdqa %xmm1, %xmm3
893; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3
894; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
895; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1
896; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
897; SSSE3-NEXT: pand %xmm6, %xmm1
898; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
899; SSSE3-NEXT: por %xmm1, %xmm3
900; SSSE3-NEXT: pand %xmm5, %xmm3
901; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000902; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000903; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
904; SSSE3-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
905; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
906; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
907; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,0,2,4,5,6,7]
908; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000909; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000910; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
911; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000912; SSSE3-NEXT: retq
913;
914; SSE41-LABEL: trunc_packus_v8i64_v8i16:
915; SSE41: # %bb.0:
Simon Pilgrim70eb5082018-02-19 18:08:16 +0000916; SSE41-NEXT: movdqa %xmm0, %xmm9
Simon Pilgrim0be55672018-02-11 10:52:37 +0000917; SSE41-NEXT: movapd {{.*#+}} xmm7 = [65535,65535]
Simon Pilgrim70eb5082018-02-19 18:08:16 +0000918; SSE41-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000919; SSE41-NEXT: movdqa %xmm2, %xmm0
Simon Pilgrim70eb5082018-02-19 18:08:16 +0000920; SSE41-NEXT: pxor %xmm10, %xmm0
921; SSE41-NEXT: movdqa {{.*#+}} xmm11 = [2147549183,2147549183]
922; SSE41-NEXT: movdqa %xmm11, %xmm4
923; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
924; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
925; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
926; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
927; SSE41-NEXT: pand %xmm5, %xmm6
928; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
929; SSE41-NEXT: por %xmm6, %xmm0
930; SSE41-NEXT: movapd %xmm7, %xmm8
931; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm8
932; SSE41-NEXT: movdqa %xmm3, %xmm0
933; SSE41-NEXT: pxor %xmm10, %xmm0
934; SSE41-NEXT: movdqa %xmm11, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000935; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
936; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
Simon Pilgrim70eb5082018-02-19 18:08:16 +0000937; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +0000938; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
939; SSE41-NEXT: pand %xmm4, %xmm5
940; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
941; SSE41-NEXT: por %xmm5, %xmm0
942; SSE41-NEXT: movapd %xmm7, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000943; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2
Simon Pilgrim70eb5082018-02-19 18:08:16 +0000944; SSE41-NEXT: movdqa %xmm9, %xmm0
945; SSE41-NEXT: pxor %xmm10, %xmm0
946; SSE41-NEXT: movdqa %xmm11, %xmm3
947; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000948; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
Simon Pilgrim70eb5082018-02-19 18:08:16 +0000949; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +0000950; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
951; SSE41-NEXT: pand %xmm4, %xmm5
952; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
953; SSE41-NEXT: por %xmm5, %xmm0
Simon Pilgrim70eb5082018-02-19 18:08:16 +0000954; SSE41-NEXT: movapd %xmm7, %xmm4
955; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm4
956; SSE41-NEXT: movdqa %xmm1, %xmm0
957; SSE41-NEXT: pxor %xmm10, %xmm0
958; SSE41-NEXT: movdqa %xmm11, %xmm3
959; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
960; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
961; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
962; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
963; SSE41-NEXT: pand %xmm5, %xmm6
964; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
965; SSE41-NEXT: por %xmm6, %xmm0
966; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm7
Simon Pilgrim0be55672018-02-11 10:52:37 +0000967; SSE41-NEXT: pxor %xmm3, %xmm3
Simon Pilgrim70eb5082018-02-19 18:08:16 +0000968; SSE41-NEXT: movapd %xmm7, %xmm0
969; SSE41-NEXT: xorpd %xmm10, %xmm0
970; SSE41-NEXT: movapd %xmm0, %xmm1
971; SSE41-NEXT: pcmpgtd %xmm10, %xmm1
972; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
973; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
974; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
975; SSE41-NEXT: pand %xmm5, %xmm6
976; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
977; SSE41-NEXT: por %xmm6, %xmm0
978; SSE41-NEXT: pxor %xmm5, %xmm5
979; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm5
980; SSE41-NEXT: movapd %xmm4, %xmm0
981; SSE41-NEXT: xorpd %xmm10, %xmm0
982; SSE41-NEXT: movapd %xmm0, %xmm1
983; SSE41-NEXT: pcmpgtd %xmm10, %xmm1
984; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
985; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
986; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
987; SSE41-NEXT: pand %xmm6, %xmm7
988; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
989; SSE41-NEXT: por %xmm7, %xmm0
990; SSE41-NEXT: pxor %xmm1, %xmm1
991; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1
992; SSE41-NEXT: packusdw %xmm5, %xmm1
993; SSE41-NEXT: movapd %xmm2, %xmm0
994; SSE41-NEXT: xorpd %xmm10, %xmm0
995; SSE41-NEXT: movapd %xmm0, %xmm4
996; SSE41-NEXT: pcmpgtd %xmm10, %xmm4
997; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
998; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
999; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1000; SSE41-NEXT: pand %xmm5, %xmm6
1001; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
1002; SSE41-NEXT: por %xmm6, %xmm0
1003; SSE41-NEXT: pxor %xmm4, %xmm4
1004; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4
1005; SSE41-NEXT: movapd %xmm8, %xmm0
1006; SSE41-NEXT: xorpd %xmm10, %xmm0
1007; SSE41-NEXT: movapd %xmm0, %xmm2
1008; SSE41-NEXT: pcmpgtd %xmm10, %xmm2
1009; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2]
1010; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
1011; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1012; SSE41-NEXT: pand %xmm5, %xmm6
1013; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
1014; SSE41-NEXT: por %xmm6, %xmm0
1015; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm3
1016; SSE41-NEXT: packusdw %xmm4, %xmm3
1017; SSE41-NEXT: packusdw %xmm3, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001018; SSE41-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001019; SSE41-NEXT: retq
1020;
1021; AVX1-LABEL: trunc_packus_v8i64_v8i16:
1022; AVX1: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001023; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [65535,65535,65535,65535]
1024; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1025; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [65535,65535]
1026; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
1027; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm5
1028; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
1029; AVX1-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
1030; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1031; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
1032; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm4
1033; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
1034; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001035; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
Simon Pilgrim70eb5082018-02-19 18:08:16 +00001036; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001037; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
1038; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm5
1039; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm6
1040; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7
Simon Pilgrim70eb5082018-02-19 18:08:16 +00001041; AVX1-NEXT: vpcmpgtq %xmm2, %xmm7, %xmm2
1042; AVX1-NEXT: vpand %xmm7, %xmm2, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001043; AVX1-NEXT: vpand %xmm1, %xmm6, %xmm1
Simon Pilgrim70eb5082018-02-19 18:08:16 +00001044; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
1045; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm2
1046; AVX1-NEXT: vpand %xmm0, %xmm3, %xmm0
1047; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001048; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
1049; AVX1-NEXT: vzeroupper
1050; AVX1-NEXT: retq
1051;
Simon Pilgrim70eb5082018-02-19 18:08:16 +00001052; AVX2-LABEL: trunc_packus_v8i64_v8i16:
1053; AVX2: # %bb.0:
1054; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [65535,65535,65535,65535]
1055; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
1056; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
1057; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
1058; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
1059; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1060; AVX2-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
1061; AVX2-NEXT: vpand %ymm1, %ymm3, %ymm1
1062; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2
1063; AVX2-NEXT: vpand %ymm0, %ymm2, %ymm0
1064; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
1065; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1066; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1067; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
1068; AVX2-NEXT: vzeroupper
1069; AVX2-NEXT: retq
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001070;
1071; AVX512-LABEL: trunc_packus_v8i64_v8i16:
1072; AVX512: # %bb.0:
1073; AVX512-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
1074; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
1075; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
1076; AVX512-NEXT: vpmovqw %zmm0, %xmm0
1077; AVX512-NEXT: vzeroupper
1078; AVX512-NEXT: retq
; IR under test. %1 and %2 form a signed min of each lane with 65535.
; %3 and %4 form a signed max of that result with 0.
; %5 truncates the clamped i64 lanes to i16.
1079 %1 = icmp slt <8 x i64> %a0, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
1080 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
1081 %3 = icmp sgt <8 x i64> %2, zeroinitializer
1082 %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
1083 %5 = trunc <8 x i64> %4 to <8 x i16>
1084 ret <8 x i16> %5
1085}
1086
; Saturating truncation of <8 x i32> to <8 x i16>: every lane is clamped with
; a signed min against 65535, then a signed max against 0, and the clamped
; value is truncated to i16 (see the IR at the end of the function).
; In the expectations below, the SSE4.1, AVX1 and AVX2 runs show the whole
; clamp-and-truncate sequence lowered to a single packusdw / vpackusdw, while
; the AVX512 runs keep an explicit vpminsd / vpmaxsd clamp followed by vpmovdw.
; The expectations are autogenerated by utils/update_llc_test_checks.py (see
; the NOTE at the top of the file); regenerate them rather than hand-editing.
1087define <8 x i16> @trunc_packus_v8i32_v8i16(<8 x i32> %a0) {
1088; SSE2-LABEL: trunc_packus_v8i32_v8i16:
1089; SSE2: # %bb.0:
1090; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
1091; SSE2-NEXT: movdqa %xmm2, %xmm3
1092; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001093; SSE2-NEXT: pand %xmm3, %xmm1
1094; SSE2-NEXT: pandn %xmm2, %xmm3
1095; SSE2-NEXT: por %xmm1, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00001096; SSE2-NEXT: movdqa %xmm2, %xmm1
1097; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
1098; SSE2-NEXT: pand %xmm1, %xmm0
1099; SSE2-NEXT: pandn %xmm2, %xmm1
1100; SSE2-NEXT: por %xmm0, %xmm1
1101; SSE2-NEXT: pxor %xmm2, %xmm2
1102; SSE2-NEXT: movdqa %xmm1, %xmm0
1103; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
1104; SSE2-NEXT: pand %xmm1, %xmm0
1105; SSE2-NEXT: movdqa %xmm3, %xmm1
1106; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
1107; SSE2-NEXT: pand %xmm3, %xmm1
1108; SSE2-NEXT: pslld $16, %xmm1
1109; SSE2-NEXT: psrad $16, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001110; SSE2-NEXT: pslld $16, %xmm0
1111; SSE2-NEXT: psrad $16, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001112; SSE2-NEXT: packssdw %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001113; SSE2-NEXT: retq
1114;
1115; SSSE3-LABEL: trunc_packus_v8i32_v8i16:
1116; SSSE3: # %bb.0:
1117; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
1118; SSSE3-NEXT: movdqa %xmm2, %xmm3
1119; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001120; SSSE3-NEXT: pand %xmm3, %xmm1
1121; SSSE3-NEXT: pandn %xmm2, %xmm3
1122; SSSE3-NEXT: por %xmm1, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00001123; SSSE3-NEXT: movdqa %xmm2, %xmm1
1124; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
1125; SSSE3-NEXT: pand %xmm1, %xmm0
1126; SSSE3-NEXT: pandn %xmm2, %xmm1
1127; SSSE3-NEXT: por %xmm0, %xmm1
1128; SSSE3-NEXT: pxor %xmm2, %xmm2
1129; SSSE3-NEXT: movdqa %xmm1, %xmm0
1130; SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
1131; SSSE3-NEXT: pand %xmm1, %xmm0
1132; SSSE3-NEXT: movdqa %xmm3, %xmm1
1133; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
1134; SSSE3-NEXT: pand %xmm3, %xmm1
1135; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
1136; SSSE3-NEXT: pshufb %xmm2, %xmm1
1137; SSSE3-NEXT: pshufb %xmm2, %xmm0
1138; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001139; SSSE3-NEXT: retq
1140;
1141; SSE41-LABEL: trunc_packus_v8i32_v8i16:
1142; SSE41: # %bb.0:
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00001143; SSE41-NEXT: packusdw %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001144; SSE41-NEXT: retq
1145;
1146; AVX1-LABEL: trunc_packus_v8i32_v8i16:
1147; AVX1: # %bb.0:
1148; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Simon Pilgrimb4e789e2018-02-07 15:48:44 +00001149; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001150; AVX1-NEXT: vzeroupper
1151; AVX1-NEXT: retq
1152;
1153; AVX2-LABEL: trunc_packus_v8i32_v8i16:
1154; AVX2: # %bb.0:
Simon Pilgrimb4e789e2018-02-07 15:48:44 +00001155; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1156; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001157; AVX2-NEXT: vzeroupper
1158; AVX2-NEXT: retq
1159;
1160; AVX512F-LABEL: trunc_packus_v8i32_v8i16:
1161; AVX512F: # %bb.0:
1162; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535]
1163; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1164; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1165; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1166; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001167; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001168; AVX512F-NEXT: vzeroupper
1169; AVX512F-NEXT: retq
1170;
1171; AVX512VL-LABEL: trunc_packus_v8i32_v8i16:
1172; AVX512VL: # %bb.0:
1173; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
1174; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1175; AVX512VL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1176; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
1177; AVX512VL-NEXT: vzeroupper
1178; AVX512VL-NEXT: retq
1179;
1180; AVX512BW-LABEL: trunc_packus_v8i32_v8i16:
1181; AVX512BW: # %bb.0:
1182; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535]
1183; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1184; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
1185; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1186; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001187; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001188; AVX512BW-NEXT: vzeroupper
1189; AVX512BW-NEXT: retq
1190;
1191; AVX512BWVL-LABEL: trunc_packus_v8i32_v8i16:
1192; AVX512BWVL: # %bb.0:
1193; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
1194; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1195; AVX512BWVL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1196; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
1197; AVX512BWVL-NEXT: vzeroupper
1198; AVX512BWVL-NEXT: retq
; IR under test. %1 and %2 form a signed min of each lane with 65535.
; %3 and %4 form a signed max of that result with 0.
; %5 truncates the clamped i32 lanes to i16.
1199 %1 = icmp slt <8 x i32> %a0, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
1200 %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
1201 %3 = icmp sgt <8 x i32> %2, zeroinitializer
1202 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
1203 %5 = trunc <8 x i32> %4 to <8 x i16>
1204 ret <8 x i16> %5
1205}
1206
1207define <16 x i16> @trunc_packus_v16i32_v16i16(<16 x i32> %a0) {
1208; SSE2-LABEL: trunc_packus_v16i32_v16i16:
1209; SSE2: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001210; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [65535,65535,65535,65535]
1211; SSE2-NEXT: movdqa %xmm6, %xmm4
1212; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
1213; SSE2-NEXT: pand %xmm4, %xmm1
1214; SSE2-NEXT: pandn %xmm6, %xmm4
1215; SSE2-NEXT: por %xmm1, %xmm4
1216; SSE2-NEXT: movdqa %xmm6, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001217; SSE2-NEXT: pcmpgtd %xmm0, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001218; SSE2-NEXT: pand %xmm5, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001219; SSE2-NEXT: pandn %xmm6, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001220; SSE2-NEXT: por %xmm0, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001221; SSE2-NEXT: movdqa %xmm6, %xmm0
1222; SSE2-NEXT: pcmpgtd %xmm3, %xmm0
1223; SSE2-NEXT: pand %xmm0, %xmm3
1224; SSE2-NEXT: pandn %xmm6, %xmm0
1225; SSE2-NEXT: por %xmm3, %xmm0
1226; SSE2-NEXT: movdqa %xmm6, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001227; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00001228; SSE2-NEXT: pand %xmm3, %xmm2
1229; SSE2-NEXT: pandn %xmm6, %xmm3
1230; SSE2-NEXT: por %xmm2, %xmm3
1231; SSE2-NEXT: pxor %xmm2, %xmm2
1232; SSE2-NEXT: movdqa %xmm3, %xmm1
1233; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
1234; SSE2-NEXT: pand %xmm3, %xmm1
1235; SSE2-NEXT: movdqa %xmm0, %xmm3
1236; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
1237; SSE2-NEXT: pand %xmm0, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001238; SSE2-NEXT: movdqa %xmm5, %xmm0
1239; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001240; SSE2-NEXT: pand %xmm5, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001241; SSE2-NEXT: movdqa %xmm4, %xmm5
1242; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
1243; SSE2-NEXT: pand %xmm4, %xmm5
1244; SSE2-NEXT: pslld $16, %xmm5
1245; SSE2-NEXT: psrad $16, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001246; SSE2-NEXT: pslld $16, %xmm0
1247; SSE2-NEXT: psrad $16, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001248; SSE2-NEXT: packssdw %xmm5, %xmm0
1249; SSE2-NEXT: pslld $16, %xmm3
1250; SSE2-NEXT: psrad $16, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001251; SSE2-NEXT: pslld $16, %xmm1
1252; SSE2-NEXT: psrad $16, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001253; SSE2-NEXT: packssdw %xmm3, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001254; SSE2-NEXT: retq
1255;
1256; SSSE3-LABEL: trunc_packus_v16i32_v16i16:
1257; SSSE3: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001258; SSSE3-NEXT: movdqa {{.*#+}} xmm6 = [65535,65535,65535,65535]
1259; SSSE3-NEXT: movdqa %xmm6, %xmm4
1260; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
1261; SSSE3-NEXT: pand %xmm4, %xmm1
1262; SSSE3-NEXT: pandn %xmm6, %xmm4
1263; SSSE3-NEXT: por %xmm1, %xmm4
1264; SSSE3-NEXT: movdqa %xmm6, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001265; SSSE3-NEXT: pcmpgtd %xmm0, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001266; SSSE3-NEXT: pand %xmm5, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001267; SSSE3-NEXT: pandn %xmm6, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001268; SSSE3-NEXT: por %xmm0, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001269; SSSE3-NEXT: movdqa %xmm6, %xmm0
1270; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
1271; SSSE3-NEXT: pand %xmm0, %xmm3
1272; SSSE3-NEXT: pandn %xmm6, %xmm0
1273; SSSE3-NEXT: por %xmm3, %xmm0
1274; SSSE3-NEXT: movdqa %xmm6, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001275; SSSE3-NEXT: pcmpgtd %xmm2, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00001276; SSSE3-NEXT: pand %xmm3, %xmm2
1277; SSSE3-NEXT: pandn %xmm6, %xmm3
1278; SSSE3-NEXT: por %xmm2, %xmm3
1279; SSSE3-NEXT: pxor %xmm2, %xmm2
1280; SSSE3-NEXT: movdqa %xmm3, %xmm1
1281; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
1282; SSSE3-NEXT: pand %xmm3, %xmm1
1283; SSSE3-NEXT: movdqa %xmm0, %xmm3
1284; SSSE3-NEXT: pcmpgtd %xmm2, %xmm3
1285; SSSE3-NEXT: pand %xmm0, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001286; SSSE3-NEXT: movdqa %xmm5, %xmm0
1287; SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001288; SSSE3-NEXT: pand %xmm5, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001289; SSSE3-NEXT: movdqa %xmm4, %xmm5
1290; SSSE3-NEXT: pcmpgtd %xmm2, %xmm5
1291; SSSE3-NEXT: pand %xmm4, %xmm5
1292; SSSE3-NEXT: pslld $16, %xmm5
1293; SSSE3-NEXT: psrad $16, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001294; SSSE3-NEXT: pslld $16, %xmm0
1295; SSSE3-NEXT: psrad $16, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001296; SSSE3-NEXT: packssdw %xmm5, %xmm0
1297; SSSE3-NEXT: pslld $16, %xmm3
1298; SSSE3-NEXT: psrad $16, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001299; SSSE3-NEXT: pslld $16, %xmm1
1300; SSSE3-NEXT: psrad $16, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001301; SSSE3-NEXT: packssdw %xmm3, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001302; SSSE3-NEXT: retq
1303;
1304; SSE41-LABEL: trunc_packus_v16i32_v16i16:
1305; SSE41: # %bb.0:
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001306; SSE41-NEXT: packusdw %xmm1, %xmm0
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00001307; SSE41-NEXT: packusdw %xmm3, %xmm2
1308; SSE41-NEXT: movdqa %xmm2, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001309; SSE41-NEXT: retq
1310;
1311; AVX1-LABEL: trunc_packus_v16i32_v16i16:
1312; AVX1: # %bb.0:
1313; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001314; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00001315; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001316; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
1317; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1318; AVX1-NEXT: retq
1319;
1320; AVX2-LABEL: trunc_packus_v16i32_v16i16:
1321; AVX2: # %bb.0:
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00001322; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
1323; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001324; AVX2-NEXT: retq
1325;
1326; AVX512-LABEL: trunc_packus_v16i32_v16i16:
1327; AVX512: # %bb.0:
1328; AVX512-NEXT: vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
1329; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
1330; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
1331; AVX512-NEXT: vpmovdw %zmm0, %ymm0
1332; AVX512-NEXT: retq
1333 %1 = icmp slt <16 x i32> %a0, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
1334 %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
1335 %3 = icmp sgt <16 x i32> %2, zeroinitializer
1336 %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
1337 %5 = trunc <16 x i32> %4 to <16 x i16>
1338 ret <16 x i16> %5
1339}
1340
1341;
1342; PACKUS saturation truncation to vXi8
1343;
1344
; trunc_packus_v8i64_v8i8: every i64 lane of %a0 is clamped to [0, 255] using
; signed compares (select-min against 255, then select-max against 0) and the
; clamped value is truncated to i8, giving an <8 x i8> result.
; The CHECK lines below record the expected llc output for each RUN-line
; prefix. The file header states they are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them with that script instead
; of editing them by hand.
1345define <8 x i8> @trunc_packus_v8i64_v8i8(<8 x i64> %a0) {
1346; SSE2-LABEL: trunc_packus_v8i64_v8i8:
1347; SSE2: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001348; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [255,255]
1349; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001350; SSE2-NEXT: movdqa %xmm2, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001351; SSE2-NEXT: pxor %xmm10, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001352; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483903,2147483903]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001353; SSE2-NEXT: movdqa %xmm9, %xmm7
1354; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
1355; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[0,0,2,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001356; SSE2-NEXT: pcmpeqd %xmm9, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001357; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001358; SSE2-NEXT: pand %xmm6, %xmm4
1359; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001360; SSE2-NEXT: por %xmm4, %xmm5
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001361; SSE2-NEXT: pand %xmm5, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001362; SSE2-NEXT: pandn %xmm8, %xmm5
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001363; SSE2-NEXT: por %xmm2, %xmm5
1364; SSE2-NEXT: movdqa %xmm3, %xmm2
1365; SSE2-NEXT: pxor %xmm10, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001366; SSE2-NEXT: movdqa %xmm9, %xmm4
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001367; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001368; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001369; SSE2-NEXT: pcmpeqd %xmm9, %xmm2
1370; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001371; SSE2-NEXT: pand %xmm6, %xmm7
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001372; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1373; SSE2-NEXT: por %xmm7, %xmm2
1374; SSE2-NEXT: pand %xmm2, %xmm3
1375; SSE2-NEXT: pandn %xmm8, %xmm2
1376; SSE2-NEXT: por %xmm3, %xmm2
1377; SSE2-NEXT: movdqa %xmm0, %xmm3
1378; SSE2-NEXT: pxor %xmm10, %xmm3
1379; SSE2-NEXT: movdqa %xmm9, %xmm4
1380; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
1381; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
1382; SSE2-NEXT: pcmpeqd %xmm9, %xmm3
1383; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
1384; SSE2-NEXT: pand %xmm6, %xmm7
1385; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1386; SSE2-NEXT: por %xmm7, %xmm3
1387; SSE2-NEXT: pand %xmm3, %xmm0
1388; SSE2-NEXT: pandn %xmm8, %xmm3
1389; SSE2-NEXT: por %xmm0, %xmm3
1390; SSE2-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001391; SSE2-NEXT: pxor %xmm10, %xmm0
1392; SSE2-NEXT: movdqa %xmm9, %xmm4
1393; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
1394; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
1395; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
1396; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1397; SSE2-NEXT: pand %xmm6, %xmm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001398; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1399; SSE2-NEXT: por %xmm0, %xmm4
1400; SSE2-NEXT: pand %xmm4, %xmm1
1401; SSE2-NEXT: pandn %xmm8, %xmm4
1402; SSE2-NEXT: por %xmm1, %xmm4
1403; SSE2-NEXT: movdqa %xmm4, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001404; SSE2-NEXT: pxor %xmm10, %xmm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001405; SSE2-NEXT: movdqa %xmm0, %xmm1
1406; SSE2-NEXT: pcmpgtd %xmm10, %xmm1
1407; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
1408; SSE2-NEXT: pcmpeqd %xmm10, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001409; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001410; SSE2-NEXT: pand %xmm6, %xmm0
1411; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1412; SSE2-NEXT: por %xmm0, %xmm1
1413; SSE2-NEXT: pand %xmm4, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001414; SSE2-NEXT: movdqa %xmm3, %xmm0
1415; SSE2-NEXT: pxor %xmm10, %xmm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001416; SSE2-NEXT: movdqa %xmm0, %xmm4
Simon Pilgrim0be55672018-02-11 10:52:37 +00001417; SSE2-NEXT: pcmpgtd %xmm10, %xmm4
1418; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001419; SSE2-NEXT: pcmpeqd %xmm10, %xmm0
1420; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
1421; SSE2-NEXT: pand %xmm6, %xmm7
1422; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
1423; SSE2-NEXT: por %xmm7, %xmm0
1424; SSE2-NEXT: pand %xmm3, %xmm0
1425; SSE2-NEXT: packssdw %xmm1, %xmm0
1426; SSE2-NEXT: movdqa %xmm2, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001427; SSE2-NEXT: pxor %xmm10, %xmm1
1428; SSE2-NEXT: movdqa %xmm1, %xmm3
1429; SSE2-NEXT: pcmpgtd %xmm10, %xmm3
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001430; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001431; SSE2-NEXT: pcmpeqd %xmm10, %xmm1
1432; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001433; SSE2-NEXT: pand %xmm4, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001434; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1435; SSE2-NEXT: por %xmm1, %xmm3
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001436; SSE2-NEXT: pand %xmm2, %xmm3
1437; SSE2-NEXT: movdqa %xmm5, %xmm1
1438; SSE2-NEXT: pxor %xmm10, %xmm1
1439; SSE2-NEXT: movdqa %xmm1, %xmm2
1440; SSE2-NEXT: pcmpgtd %xmm10, %xmm2
1441; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
1442; SSE2-NEXT: pcmpeqd %xmm10, %xmm1
1443; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1444; SSE2-NEXT: pand %xmm4, %xmm1
1445; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1446; SSE2-NEXT: por %xmm1, %xmm2
1447; SSE2-NEXT: pand %xmm5, %xmm2
1448; SSE2-NEXT: packssdw %xmm3, %xmm2
1449; SSE2-NEXT: packssdw %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001450; SSE2-NEXT: retq
1451;
1452; SSSE3-LABEL: trunc_packus_v8i64_v8i8:
1453; SSSE3: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001454; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [255,255]
1455; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001456; SSSE3-NEXT: movdqa %xmm2, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001457; SSSE3-NEXT: pxor %xmm10, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001458; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483903,2147483903]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001459; SSSE3-NEXT: movdqa %xmm9, %xmm7
1460; SSSE3-NEXT: pcmpgtd %xmm5, %xmm7
1461; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm7[0,0,2,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001462; SSSE3-NEXT: pcmpeqd %xmm9, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001463; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001464; SSSE3-NEXT: pand %xmm6, %xmm4
1465; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001466; SSSE3-NEXT: por %xmm4, %xmm5
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001467; SSSE3-NEXT: pand %xmm5, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001468; SSSE3-NEXT: pandn %xmm8, %xmm5
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001469; SSSE3-NEXT: por %xmm2, %xmm5
1470; SSSE3-NEXT: movdqa %xmm3, %xmm2
1471; SSSE3-NEXT: pxor %xmm10, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001472; SSSE3-NEXT: movdqa %xmm9, %xmm4
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001473; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001474; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001475; SSSE3-NEXT: pcmpeqd %xmm9, %xmm2
1476; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001477; SSSE3-NEXT: pand %xmm6, %xmm7
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001478; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1479; SSSE3-NEXT: por %xmm7, %xmm2
1480; SSSE3-NEXT: pand %xmm2, %xmm3
1481; SSSE3-NEXT: pandn %xmm8, %xmm2
1482; SSSE3-NEXT: por %xmm3, %xmm2
1483; SSSE3-NEXT: movdqa %xmm0, %xmm3
1484; SSSE3-NEXT: pxor %xmm10, %xmm3
1485; SSSE3-NEXT: movdqa %xmm9, %xmm4
1486; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4
1487; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
1488; SSSE3-NEXT: pcmpeqd %xmm9, %xmm3
1489; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
1490; SSSE3-NEXT: pand %xmm6, %xmm7
1491; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1492; SSSE3-NEXT: por %xmm7, %xmm3
1493; SSSE3-NEXT: pand %xmm3, %xmm0
1494; SSSE3-NEXT: pandn %xmm8, %xmm3
1495; SSSE3-NEXT: por %xmm0, %xmm3
1496; SSSE3-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001497; SSSE3-NEXT: pxor %xmm10, %xmm0
1498; SSSE3-NEXT: movdqa %xmm9, %xmm4
1499; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4
1500; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
1501; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
1502; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1503; SSSE3-NEXT: pand %xmm6, %xmm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001504; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1505; SSSE3-NEXT: por %xmm0, %xmm4
1506; SSSE3-NEXT: pand %xmm4, %xmm1
1507; SSSE3-NEXT: pandn %xmm8, %xmm4
1508; SSSE3-NEXT: por %xmm1, %xmm4
1509; SSSE3-NEXT: movdqa %xmm4, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001510; SSSE3-NEXT: pxor %xmm10, %xmm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001511; SSSE3-NEXT: movdqa %xmm0, %xmm1
1512; SSSE3-NEXT: pcmpgtd %xmm10, %xmm1
1513; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
1514; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001515; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001516; SSSE3-NEXT: pand %xmm6, %xmm0
1517; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1518; SSSE3-NEXT: por %xmm0, %xmm1
1519; SSSE3-NEXT: pand %xmm4, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001520; SSSE3-NEXT: movdqa %xmm3, %xmm0
1521; SSSE3-NEXT: pxor %xmm10, %xmm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001522; SSSE3-NEXT: movdqa %xmm0, %xmm4
Simon Pilgrim0be55672018-02-11 10:52:37 +00001523; SSSE3-NEXT: pcmpgtd %xmm10, %xmm4
1524; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001525; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0
1526; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
1527; SSSE3-NEXT: pand %xmm6, %xmm7
1528; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
1529; SSSE3-NEXT: por %xmm7, %xmm0
1530; SSSE3-NEXT: pand %xmm3, %xmm0
1531; SSSE3-NEXT: packssdw %xmm1, %xmm0
1532; SSSE3-NEXT: movdqa %xmm2, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001533; SSSE3-NEXT: pxor %xmm10, %xmm1
1534; SSSE3-NEXT: movdqa %xmm1, %xmm3
1535; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001536; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001537; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1
1538; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001539; SSSE3-NEXT: pand %xmm4, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001540; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1541; SSSE3-NEXT: por %xmm1, %xmm3
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001542; SSSE3-NEXT: pand %xmm2, %xmm3
1543; SSSE3-NEXT: movdqa %xmm5, %xmm1
1544; SSSE3-NEXT: pxor %xmm10, %xmm1
1545; SSSE3-NEXT: movdqa %xmm1, %xmm2
1546; SSSE3-NEXT: pcmpgtd %xmm10, %xmm2
1547; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
1548; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1
1549; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1550; SSSE3-NEXT: pand %xmm4, %xmm1
1551; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1552; SSSE3-NEXT: por %xmm1, %xmm2
1553; SSSE3-NEXT: pand %xmm5, %xmm2
1554; SSSE3-NEXT: packssdw %xmm3, %xmm2
1555; SSSE3-NEXT: packssdw %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001556; SSSE3-NEXT: retq
1557;
1558; SSE41-LABEL: trunc_packus_v8i64_v8i8:
1559; SSE41: # %bb.0:
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001560; SSE41-NEXT: movdqa %xmm0, %xmm9
Simon Pilgrim0be55672018-02-11 10:52:37 +00001561; SSE41-NEXT: movapd {{.*#+}} xmm7 = [255,255]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001562; SSE41-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001563; SSE41-NEXT: movdqa %xmm2, %xmm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001564; SSE41-NEXT: pxor %xmm10, %xmm0
1565; SSE41-NEXT: movdqa {{.*#+}} xmm11 = [2147483903,2147483903]
1566; SSE41-NEXT: movdqa %xmm11, %xmm4
1567; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
1568; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1569; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
1570; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1571; SSE41-NEXT: pand %xmm5, %xmm6
1572; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
1573; SSE41-NEXT: por %xmm6, %xmm0
1574; SSE41-NEXT: movapd %xmm7, %xmm8
1575; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm8
1576; SSE41-NEXT: movdqa %xmm3, %xmm0
1577; SSE41-NEXT: pxor %xmm10, %xmm0
1578; SSE41-NEXT: movdqa %xmm11, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001579; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
1580; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001581; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001582; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
1583; SSE41-NEXT: pand %xmm4, %xmm5
1584; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
1585; SSE41-NEXT: por %xmm5, %xmm0
1586; SSE41-NEXT: movapd %xmm7, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001587; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001588; SSE41-NEXT: movdqa %xmm9, %xmm0
1589; SSE41-NEXT: pxor %xmm10, %xmm0
1590; SSE41-NEXT: movdqa %xmm11, %xmm3
1591; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001592; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001593; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001594; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
1595; SSE41-NEXT: pand %xmm4, %xmm5
1596; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
1597; SSE41-NEXT: por %xmm5, %xmm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001598; SSE41-NEXT: movapd %xmm7, %xmm4
1599; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm4
1600; SSE41-NEXT: movdqa %xmm1, %xmm0
1601; SSE41-NEXT: pxor %xmm10, %xmm0
1602; SSE41-NEXT: movdqa %xmm11, %xmm3
1603; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
1604; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
1605; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
1606; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1607; SSE41-NEXT: pand %xmm5, %xmm6
1608; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
1609; SSE41-NEXT: por %xmm6, %xmm0
1610; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm7
Simon Pilgrim0be55672018-02-11 10:52:37 +00001611; SSE41-NEXT: pxor %xmm3, %xmm3
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001612; SSE41-NEXT: movapd %xmm7, %xmm0
1613; SSE41-NEXT: xorpd %xmm10, %xmm0
1614; SSE41-NEXT: movapd %xmm0, %xmm1
1615; SSE41-NEXT: pcmpgtd %xmm10, %xmm1
1616; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
1617; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
1618; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1619; SSE41-NEXT: pand %xmm5, %xmm6
1620; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1621; SSE41-NEXT: por %xmm6, %xmm0
1622; SSE41-NEXT: pxor %xmm5, %xmm5
1623; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm5
1624; SSE41-NEXT: movapd %xmm4, %xmm0
1625; SSE41-NEXT: xorpd %xmm10, %xmm0
1626; SSE41-NEXT: movapd %xmm0, %xmm1
1627; SSE41-NEXT: pcmpgtd %xmm10, %xmm1
1628; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
1629; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
1630; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
1631; SSE41-NEXT: pand %xmm6, %xmm7
1632; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1633; SSE41-NEXT: por %xmm7, %xmm0
1634; SSE41-NEXT: pxor %xmm1, %xmm1
1635; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1
1636; SSE41-NEXT: packssdw %xmm5, %xmm1
1637; SSE41-NEXT: movapd %xmm2, %xmm0
1638; SSE41-NEXT: xorpd %xmm10, %xmm0
1639; SSE41-NEXT: movapd %xmm0, %xmm4
1640; SSE41-NEXT: pcmpgtd %xmm10, %xmm4
1641; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1642; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
1643; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1644; SSE41-NEXT: pand %xmm5, %xmm6
1645; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
1646; SSE41-NEXT: por %xmm6, %xmm0
1647; SSE41-NEXT: pxor %xmm4, %xmm4
1648; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4
1649; SSE41-NEXT: movapd %xmm8, %xmm0
1650; SSE41-NEXT: xorpd %xmm10, %xmm0
1651; SSE41-NEXT: movapd %xmm0, %xmm2
1652; SSE41-NEXT: pcmpgtd %xmm10, %xmm2
1653; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2]
1654; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
1655; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1656; SSE41-NEXT: pand %xmm5, %xmm6
1657; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
1658; SSE41-NEXT: por %xmm6, %xmm0
1659; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm3
1660; SSE41-NEXT: packssdw %xmm4, %xmm3
1661; SSE41-NEXT: packssdw %xmm3, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001662; SSE41-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001663; SSE41-NEXT: retq
1664;
1665; AVX1-LABEL: trunc_packus_v8i64_v8i8:
1666; AVX1: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001667; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [255,255,255,255]
1668; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1669; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255]
1670; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
1671; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm5
1672; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
1673; AVX1-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
1674; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1675; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
1676; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm4
1677; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
1678; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001679; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001680; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001681; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
1682; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm5
1683; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm6
1684; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001685; AVX1-NEXT: vpcmpgtq %xmm2, %xmm7, %xmm2
1686; AVX1-NEXT: vpand %xmm7, %xmm2, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001687; AVX1-NEXT: vpand %xmm1, %xmm6, %xmm1
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001688; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
1689; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm2
1690; AVX1-NEXT: vpand %xmm0, %xmm3, %xmm0
1691; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1692; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001693; AVX1-NEXT: vzeroupper
1694; AVX1-NEXT: retq
1695;
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001696; AVX2-LABEL: trunc_packus_v8i64_v8i8:
1697; AVX2: # %bb.0:
1698; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [255,255,255,255]
1699; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
1700; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
1701; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
1702; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
1703; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1704; AVX2-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
1705; AVX2-NEXT: vpand %ymm1, %ymm3, %ymm1
1706; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2
1707; AVX2-NEXT: vpand %ymm0, %ymm2, %ymm0
1708; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
1709; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1710; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1711; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
1712; AVX2-NEXT: vzeroupper
1713; AVX2-NEXT: retq
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001714;
1715; AVX512-LABEL: trunc_packus_v8i64_v8i8:
1716; AVX512: # %bb.0:
1717; AVX512-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
1718; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
1719; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
1720; AVX512-NEXT: vpmovqw %zmm0, %xmm0
1721; AVX512-NEXT: vzeroupper
1722; AVX512-NEXT: retq
; IR under test:
;   %1/%2 - signed minimum of each lane with 255 (upper clamp).
;   %3/%4 - signed maximum of that result with 0 (lower clamp).
;   %5    - truncate the clamped i64 lanes to i8 and return them.
1723 %1 = icmp slt <8 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
1724 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
1725 %3 = icmp sgt <8 x i64> %2, zeroinitializer
1726 %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
1727 %5 = trunc <8 x i64> %4 to <8 x i8>
1728 ret <8 x i8> %5
1729}
1730
Simon Pilgrim689d8132018-02-15 17:48:34 +00001731define void @trunc_packus_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
1732; SSE2-LABEL: trunc_packus_v8i64_v8i8_store:
1733; SSE2: # %bb.0:
1734; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [255,255]
1735; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
1736; SSE2-NEXT: movdqa %xmm3, %xmm4
1737; SSE2-NEXT: pxor %xmm10, %xmm4
1738; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483903,2147483903]
1739; SSE2-NEXT: movdqa %xmm9, %xmm6
1740; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
1741; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1742; SSE2-NEXT: pcmpeqd %xmm9, %xmm4
1743; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1744; SSE2-NEXT: pand %xmm7, %xmm4
1745; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm6[1,1,3,3]
1746; SSE2-NEXT: por %xmm4, %xmm11
1747; SSE2-NEXT: pand %xmm11, %xmm3
1748; SSE2-NEXT: pandn %xmm8, %xmm11
1749; SSE2-NEXT: por %xmm3, %xmm11
1750; SSE2-NEXT: movdqa %xmm2, %xmm3
1751; SSE2-NEXT: pxor %xmm10, %xmm3
1752; SSE2-NEXT: movdqa %xmm9, %xmm4
1753; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
1754; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm4[0,0,2,2]
1755; SSE2-NEXT: pcmpeqd %xmm9, %xmm3
1756; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,1,3,3]
1757; SSE2-NEXT: pand %xmm7, %xmm5
1758; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1759; SSE2-NEXT: por %xmm5, %xmm3
1760; SSE2-NEXT: pand %xmm3, %xmm2
1761; SSE2-NEXT: pandn %xmm8, %xmm3
1762; SSE2-NEXT: por %xmm2, %xmm3
1763; SSE2-NEXT: movdqa %xmm1, %xmm2
1764; SSE2-NEXT: pxor %xmm10, %xmm2
1765; SSE2-NEXT: movdqa %xmm9, %xmm4
1766; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
1767; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1768; SSE2-NEXT: pcmpeqd %xmm9, %xmm2
1769; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
1770; SSE2-NEXT: pand %xmm5, %xmm7
1771; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1772; SSE2-NEXT: por %xmm7, %xmm2
1773; SSE2-NEXT: pand %xmm2, %xmm1
1774; SSE2-NEXT: pandn %xmm8, %xmm2
1775; SSE2-NEXT: por %xmm1, %xmm2
1776; SSE2-NEXT: movdqa %xmm0, %xmm1
1777; SSE2-NEXT: pxor %xmm10, %xmm1
1778; SSE2-NEXT: movdqa %xmm9, %xmm4
1779; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
1780; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1781; SSE2-NEXT: pcmpeqd %xmm9, %xmm1
1782; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
1783; SSE2-NEXT: pand %xmm5, %xmm7
1784; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
1785; SSE2-NEXT: por %xmm7, %xmm1
1786; SSE2-NEXT: pand %xmm1, %xmm0
1787; SSE2-NEXT: pandn %xmm8, %xmm1
1788; SSE2-NEXT: por %xmm0, %xmm1
1789; SSE2-NEXT: movdqa %xmm1, %xmm0
1790; SSE2-NEXT: pxor %xmm10, %xmm0
1791; SSE2-NEXT: movdqa %xmm0, %xmm4
1792; SSE2-NEXT: pcmpgtd %xmm10, %xmm4
1793; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1794; SSE2-NEXT: pcmpeqd %xmm10, %xmm0
1795; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
1796; SSE2-NEXT: pand %xmm5, %xmm7
1797; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
1798; SSE2-NEXT: por %xmm7, %xmm0
1799; SSE2-NEXT: movdqa %xmm2, %xmm4
1800; SSE2-NEXT: pxor %xmm10, %xmm4
1801; SSE2-NEXT: movdqa %xmm4, %xmm5
1802; SSE2-NEXT: pcmpgtd %xmm10, %xmm5
1803; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
1804; SSE2-NEXT: pcmpeqd %xmm10, %xmm4
1805; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1806; SSE2-NEXT: pand %xmm7, %xmm4
1807; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm5[1,1,3,3]
1808; SSE2-NEXT: por %xmm4, %xmm7
1809; SSE2-NEXT: movdqa %xmm3, %xmm4
1810; SSE2-NEXT: pxor %xmm10, %xmm4
1811; SSE2-NEXT: movdqa %xmm4, %xmm5
1812; SSE2-NEXT: pcmpgtd %xmm10, %xmm5
1813; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm5[0,0,2,2]
1814; SSE2-NEXT: pcmpeqd %xmm10, %xmm4
1815; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
1816; SSE2-NEXT: pand %xmm9, %xmm6
1817; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
1818; SSE2-NEXT: por %xmm6, %xmm4
1819; SSE2-NEXT: movdqa %xmm11, %xmm5
1820; SSE2-NEXT: pxor %xmm10, %xmm5
1821; SSE2-NEXT: movdqa %xmm5, %xmm6
1822; SSE2-NEXT: pcmpgtd %xmm10, %xmm6
1823; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm6[0,0,2,2]
1824; SSE2-NEXT: pcmpeqd %xmm10, %xmm5
1825; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
1826; SSE2-NEXT: pand %xmm9, %xmm5
1827; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1828; SSE2-NEXT: por %xmm5, %xmm6
1829; SSE2-NEXT: pand %xmm8, %xmm6
1830; SSE2-NEXT: pand %xmm11, %xmm6
1831; SSE2-NEXT: pand %xmm8, %xmm4
1832; SSE2-NEXT: pand %xmm3, %xmm4
1833; SSE2-NEXT: packuswb %xmm6, %xmm4
1834; SSE2-NEXT: pand %xmm8, %xmm7
1835; SSE2-NEXT: pand %xmm2, %xmm7
1836; SSE2-NEXT: pand %xmm8, %xmm0
1837; SSE2-NEXT: pand %xmm1, %xmm0
1838; SSE2-NEXT: packuswb %xmm7, %xmm0
1839; SSE2-NEXT: packuswb %xmm4, %xmm0
1840; SSE2-NEXT: packuswb %xmm0, %xmm0
1841; SSE2-NEXT: movq %xmm0, (%rdi)
1842; SSE2-NEXT: retq
1843;
1844; SSSE3-LABEL: trunc_packus_v8i64_v8i8_store:
1845; SSSE3: # %bb.0:
1846; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [255,255]
1847; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
1848; SSSE3-NEXT: movdqa %xmm3, %xmm4
1849; SSSE3-NEXT: pxor %xmm10, %xmm4
1850; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483903,2147483903]
1851; SSSE3-NEXT: movdqa %xmm9, %xmm6
1852; SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
1853; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1854; SSSE3-NEXT: pcmpeqd %xmm9, %xmm4
1855; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1856; SSSE3-NEXT: pand %xmm7, %xmm4
1857; SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm6[1,1,3,3]
1858; SSSE3-NEXT: por %xmm4, %xmm11
1859; SSSE3-NEXT: pand %xmm11, %xmm3
1860; SSSE3-NEXT: pandn %xmm8, %xmm11
1861; SSSE3-NEXT: por %xmm3, %xmm11
1862; SSSE3-NEXT: movdqa %xmm2, %xmm3
1863; SSSE3-NEXT: pxor %xmm10, %xmm3
1864; SSSE3-NEXT: movdqa %xmm9, %xmm4
1865; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4
1866; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm4[0,0,2,2]
1867; SSSE3-NEXT: pcmpeqd %xmm9, %xmm3
1868; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,1,3,3]
1869; SSSE3-NEXT: pand %xmm7, %xmm5
1870; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1871; SSSE3-NEXT: por %xmm5, %xmm3
1872; SSSE3-NEXT: pand %xmm3, %xmm2
1873; SSSE3-NEXT: pandn %xmm8, %xmm3
1874; SSSE3-NEXT: por %xmm2, %xmm3
1875; SSSE3-NEXT: movdqa %xmm1, %xmm2
1876; SSSE3-NEXT: pxor %xmm10, %xmm2
1877; SSSE3-NEXT: movdqa %xmm9, %xmm4
1878; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4
1879; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1880; SSSE3-NEXT: pcmpeqd %xmm9, %xmm2
1881; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
1882; SSSE3-NEXT: pand %xmm5, %xmm7
1883; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1884; SSSE3-NEXT: por %xmm7, %xmm2
1885; SSSE3-NEXT: pand %xmm2, %xmm1
1886; SSSE3-NEXT: pandn %xmm8, %xmm2
1887; SSSE3-NEXT: por %xmm1, %xmm2
1888; SSSE3-NEXT: movdqa %xmm0, %xmm1
1889; SSSE3-NEXT: pxor %xmm10, %xmm1
1890; SSSE3-NEXT: movdqa %xmm9, %xmm4
1891; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
1892; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1893; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1
1894; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
1895; SSSE3-NEXT: pand %xmm5, %xmm7
1896; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
1897; SSSE3-NEXT: por %xmm7, %xmm1
1898; SSSE3-NEXT: pand %xmm1, %xmm0
1899; SSSE3-NEXT: pandn %xmm8, %xmm1
1900; SSSE3-NEXT: por %xmm0, %xmm1
1901; SSSE3-NEXT: movdqa %xmm1, %xmm0
1902; SSSE3-NEXT: pxor %xmm10, %xmm0
1903; SSSE3-NEXT: movdqa %xmm0, %xmm4
1904; SSSE3-NEXT: pcmpgtd %xmm10, %xmm4
1905; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1906; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0
1907; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
1908; SSSE3-NEXT: pand %xmm5, %xmm7
1909; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
1910; SSSE3-NEXT: por %xmm7, %xmm0
1911; SSSE3-NEXT: movdqa %xmm2, %xmm4
1912; SSSE3-NEXT: pxor %xmm10, %xmm4
1913; SSSE3-NEXT: movdqa %xmm4, %xmm5
1914; SSSE3-NEXT: pcmpgtd %xmm10, %xmm5
1915; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
1916; SSSE3-NEXT: pcmpeqd %xmm10, %xmm4
1917; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1918; SSSE3-NEXT: pand %xmm7, %xmm4
1919; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm5[1,1,3,3]
1920; SSSE3-NEXT: por %xmm4, %xmm7
1921; SSSE3-NEXT: movdqa %xmm3, %xmm4
1922; SSSE3-NEXT: pxor %xmm10, %xmm4
1923; SSSE3-NEXT: movdqa %xmm4, %xmm5
1924; SSSE3-NEXT: pcmpgtd %xmm10, %xmm5
1925; SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm5[0,0,2,2]
1926; SSSE3-NEXT: pcmpeqd %xmm10, %xmm4
1927; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
1928; SSSE3-NEXT: pand %xmm9, %xmm6
1929; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
1930; SSSE3-NEXT: por %xmm6, %xmm4
1931; SSSE3-NEXT: movdqa %xmm11, %xmm5
1932; SSSE3-NEXT: pxor %xmm10, %xmm5
1933; SSSE3-NEXT: movdqa %xmm5, %xmm6
1934; SSSE3-NEXT: pcmpgtd %xmm10, %xmm6
1935; SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm6[0,0,2,2]
1936; SSSE3-NEXT: pcmpeqd %xmm10, %xmm5
1937; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
1938; SSSE3-NEXT: pand %xmm9, %xmm5
1939; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1940; SSSE3-NEXT: por %xmm5, %xmm6
1941; SSSE3-NEXT: pand %xmm8, %xmm6
1942; SSSE3-NEXT: pand %xmm11, %xmm6
1943; SSSE3-NEXT: pand %xmm8, %xmm4
1944; SSSE3-NEXT: pand %xmm3, %xmm4
1945; SSSE3-NEXT: packuswb %xmm6, %xmm4
1946; SSSE3-NEXT: pand %xmm8, %xmm7
1947; SSSE3-NEXT: pand %xmm2, %xmm7
1948; SSSE3-NEXT: pand %xmm8, %xmm0
1949; SSSE3-NEXT: pand %xmm1, %xmm0
1950; SSSE3-NEXT: packuswb %xmm7, %xmm0
1951; SSSE3-NEXT: packuswb %xmm4, %xmm0
1952; SSSE3-NEXT: packuswb %xmm0, %xmm0
1953; SSSE3-NEXT: movq %xmm0, (%rdi)
1954; SSSE3-NEXT: retq
1955;
1956; SSE41-LABEL: trunc_packus_v8i64_v8i8_store:
1957; SSE41: # %bb.0:
1958; SSE41-NEXT: movdqa %xmm0, %xmm9
1959; SSE41-NEXT: movapd {{.*#+}} xmm8 = [255,255]
1960; SSE41-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
1961; SSE41-NEXT: movdqa %xmm3, %xmm0
1962; SSE41-NEXT: pxor %xmm10, %xmm0
1963; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [2147483903,2147483903]
1964; SSE41-NEXT: movdqa %xmm5, %xmm4
1965; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
1966; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm4[0,0,2,2]
1967; SSE41-NEXT: pcmpeqd %xmm5, %xmm0
1968; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1969; SSE41-NEXT: pand %xmm7, %xmm6
1970; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
1971; SSE41-NEXT: por %xmm6, %xmm0
1972; SSE41-NEXT: movapd %xmm8, %xmm11
1973; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm11
1974; SSE41-NEXT: movdqa %xmm2, %xmm0
1975; SSE41-NEXT: pxor %xmm10, %xmm0
1976; SSE41-NEXT: movdqa %xmm5, %xmm3
1977; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
1978; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
1979; SSE41-NEXT: pcmpeqd %xmm5, %xmm0
1980; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1981; SSE41-NEXT: pand %xmm4, %xmm6
1982; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
1983; SSE41-NEXT: por %xmm6, %xmm0
1984; SSE41-NEXT: movapd %xmm8, %xmm3
1985; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm3
1986; SSE41-NEXT: movdqa %xmm1, %xmm0
1987; SSE41-NEXT: pxor %xmm10, %xmm0
1988; SSE41-NEXT: movdqa %xmm5, %xmm2
1989; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
1990; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
1991; SSE41-NEXT: pcmpeqd %xmm5, %xmm0
1992; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1993; SSE41-NEXT: pand %xmm4, %xmm6
1994; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
1995; SSE41-NEXT: por %xmm6, %xmm0
1996; SSE41-NEXT: movapd %xmm8, %xmm4
1997; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4
1998; SSE41-NEXT: movdqa %xmm9, %xmm0
1999; SSE41-NEXT: pxor %xmm10, %xmm0
2000; SSE41-NEXT: movdqa %xmm5, %xmm1
2001; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
2002; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
2003; SSE41-NEXT: pcmpeqd %xmm5, %xmm0
2004; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
2005; SSE41-NEXT: pand %xmm2, %xmm5
2006; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2007; SSE41-NEXT: por %xmm5, %xmm0
2008; SSE41-NEXT: movapd %xmm8, %xmm5
2009; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm5
2010; SSE41-NEXT: movapd %xmm5, %xmm0
2011; SSE41-NEXT: xorpd %xmm10, %xmm0
2012; SSE41-NEXT: movapd %xmm0, %xmm1
2013; SSE41-NEXT: pcmpgtd %xmm10, %xmm1
2014; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
2015; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2016; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
2017; SSE41-NEXT: pand %xmm2, %xmm6
2018; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2019; SSE41-NEXT: por %xmm6, %xmm0
2020; SSE41-NEXT: pxor %xmm2, %xmm2
2021; SSE41-NEXT: pxor %xmm1, %xmm1
2022; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1
2023; SSE41-NEXT: movapd %xmm4, %xmm0
2024; SSE41-NEXT: xorpd %xmm10, %xmm0
2025; SSE41-NEXT: movapd %xmm0, %xmm5
2026; SSE41-NEXT: pcmpgtd %xmm10, %xmm5
2027; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
2028; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2029; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2030; SSE41-NEXT: pand %xmm6, %xmm7
2031; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
2032; SSE41-NEXT: por %xmm7, %xmm0
2033; SSE41-NEXT: pxor %xmm5, %xmm5
2034; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm5
2035; SSE41-NEXT: movapd %xmm3, %xmm0
2036; SSE41-NEXT: xorpd %xmm10, %xmm0
2037; SSE41-NEXT: movapd %xmm0, %xmm4
2038; SSE41-NEXT: pcmpgtd %xmm10, %xmm4
2039; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
2040; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2041; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2042; SSE41-NEXT: pand %xmm6, %xmm7
2043; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
2044; SSE41-NEXT: por %xmm7, %xmm0
2045; SSE41-NEXT: pxor %xmm4, %xmm4
2046; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm4
2047; SSE41-NEXT: movapd %xmm11, %xmm0
2048; SSE41-NEXT: xorpd %xmm10, %xmm0
2049; SSE41-NEXT: movapd %xmm0, %xmm3
2050; SSE41-NEXT: pcmpgtd %xmm10, %xmm3
2051; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
2052; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2053; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2054; SSE41-NEXT: pand %xmm6, %xmm7
2055; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2056; SSE41-NEXT: por %xmm7, %xmm0
2057; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm2
2058; SSE41-NEXT: andpd %xmm8, %xmm2
2059; SSE41-NEXT: andpd %xmm8, %xmm4
2060; SSE41-NEXT: packuswb %xmm2, %xmm4
2061; SSE41-NEXT: andpd %xmm8, %xmm5
2062; SSE41-NEXT: andpd %xmm8, %xmm1
2063; SSE41-NEXT: packuswb %xmm5, %xmm1
2064; SSE41-NEXT: packuswb %xmm4, %xmm1
2065; SSE41-NEXT: packuswb %xmm1, %xmm1
2066; SSE41-NEXT: movq %xmm1, (%rdi)
2067; SSE41-NEXT: retq
2068;
2069; AVX1-LABEL: trunc_packus_v8i64_v8i8_store:
2070; AVX1: # %bb.0:
2071; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [255,255,255,255]
2072; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2073; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255]
2074; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
2075; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm5
2076; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
2077; AVX1-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
2078; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2079; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
2080; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm5
2081; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
2082; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
2083; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2084; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm8
2085; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
2086; AVX1-NEXT: vpcmpgtq %xmm2, %xmm5, %xmm6
2087; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm7
2088; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2089; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
2090; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
2091; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
2092; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
2093; AVX1-NEXT: vpand %xmm1, %xmm7, %xmm1
2094; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
2095; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm2
2096; AVX1-NEXT: vpand %xmm2, %xmm6, %xmm2
2097; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
2098; AVX1-NEXT: vpand %xmm0, %xmm8, %xmm0
2099; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
2100; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2101; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2102; AVX1-NEXT: vmovq %xmm0, (%rdi)
2103; AVX1-NEXT: vzeroupper
2104; AVX1-NEXT: retq
2105;
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00002106; AVX2-LABEL: trunc_packus_v8i64_v8i8_store:
2107; AVX2: # %bb.0:
2108; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [255,255,255,255]
2109; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
2110; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
2111; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
2112; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
2113; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
2114; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm3
2115; AVX2-NEXT: vpand %ymm0, %ymm3, %ymm0
2116; AVX2-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm2
2117; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
2118; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2119; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2120; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
2121; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2122; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
2123; AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
2124; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2125; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2126; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
2127; AVX2-NEXT: vmovq %xmm0, (%rdi)
2128; AVX2-NEXT: vzeroupper
2129; AVX2-NEXT: retq
Simon Pilgrim689d8132018-02-15 17:48:34 +00002130;
2131; AVX512-LABEL: trunc_packus_v8i64_v8i8_store:
2132; AVX512: # %bb.0:
2133; AVX512-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
2134; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
2135; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
2136; AVX512-NEXT: vpmovqb %zmm0, (%rdi)
2137; AVX512-NEXT: vzeroupper
2138; AVX512-NEXT: retq
2139 %1 = icmp slt <8 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
2140 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
2141 %3 = icmp sgt <8 x i64> %2, zeroinitializer
2142 %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
2143 %5 = trunc <8 x i64> %4 to <8 x i8>
2144 store <8 x i8> %5, <8 x i8> *%p1
2145 ret void
2146}
2147
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002148define <16 x i8> @trunc_packus_v16i64_v16i8(<16 x i64> %a0) {
2149; SSE2-LABEL: trunc_packus_v16i64_v16i8:
2150; SSE2: # %bb.0:
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002151; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [255,255]
2152; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0]
2153; SSE2-NEXT: movdqa %xmm6, %xmm9
2154; SSE2-NEXT: pxor %xmm8, %xmm9
Simon Pilgrim0be55672018-02-11 10:52:37 +00002155; SSE2-NEXT: movdqa {{.*#+}} xmm11 = [2147483903,2147483903]
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002156; SSE2-NEXT: movdqa %xmm11, %xmm12
2157; SSE2-NEXT: pcmpgtd %xmm9, %xmm12
2158; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
2159; SSE2-NEXT: pcmpeqd %xmm11, %xmm9
2160; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm9[1,1,3,3]
2161; SSE2-NEXT: pand %xmm13, %xmm14
2162; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm12[1,1,3,3]
2163; SSE2-NEXT: por %xmm14, %xmm9
2164; SSE2-NEXT: pand %xmm9, %xmm6
2165; SSE2-NEXT: pandn %xmm10, %xmm9
2166; SSE2-NEXT: por %xmm6, %xmm9
2167; SSE2-NEXT: movdqa %xmm7, %xmm6
2168; SSE2-NEXT: pxor %xmm8, %xmm6
2169; SSE2-NEXT: movdqa %xmm11, %xmm12
2170; SSE2-NEXT: pcmpgtd %xmm6, %xmm12
2171; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002172; SSE2-NEXT: pcmpeqd %xmm11, %xmm6
2173; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002174; SSE2-NEXT: pand %xmm13, %xmm6
2175; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm12[1,1,3,3]
2176; SSE2-NEXT: por %xmm6, %xmm12
2177; SSE2-NEXT: pand %xmm12, %xmm7
2178; SSE2-NEXT: pandn %xmm10, %xmm12
2179; SSE2-NEXT: por %xmm7, %xmm12
2180; SSE2-NEXT: movdqa %xmm4, %xmm6
2181; SSE2-NEXT: pxor %xmm8, %xmm6
2182; SSE2-NEXT: movdqa %xmm11, %xmm7
2183; SSE2-NEXT: pcmpgtd %xmm6, %xmm7
2184; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm7[0,0,2,2]
2185; SSE2-NEXT: pcmpeqd %xmm11, %xmm6
Simon Pilgrim0be55672018-02-11 10:52:37 +00002186; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002187; SSE2-NEXT: pand %xmm13, %xmm6
2188; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm7[1,1,3,3]
2189; SSE2-NEXT: por %xmm6, %xmm13
2190; SSE2-NEXT: pand %xmm13, %xmm4
2191; SSE2-NEXT: pandn %xmm10, %xmm13
2192; SSE2-NEXT: por %xmm4, %xmm13
2193; SSE2-NEXT: movdqa %xmm5, %xmm4
2194; SSE2-NEXT: pxor %xmm8, %xmm4
2195; SSE2-NEXT: movdqa %xmm11, %xmm6
2196; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
2197; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002198; SSE2-NEXT: pcmpeqd %xmm11, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002199; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002200; SSE2-NEXT: pand %xmm7, %xmm4
2201; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm6[1,1,3,3]
2202; SSE2-NEXT: por %xmm4, %xmm14
2203; SSE2-NEXT: pand %xmm14, %xmm5
2204; SSE2-NEXT: pandn %xmm10, %xmm14
2205; SSE2-NEXT: por %xmm5, %xmm14
2206; SSE2-NEXT: movdqa %xmm2, %xmm4
2207; SSE2-NEXT: pxor %xmm8, %xmm4
2208; SSE2-NEXT: movdqa %xmm11, %xmm5
2209; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
2210; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
2211; SSE2-NEXT: pcmpeqd %xmm11, %xmm4
2212; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2213; SSE2-NEXT: pand %xmm7, %xmm4
Simon Pilgrim0be55672018-02-11 10:52:37 +00002214; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2215; SSE2-NEXT: por %xmm4, %xmm5
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002216; SSE2-NEXT: pand %xmm5, %xmm2
2217; SSE2-NEXT: pandn %xmm10, %xmm5
2218; SSE2-NEXT: por %xmm2, %xmm5
2219; SSE2-NEXT: movdqa %xmm3, %xmm2
2220; SSE2-NEXT: pxor %xmm8, %xmm2
2221; SSE2-NEXT: movdqa %xmm11, %xmm4
2222; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
2223; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm4[0,0,2,2]
2224; SSE2-NEXT: pcmpeqd %xmm11, %xmm2
2225; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm2[1,1,3,3]
2226; SSE2-NEXT: pand %xmm7, %xmm6
2227; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
2228; SSE2-NEXT: por %xmm6, %xmm2
2229; SSE2-NEXT: pand %xmm2, %xmm3
2230; SSE2-NEXT: pandn %xmm10, %xmm2
2231; SSE2-NEXT: por %xmm3, %xmm2
2232; SSE2-NEXT: movdqa %xmm0, %xmm3
2233; SSE2-NEXT: pxor %xmm8, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00002234; SSE2-NEXT: movdqa %xmm11, %xmm4
2235; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002236; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002237; SSE2-NEXT: pcmpeqd %xmm11, %xmm3
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002238; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
2239; SSE2-NEXT: pand %xmm6, %xmm7
2240; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
2241; SSE2-NEXT: por %xmm7, %xmm3
2242; SSE2-NEXT: pand %xmm3, %xmm0
2243; SSE2-NEXT: pandn %xmm10, %xmm3
2244; SSE2-NEXT: por %xmm0, %xmm3
2245; SSE2-NEXT: movdqa %xmm1, %xmm0
2246; SSE2-NEXT: pxor %xmm8, %xmm0
2247; SSE2-NEXT: movdqa %xmm11, %xmm4
2248; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
2249; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
2250; SSE2-NEXT: pcmpeqd %xmm11, %xmm0
2251; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2252; SSE2-NEXT: pand %xmm6, %xmm0
2253; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2254; SSE2-NEXT: por %xmm0, %xmm4
2255; SSE2-NEXT: pand %xmm4, %xmm1
2256; SSE2-NEXT: pandn %xmm10, %xmm4
2257; SSE2-NEXT: por %xmm1, %xmm4
2258; SSE2-NEXT: movdqa %xmm4, %xmm0
2259; SSE2-NEXT: pxor %xmm8, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002260; SSE2-NEXT: movdqa %xmm0, %xmm1
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002261; SSE2-NEXT: pcmpgtd %xmm8, %xmm1
2262; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
2263; SSE2-NEXT: pcmpeqd %xmm8, %xmm0
2264; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2265; SSE2-NEXT: pand %xmm6, %xmm0
2266; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2267; SSE2-NEXT: por %xmm0, %xmm1
2268; SSE2-NEXT: pand %xmm4, %xmm1
2269; SSE2-NEXT: movdqa %xmm3, %xmm0
2270; SSE2-NEXT: pxor %xmm8, %xmm0
2271; SSE2-NEXT: movdqa %xmm0, %xmm4
2272; SSE2-NEXT: pcmpgtd %xmm8, %xmm4
2273; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
2274; SSE2-NEXT: pcmpeqd %xmm8, %xmm0
2275; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2276; SSE2-NEXT: pand %xmm6, %xmm7
2277; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
2278; SSE2-NEXT: por %xmm7, %xmm0
2279; SSE2-NEXT: pand %xmm3, %xmm0
2280; SSE2-NEXT: packuswb %xmm1, %xmm0
2281; SSE2-NEXT: movdqa %xmm2, %xmm1
2282; SSE2-NEXT: pxor %xmm8, %xmm1
2283; SSE2-NEXT: movdqa %xmm1, %xmm3
2284; SSE2-NEXT: pcmpgtd %xmm8, %xmm3
2285; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2286; SSE2-NEXT: pcmpeqd %xmm8, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00002287; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2288; SSE2-NEXT: pand %xmm4, %xmm1
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002289; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002290; SSE2-NEXT: por %xmm1, %xmm3
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002291; SSE2-NEXT: pand %xmm2, %xmm3
2292; SSE2-NEXT: movdqa %xmm5, %xmm1
2293; SSE2-NEXT: pxor %xmm8, %xmm1
2294; SSE2-NEXT: movdqa %xmm1, %xmm2
2295; SSE2-NEXT: pcmpgtd %xmm8, %xmm2
2296; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
2297; SSE2-NEXT: pcmpeqd %xmm8, %xmm1
2298; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2299; SSE2-NEXT: pand %xmm4, %xmm1
2300; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2301; SSE2-NEXT: por %xmm1, %xmm2
2302; SSE2-NEXT: pand %xmm5, %xmm2
2303; SSE2-NEXT: packuswb %xmm3, %xmm2
2304; SSE2-NEXT: packuswb %xmm2, %xmm0
2305; SSE2-NEXT: movdqa %xmm14, %xmm1
2306; SSE2-NEXT: pxor %xmm8, %xmm1
2307; SSE2-NEXT: movdqa %xmm1, %xmm2
2308; SSE2-NEXT: pcmpgtd %xmm8, %xmm2
2309; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
2310; SSE2-NEXT: pcmpeqd %xmm8, %xmm1
2311; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2312; SSE2-NEXT: pand %xmm3, %xmm1
2313; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2314; SSE2-NEXT: por %xmm1, %xmm2
2315; SSE2-NEXT: pand %xmm14, %xmm2
2316; SSE2-NEXT: movdqa %xmm13, %xmm1
2317; SSE2-NEXT: pxor %xmm8, %xmm1
2318; SSE2-NEXT: movdqa %xmm1, %xmm3
2319; SSE2-NEXT: pcmpgtd %xmm8, %xmm3
2320; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2321; SSE2-NEXT: pcmpeqd %xmm8, %xmm1
2322; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,3,3]
2323; SSE2-NEXT: pand %xmm4, %xmm5
2324; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
2325; SSE2-NEXT: por %xmm5, %xmm1
2326; SSE2-NEXT: pand %xmm13, %xmm1
2327; SSE2-NEXT: packuswb %xmm2, %xmm1
2328; SSE2-NEXT: movdqa %xmm12, %xmm2
2329; SSE2-NEXT: pxor %xmm8, %xmm2
2330; SSE2-NEXT: movdqa %xmm2, %xmm3
2331; SSE2-NEXT: pcmpgtd %xmm8, %xmm3
2332; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2333; SSE2-NEXT: pcmpeqd %xmm8, %xmm2
2334; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2335; SSE2-NEXT: pand %xmm4, %xmm2
2336; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2337; SSE2-NEXT: por %xmm2, %xmm3
2338; SSE2-NEXT: pand %xmm12, %xmm3
2339; SSE2-NEXT: movdqa %xmm9, %xmm2
2340; SSE2-NEXT: pxor %xmm8, %xmm2
2341; SSE2-NEXT: movdqa %xmm2, %xmm4
2342; SSE2-NEXT: pcmpgtd %xmm8, %xmm4
2343; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2344; SSE2-NEXT: pcmpeqd %xmm8, %xmm2
2345; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2346; SSE2-NEXT: pand %xmm5, %xmm2
2347; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2348; SSE2-NEXT: por %xmm2, %xmm4
2349; SSE2-NEXT: pand %xmm9, %xmm4
2350; SSE2-NEXT: packuswb %xmm3, %xmm4
2351; SSE2-NEXT: packuswb %xmm4, %xmm1
2352; SSE2-NEXT: packuswb %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002353; SSE2-NEXT: retq
2354;
2355; SSSE3-LABEL: trunc_packus_v16i64_v16i8:
2356; SSSE3: # %bb.0:
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002357; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [255,255]
2358; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0]
2359; SSSE3-NEXT: movdqa %xmm6, %xmm9
2360; SSSE3-NEXT: pxor %xmm8, %xmm9
Simon Pilgrim0be55672018-02-11 10:52:37 +00002361; SSSE3-NEXT: movdqa {{.*#+}} xmm11 = [2147483903,2147483903]
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002362; SSSE3-NEXT: movdqa %xmm11, %xmm12
2363; SSSE3-NEXT: pcmpgtd %xmm9, %xmm12
2364; SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
2365; SSSE3-NEXT: pcmpeqd %xmm11, %xmm9
2366; SSSE3-NEXT: pshufd {{.*#+}} xmm14 = xmm9[1,1,3,3]
2367; SSSE3-NEXT: pand %xmm13, %xmm14
2368; SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm12[1,1,3,3]
2369; SSSE3-NEXT: por %xmm14, %xmm9
2370; SSSE3-NEXT: pand %xmm9, %xmm6
2371; SSSE3-NEXT: pandn %xmm10, %xmm9
2372; SSSE3-NEXT: por %xmm6, %xmm9
2373; SSSE3-NEXT: movdqa %xmm7, %xmm6
2374; SSSE3-NEXT: pxor %xmm8, %xmm6
2375; SSSE3-NEXT: movdqa %xmm11, %xmm12
2376; SSSE3-NEXT: pcmpgtd %xmm6, %xmm12
2377; SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002378; SSSE3-NEXT: pcmpeqd %xmm11, %xmm6
2379; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002380; SSSE3-NEXT: pand %xmm13, %xmm6
2381; SSSE3-NEXT: pshufd {{.*#+}} xmm12 = xmm12[1,1,3,3]
2382; SSSE3-NEXT: por %xmm6, %xmm12
2383; SSSE3-NEXT: pand %xmm12, %xmm7
2384; SSSE3-NEXT: pandn %xmm10, %xmm12
2385; SSSE3-NEXT: por %xmm7, %xmm12
2386; SSSE3-NEXT: movdqa %xmm4, %xmm6
2387; SSSE3-NEXT: pxor %xmm8, %xmm6
2388; SSSE3-NEXT: movdqa %xmm11, %xmm7
2389; SSSE3-NEXT: pcmpgtd %xmm6, %xmm7
2390; SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm7[0,0,2,2]
2391; SSSE3-NEXT: pcmpeqd %xmm11, %xmm6
Simon Pilgrim0be55672018-02-11 10:52:37 +00002392; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002393; SSSE3-NEXT: pand %xmm13, %xmm6
2394; SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm7[1,1,3,3]
2395; SSSE3-NEXT: por %xmm6, %xmm13
2396; SSSE3-NEXT: pand %xmm13, %xmm4
2397; SSSE3-NEXT: pandn %xmm10, %xmm13
2398; SSSE3-NEXT: por %xmm4, %xmm13
2399; SSSE3-NEXT: movdqa %xmm5, %xmm4
2400; SSSE3-NEXT: pxor %xmm8, %xmm4
2401; SSSE3-NEXT: movdqa %xmm11, %xmm6
2402; SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
2403; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002404; SSSE3-NEXT: pcmpeqd %xmm11, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002405; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002406; SSSE3-NEXT: pand %xmm7, %xmm4
2407; SSSE3-NEXT: pshufd {{.*#+}} xmm14 = xmm6[1,1,3,3]
2408; SSSE3-NEXT: por %xmm4, %xmm14
2409; SSSE3-NEXT: pand %xmm14, %xmm5
2410; SSSE3-NEXT: pandn %xmm10, %xmm14
2411; SSSE3-NEXT: por %xmm5, %xmm14
2412; SSSE3-NEXT: movdqa %xmm2, %xmm4
2413; SSSE3-NEXT: pxor %xmm8, %xmm4
2414; SSSE3-NEXT: movdqa %xmm11, %xmm5
2415; SSSE3-NEXT: pcmpgtd %xmm4, %xmm5
2416; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
2417; SSSE3-NEXT: pcmpeqd %xmm11, %xmm4
2418; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2419; SSSE3-NEXT: pand %xmm7, %xmm4
Simon Pilgrim0be55672018-02-11 10:52:37 +00002420; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2421; SSSE3-NEXT: por %xmm4, %xmm5
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002422; SSSE3-NEXT: pand %xmm5, %xmm2
2423; SSSE3-NEXT: pandn %xmm10, %xmm5
2424; SSSE3-NEXT: por %xmm2, %xmm5
2425; SSSE3-NEXT: movdqa %xmm3, %xmm2
2426; SSSE3-NEXT: pxor %xmm8, %xmm2
2427; SSSE3-NEXT: movdqa %xmm11, %xmm4
2428; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4
2429; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm4[0,0,2,2]
2430; SSSE3-NEXT: pcmpeqd %xmm11, %xmm2
2431; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm2[1,1,3,3]
2432; SSSE3-NEXT: pand %xmm7, %xmm6
2433; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
2434; SSSE3-NEXT: por %xmm6, %xmm2
2435; SSSE3-NEXT: pand %xmm2, %xmm3
2436; SSSE3-NEXT: pandn %xmm10, %xmm2
2437; SSSE3-NEXT: por %xmm3, %xmm2
2438; SSSE3-NEXT: movdqa %xmm0, %xmm3
2439; SSSE3-NEXT: pxor %xmm8, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00002440; SSSE3-NEXT: movdqa %xmm11, %xmm4
2441; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002442; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002443; SSSE3-NEXT: pcmpeqd %xmm11, %xmm3
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002444; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
2445; SSSE3-NEXT: pand %xmm6, %xmm7
2446; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
2447; SSSE3-NEXT: por %xmm7, %xmm3
2448; SSSE3-NEXT: pand %xmm3, %xmm0
2449; SSSE3-NEXT: pandn %xmm10, %xmm3
2450; SSSE3-NEXT: por %xmm0, %xmm3
2451; SSSE3-NEXT: movdqa %xmm1, %xmm0
2452; SSSE3-NEXT: pxor %xmm8, %xmm0
2453; SSSE3-NEXT: movdqa %xmm11, %xmm4
2454; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4
2455; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
2456; SSSE3-NEXT: pcmpeqd %xmm11, %xmm0
2457; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2458; SSSE3-NEXT: pand %xmm6, %xmm0
2459; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2460; SSSE3-NEXT: por %xmm0, %xmm4
2461; SSSE3-NEXT: pand %xmm4, %xmm1
2462; SSSE3-NEXT: pandn %xmm10, %xmm4
2463; SSSE3-NEXT: por %xmm1, %xmm4
2464; SSSE3-NEXT: movdqa %xmm4, %xmm0
2465; SSSE3-NEXT: pxor %xmm8, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002466; SSSE3-NEXT: movdqa %xmm0, %xmm1
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002467; SSSE3-NEXT: pcmpgtd %xmm8, %xmm1
2468; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
2469; SSSE3-NEXT: pcmpeqd %xmm8, %xmm0
2470; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2471; SSSE3-NEXT: pand %xmm6, %xmm0
2472; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2473; SSSE3-NEXT: por %xmm0, %xmm1
2474; SSSE3-NEXT: pand %xmm4, %xmm1
2475; SSSE3-NEXT: movdqa %xmm3, %xmm0
2476; SSSE3-NEXT: pxor %xmm8, %xmm0
2477; SSSE3-NEXT: movdqa %xmm0, %xmm4
2478; SSSE3-NEXT: pcmpgtd %xmm8, %xmm4
2479; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
2480; SSSE3-NEXT: pcmpeqd %xmm8, %xmm0
2481; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2482; SSSE3-NEXT: pand %xmm6, %xmm7
2483; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
2484; SSSE3-NEXT: por %xmm7, %xmm0
2485; SSSE3-NEXT: pand %xmm3, %xmm0
2486; SSSE3-NEXT: packuswb %xmm1, %xmm0
2487; SSSE3-NEXT: movdqa %xmm2, %xmm1
2488; SSSE3-NEXT: pxor %xmm8, %xmm1
2489; SSSE3-NEXT: movdqa %xmm1, %xmm3
2490; SSSE3-NEXT: pcmpgtd %xmm8, %xmm3
2491; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2492; SSSE3-NEXT: pcmpeqd %xmm8, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00002493; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2494; SSSE3-NEXT: pand %xmm4, %xmm1
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002495; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002496; SSSE3-NEXT: por %xmm1, %xmm3
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002497; SSSE3-NEXT: pand %xmm2, %xmm3
2498; SSSE3-NEXT: movdqa %xmm5, %xmm1
2499; SSSE3-NEXT: pxor %xmm8, %xmm1
2500; SSSE3-NEXT: movdqa %xmm1, %xmm2
2501; SSSE3-NEXT: pcmpgtd %xmm8, %xmm2
2502; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
2503; SSSE3-NEXT: pcmpeqd %xmm8, %xmm1
2504; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2505; SSSE3-NEXT: pand %xmm4, %xmm1
2506; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2507; SSSE3-NEXT: por %xmm1, %xmm2
2508; SSSE3-NEXT: pand %xmm5, %xmm2
2509; SSSE3-NEXT: packuswb %xmm3, %xmm2
2510; SSSE3-NEXT: packuswb %xmm2, %xmm0
2511; SSSE3-NEXT: movdqa %xmm14, %xmm1
2512; SSSE3-NEXT: pxor %xmm8, %xmm1
2513; SSSE3-NEXT: movdqa %xmm1, %xmm2
2514; SSSE3-NEXT: pcmpgtd %xmm8, %xmm2
2515; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
2516; SSSE3-NEXT: pcmpeqd %xmm8, %xmm1
2517; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2518; SSSE3-NEXT: pand %xmm3, %xmm1
2519; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2520; SSSE3-NEXT: por %xmm1, %xmm2
2521; SSSE3-NEXT: pand %xmm14, %xmm2
2522; SSSE3-NEXT: movdqa %xmm13, %xmm1
2523; SSSE3-NEXT: pxor %xmm8, %xmm1
2524; SSSE3-NEXT: movdqa %xmm1, %xmm3
2525; SSSE3-NEXT: pcmpgtd %xmm8, %xmm3
2526; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2527; SSSE3-NEXT: pcmpeqd %xmm8, %xmm1
2528; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,3,3]
2529; SSSE3-NEXT: pand %xmm4, %xmm5
2530; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
2531; SSSE3-NEXT: por %xmm5, %xmm1
2532; SSSE3-NEXT: pand %xmm13, %xmm1
2533; SSSE3-NEXT: packuswb %xmm2, %xmm1
2534; SSSE3-NEXT: movdqa %xmm12, %xmm2
2535; SSSE3-NEXT: pxor %xmm8, %xmm2
2536; SSSE3-NEXT: movdqa %xmm2, %xmm3
2537; SSSE3-NEXT: pcmpgtd %xmm8, %xmm3
2538; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2539; SSSE3-NEXT: pcmpeqd %xmm8, %xmm2
2540; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2541; SSSE3-NEXT: pand %xmm4, %xmm2
2542; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2543; SSSE3-NEXT: por %xmm2, %xmm3
2544; SSSE3-NEXT: pand %xmm12, %xmm3
2545; SSSE3-NEXT: movdqa %xmm9, %xmm2
2546; SSSE3-NEXT: pxor %xmm8, %xmm2
2547; SSSE3-NEXT: movdqa %xmm2, %xmm4
2548; SSSE3-NEXT: pcmpgtd %xmm8, %xmm4
2549; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2550; SSSE3-NEXT: pcmpeqd %xmm8, %xmm2
2551; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2552; SSSE3-NEXT: pand %xmm5, %xmm2
2553; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2554; SSSE3-NEXT: por %xmm2, %xmm4
2555; SSSE3-NEXT: pand %xmm9, %xmm4
2556; SSSE3-NEXT: packuswb %xmm3, %xmm4
2557; SSSE3-NEXT: packuswb %xmm4, %xmm1
2558; SSSE3-NEXT: packuswb %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002559; SSSE3-NEXT: retq
2560;
2561; SSE41-LABEL: trunc_packus_v16i64_v16i8:
2562; SSE41: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00002563; SSE41-NEXT: movdqa %xmm0, %xmm8
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002564; SSE41-NEXT: movapd {{.*#+}} xmm11 = [255,255]
2565; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,0,2147483648,0]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002566; SSE41-NEXT: movdqa %xmm6, %xmm0
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002567; SSE41-NEXT: pxor %xmm9, %xmm0
2568; SSE41-NEXT: movdqa {{.*#+}} xmm12 = [2147483903,2147483903]
2569; SSE41-NEXT: movdqa %xmm12, %xmm10
2570; SSE41-NEXT: pcmpgtd %xmm0, %xmm10
2571; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm10[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002572; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2573; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm0[1,1,3,3]
2574; SSE41-NEXT: pand %xmm13, %xmm14
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002575; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm10[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002576; SSE41-NEXT: por %xmm14, %xmm0
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002577; SSE41-NEXT: movapd %xmm11, %xmm10
2578; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm10
2579; SSE41-NEXT: movdqa %xmm7, %xmm0
2580; SSE41-NEXT: pxor %xmm9, %xmm0
2581; SSE41-NEXT: movdqa %xmm12, %xmm6
2582; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
2583; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm6[0,0,2,2]
2584; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2585; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm0[1,1,3,3]
2586; SSE41-NEXT: pand %xmm13, %xmm14
2587; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
2588; SSE41-NEXT: por %xmm14, %xmm0
2589; SSE41-NEXT: movapd %xmm11, %xmm13
2590; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm13
2591; SSE41-NEXT: movdqa %xmm4, %xmm0
2592; SSE41-NEXT: pxor %xmm9, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002593; SSE41-NEXT: movdqa %xmm12, %xmm6
2594; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
2595; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm6[0,0,2,2]
2596; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2597; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2598; SSE41-NEXT: pand %xmm14, %xmm7
2599; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
2600; SSE41-NEXT: por %xmm7, %xmm0
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002601; SSE41-NEXT: movapd %xmm11, %xmm14
2602; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm14
2603; SSE41-NEXT: movdqa %xmm5, %xmm0
2604; SSE41-NEXT: pxor %xmm9, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002605; SSE41-NEXT: movdqa %xmm12, %xmm4
2606; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
2607; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
2608; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2609; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2610; SSE41-NEXT: pand %xmm6, %xmm7
2611; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
2612; SSE41-NEXT: por %xmm7, %xmm0
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002613; SSE41-NEXT: movapd %xmm11, %xmm15
2614; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm15
Simon Pilgrim0be55672018-02-11 10:52:37 +00002615; SSE41-NEXT: movdqa %xmm2, %xmm0
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002616; SSE41-NEXT: pxor %xmm9, %xmm0
2617; SSE41-NEXT: movdqa %xmm12, %xmm5
2618; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
2619; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002620; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2621; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2622; SSE41-NEXT: pand %xmm6, %xmm7
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002623; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002624; SSE41-NEXT: por %xmm7, %xmm0
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002625; SSE41-NEXT: movapd %xmm11, %xmm5
2626; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5
2627; SSE41-NEXT: movdqa %xmm3, %xmm0
2628; SSE41-NEXT: pxor %xmm9, %xmm0
2629; SSE41-NEXT: movdqa %xmm12, %xmm2
2630; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
2631; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2]
2632; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2633; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2634; SSE41-NEXT: pand %xmm6, %xmm7
2635; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
2636; SSE41-NEXT: por %xmm7, %xmm0
2637; SSE41-NEXT: movapd %xmm11, %xmm6
2638; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm6
2639; SSE41-NEXT: movdqa %xmm8, %xmm0
2640; SSE41-NEXT: pxor %xmm9, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002641; SSE41-NEXT: movdqa %xmm12, %xmm2
2642; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
2643; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
2644; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2645; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2646; SSE41-NEXT: pand %xmm3, %xmm7
2647; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
2648; SSE41-NEXT: por %xmm7, %xmm0
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002649; SSE41-NEXT: movapd %xmm11, %xmm3
2650; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm3
2651; SSE41-NEXT: movdqa %xmm1, %xmm0
2652; SSE41-NEXT: pxor %xmm9, %xmm0
2653; SSE41-NEXT: movdqa %xmm12, %xmm2
2654; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
2655; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm2[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002656; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002657; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
2658; SSE41-NEXT: pand %xmm7, %xmm4
Simon Pilgrim0be55672018-02-11 10:52:37 +00002659; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002660; SSE41-NEXT: por %xmm4, %xmm0
2661; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm11
2662; SSE41-NEXT: xorpd %xmm8, %xmm8
2663; SSE41-NEXT: movapd %xmm11, %xmm0
2664; SSE41-NEXT: xorpd %xmm9, %xmm0
2665; SSE41-NEXT: movapd %xmm0, %xmm1
2666; SSE41-NEXT: pcmpgtd %xmm9, %xmm1
2667; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
2668; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002669; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002670; SSE41-NEXT: pand %xmm4, %xmm7
2671; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002672; SSE41-NEXT: por %xmm7, %xmm0
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002673; SSE41-NEXT: pxor %xmm4, %xmm4
2674; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm4
2675; SSE41-NEXT: movapd %xmm3, %xmm0
2676; SSE41-NEXT: xorpd %xmm9, %xmm0
2677; SSE41-NEXT: movapd %xmm0, %xmm1
2678; SSE41-NEXT: pcmpgtd %xmm9, %xmm1
2679; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm1[0,0,2,2]
2680; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
2681; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
2682; SSE41-NEXT: pand %xmm7, %xmm2
2683; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2684; SSE41-NEXT: por %xmm2, %xmm0
2685; SSE41-NEXT: pxor %xmm1, %xmm1
2686; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1
2687; SSE41-NEXT: packssdw %xmm4, %xmm1
2688; SSE41-NEXT: movapd %xmm6, %xmm0
2689; SSE41-NEXT: xorpd %xmm9, %xmm0
2690; SSE41-NEXT: movapd %xmm0, %xmm2
2691; SSE41-NEXT: pcmpgtd %xmm9, %xmm2
2692; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
2693; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
2694; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
2695; SSE41-NEXT: pand %xmm3, %xmm4
2696; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
2697; SSE41-NEXT: por %xmm4, %xmm0
2698; SSE41-NEXT: pxor %xmm2, %xmm2
2699; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm2
2700; SSE41-NEXT: movapd %xmm5, %xmm0
2701; SSE41-NEXT: xorpd %xmm9, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002702; SSE41-NEXT: movapd %xmm0, %xmm3
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002703; SSE41-NEXT: pcmpgtd %xmm9, %xmm3
2704; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2705; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002706; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002707; SSE41-NEXT: pand %xmm4, %xmm6
Simon Pilgrim0be55672018-02-11 10:52:37 +00002708; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2709; SSE41-NEXT: por %xmm6, %xmm0
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002710; SSE41-NEXT: pxor %xmm3, %xmm3
2711; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm3
2712; SSE41-NEXT: packssdw %xmm2, %xmm3
2713; SSE41-NEXT: packusdw %xmm3, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002714; SSE41-NEXT: movapd %xmm15, %xmm0
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002715; SSE41-NEXT: xorpd %xmm9, %xmm0
2716; SSE41-NEXT: movapd %xmm0, %xmm2
2717; SSE41-NEXT: pcmpgtd %xmm9, %xmm2
2718; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
2719; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
2720; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
2721; SSE41-NEXT: pand %xmm3, %xmm4
2722; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
2723; SSE41-NEXT: por %xmm4, %xmm0
2724; SSE41-NEXT: pxor %xmm2, %xmm2
2725; SSE41-NEXT: blendvpd %xmm0, %xmm15, %xmm2
2726; SSE41-NEXT: movapd %xmm14, %xmm0
2727; SSE41-NEXT: xorpd %xmm9, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002728; SSE41-NEXT: movapd %xmm0, %xmm3
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002729; SSE41-NEXT: pcmpgtd %xmm9, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00002730; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002731; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002732; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
2733; SSE41-NEXT: pand %xmm4, %xmm5
2734; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2735; SSE41-NEXT: por %xmm5, %xmm0
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002736; SSE41-NEXT: pxor %xmm3, %xmm3
2737; SSE41-NEXT: blendvpd %xmm0, %xmm14, %xmm3
2738; SSE41-NEXT: packssdw %xmm2, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00002739; SSE41-NEXT: movapd %xmm13, %xmm0
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002740; SSE41-NEXT: xorpd %xmm9, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002741; SSE41-NEXT: movapd %xmm0, %xmm2
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002742; SSE41-NEXT: pcmpgtd %xmm9, %xmm2
2743; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
2744; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
2745; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
2746; SSE41-NEXT: pand %xmm4, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00002747; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002748; SSE41-NEXT: por %xmm5, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002749; SSE41-NEXT: pxor %xmm2, %xmm2
2750; SSE41-NEXT: blendvpd %xmm0, %xmm13, %xmm2
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002751; SSE41-NEXT: movapd %xmm10, %xmm0
2752; SSE41-NEXT: xorpd %xmm9, %xmm0
2753; SSE41-NEXT: movapd %xmm0, %xmm4
2754; SSE41-NEXT: pcmpgtd %xmm9, %xmm4
2755; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2756; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
2757; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
2758; SSE41-NEXT: pand %xmm5, %xmm6
2759; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
2760; SSE41-NEXT: por %xmm6, %xmm0
2761; SSE41-NEXT: blendvpd %xmm0, %xmm10, %xmm8
2762; SSE41-NEXT: packssdw %xmm2, %xmm8
2763; SSE41-NEXT: packusdw %xmm8, %xmm3
2764; SSE41-NEXT: packuswb %xmm3, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00002765; SSE41-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002766; SSE41-NEXT: retq
2767;
2768; AVX1-LABEL: trunc_packus_v16i64_v16i8:
2769; AVX1: # %bb.0:
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002770; AVX1-NEXT: vmovapd {{.*#+}} ymm4 = [255,255,255,255]
2771; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5
2772; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [255,255]
2773; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
2774; AVX1-NEXT: vpcmpgtq %xmm3, %xmm6, %xmm7
2775; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm7, %ymm5
2776; AVX1-NEXT: vblendvpd %ymm5, %ymm3, %ymm4, %ymm15
2777; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
2778; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
2779; AVX1-NEXT: vpcmpgtq %xmm2, %xmm6, %xmm7
2780; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm7, %ymm5
2781; AVX1-NEXT: vblendvpd %ymm5, %ymm2, %ymm4, %ymm2
2782; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
2783; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
2784; AVX1-NEXT: vpcmpgtq %xmm1, %xmm6, %xmm7
2785; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm7, %ymm5
2786; AVX1-NEXT: vblendvpd %ymm5, %ymm1, %ymm4, %ymm1
2787; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
2788; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
2789; AVX1-NEXT: vpcmpgtq %xmm0, %xmm6, %xmm6
2790; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5
2791; AVX1-NEXT: vblendvpd %ymm5, %ymm0, %ymm4, %ymm0
2792; AVX1-NEXT: vxorpd %xmm4, %xmm4, %xmm4
2793; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm8
2794; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm14
2795; AVX1-NEXT: vpcmpgtq %xmm4, %xmm14, %xmm9
2796; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm10
2797; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7
2798; AVX1-NEXT: vpcmpgtq %xmm4, %xmm7, %xmm11
2799; AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm12
2800; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
2801; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm13
2802; AVX1-NEXT: vpcmpgtq %xmm4, %xmm15, %xmm6
2803; AVX1-NEXT: vextractf128 $1, %ymm15, %xmm3
2804; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm4
2805; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm3
2806; AVX1-NEXT: vpand %xmm15, %xmm6, %xmm4
2807; AVX1-NEXT: vpackssdw %xmm3, %xmm4, %xmm3
2808; AVX1-NEXT: vpand %xmm5, %xmm13, %xmm4
2809; AVX1-NEXT: vpand %xmm2, %xmm12, %xmm2
2810; AVX1-NEXT: vpackssdw %xmm4, %xmm2, %xmm2
2811; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
2812; AVX1-NEXT: vpand %xmm7, %xmm11, %xmm3
2813; AVX1-NEXT: vpand %xmm1, %xmm10, %xmm1
2814; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
2815; AVX1-NEXT: vpand %xmm14, %xmm9, %xmm3
2816; AVX1-NEXT: vpand %xmm0, %xmm8, %xmm0
2817; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
2818; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002819; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
2820; AVX1-NEXT: vzeroupper
2821; AVX1-NEXT: retq
2822;
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00002823; AVX2-LABEL: trunc_packus_v16i64_v16i8:
2824; AVX2: # %bb.0:
2825; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,255,255,255]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00002826; AVX2-NEXT: vpcmpgtq %ymm2, %ymm4, %ymm5
2827; AVX2-NEXT: vblendvpd %ymm5, %ymm2, %ymm4, %ymm2
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002828; AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm5
2829; AVX2-NEXT: vblendvpd %ymm5, %ymm3, %ymm4, %ymm3
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00002830; AVX2-NEXT: vpcmpgtq %ymm0, %ymm4, %ymm5
2831; AVX2-NEXT: vblendvpd %ymm5, %ymm0, %ymm4, %ymm0
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002832; AVX2-NEXT: vpcmpgtq %ymm1, %ymm4, %ymm5
2833; AVX2-NEXT: vblendvpd %ymm5, %ymm1, %ymm4, %ymm1
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00002834; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00002835; AVX2-NEXT: vpcmpgtq %ymm4, %ymm1, %ymm5
2836; AVX2-NEXT: vpand %ymm1, %ymm5, %ymm1
Simon Pilgrim70eb5082018-02-19 18:08:16 +00002837; AVX2-NEXT: vpcmpgtq %ymm4, %ymm0, %ymm5
2838; AVX2-NEXT: vpand %ymm0, %ymm5, %ymm0
2839; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
2840; AVX2-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm1
2841; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
2842; AVX2-NEXT: vpcmpgtq %ymm4, %ymm2, %ymm3
2843; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm2
2844; AVX2-NEXT: vpackusdw %ymm1, %ymm2, %ymm1
2845; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3]
2846; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2847; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
2848; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2849; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2850; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00002851; AVX2-NEXT: vzeroupper
2852; AVX2-NEXT: retq
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002853;
2854; AVX512-LABEL: trunc_packus_v16i64_v16i8:
2855; AVX512: # %bb.0:
2856; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255]
2857; AVX512-NEXT: vpminsq %zmm2, %zmm0, %zmm0
2858; AVX512-NEXT: vpminsq %zmm2, %zmm1, %zmm1
2859; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
2860; AVX512-NEXT: vpmaxsq %zmm2, %zmm1, %zmm1
2861; AVX512-NEXT: vpmaxsq %zmm2, %zmm0, %zmm0
2862; AVX512-NEXT: vpmovqd %zmm0, %ymm0
2863; AVX512-NEXT: vpmovqd %zmm1, %ymm1
2864; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
2865; AVX512-NEXT: vpmovdb %zmm0, %xmm0
2866; AVX512-NEXT: vzeroupper
2867; AVX512-NEXT: retq
2868 %1 = icmp slt <16 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
2869 %2 = select <16 x i1> %1, <16 x i64> %a0, <16 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
2870 %3 = icmp sgt <16 x i64> %2, zeroinitializer
2871 %4 = select <16 x i1> %3, <16 x i64> %2, <16 x i64> zeroinitializer
2872 %5 = trunc <16 x i64> %4 to <16 x i8>
2873 ret <16 x i8> %5
2874}
2875
; trunc_packus_v8i32_v8i8: clamps each i32 lane of %a0 to the range [0, 255]
; with signed compare+select pairs, then truncates the result to <8 x i8>.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; What the expected code shows: the SSE2 and SSSE3 sequences build the min/max
; from pcmpgtd/pand/pandn/por, the SSE4.1 and AVX sequences use pminsd/pmaxsd,
; and every variant finishes with a dword-to-word narrowing (packssdw or
; vpmovdw) rather than a byte pack.
; NOTE(review): presumably the byte narrowing is absent because the <8 x i8>
; return value is kept in 16-bit lanes on this target - confirm.
2876define <8 x i8> @trunc_packus_v8i32_v8i8(<8 x i32> %a0) {
2877; SSE2-LABEL: trunc_packus_v8i32_v8i8:
2878; SSE2: # %bb.0:
2879; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255]
2880; SSE2-NEXT: movdqa %xmm2, %xmm3
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00002881; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
2882; SSE2-NEXT: pand %xmm3, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002883; SSE2-NEXT: pandn %xmm2, %xmm3
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00002884; SSE2-NEXT: por %xmm0, %xmm3
2885; SSE2-NEXT: movdqa %xmm2, %xmm0
2886; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
2887; SSE2-NEXT: pand %xmm0, %xmm1
2888; SSE2-NEXT: pandn %xmm2, %xmm0
2889; SSE2-NEXT: por %xmm1, %xmm0
2890; SSE2-NEXT: pxor %xmm1, %xmm1
2891; SSE2-NEXT: movdqa %xmm0, %xmm2
2892; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
2893; SSE2-NEXT: pand %xmm0, %xmm2
2894; SSE2-NEXT: movdqa %xmm3, %xmm0
2895; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
2896; SSE2-NEXT: pand %xmm3, %xmm0
2897; SSE2-NEXT: packssdw %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002898; SSE2-NEXT: retq
2899;
2900; SSSE3-LABEL: trunc_packus_v8i32_v8i8:
2901; SSSE3: # %bb.0:
2902; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255]
2903; SSSE3-NEXT: movdqa %xmm2, %xmm3
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00002904; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3
2905; SSSE3-NEXT: pand %xmm3, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002906; SSSE3-NEXT: pandn %xmm2, %xmm3
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00002907; SSSE3-NEXT: por %xmm0, %xmm3
2908; SSSE3-NEXT: movdqa %xmm2, %xmm0
2909; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
2910; SSSE3-NEXT: pand %xmm0, %xmm1
2911; SSSE3-NEXT: pandn %xmm2, %xmm0
2912; SSSE3-NEXT: por %xmm1, %xmm0
2913; SSSE3-NEXT: pxor %xmm1, %xmm1
2914; SSSE3-NEXT: movdqa %xmm0, %xmm2
2915; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
2916; SSSE3-NEXT: pand %xmm0, %xmm2
2917; SSSE3-NEXT: movdqa %xmm3, %xmm0
2918; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
2919; SSSE3-NEXT: pand %xmm3, %xmm0
2920; SSSE3-NEXT: packssdw %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002921; SSSE3-NEXT: retq
2922;
2923; SSE41-LABEL: trunc_packus_v8i32_v8i8:
2924; SSE41: # %bb.0:
2925; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002926; SSE41-NEXT: pminsd %xmm2, %xmm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00002927; SSE41-NEXT: pminsd %xmm2, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002928; SSE41-NEXT: pxor %xmm2, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002929; SSE41-NEXT: pmaxsd %xmm2, %xmm1
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00002930; SSE41-NEXT: pmaxsd %xmm2, %xmm0
2931; SSE41-NEXT: packssdw %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002932; SSE41-NEXT: retq
2933;
2934; AVX1-LABEL: trunc_packus_v8i32_v8i8:
2935; AVX1: # %bb.0:
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00002936; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255,255,255]
2937; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm2
2938; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2939; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
2940; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
2941; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
2942; AVX1-NEXT: vpmaxsd %xmm1, %xmm2, %xmm1
2943; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002944; AVX1-NEXT: vzeroupper
2945; AVX1-NEXT: retq
2946;
2947; AVX2-LABEL: trunc_packus_v8i32_v8i8:
2948; AVX2: # %bb.0:
2949; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
2950; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
2951; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
2952; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00002953; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2954; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002955; AVX2-NEXT: vzeroupper
2956; AVX2-NEXT: retq
2957;
2958; AVX512F-LABEL: trunc_packus_v8i32_v8i8:
2959; AVX512F: # %bb.0:
2960; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
2961; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
2962; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
2963; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
2964; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +00002965; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002966; AVX512F-NEXT: vzeroupper
2967; AVX512F-NEXT: retq
2968;
2969; AVX512VL-LABEL: trunc_packus_v8i32_v8i8:
2970; AVX512VL: # %bb.0:
2971; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
2972; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
2973; AVX512VL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
2974; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
2975; AVX512VL-NEXT: vzeroupper
2976; AVX512VL-NEXT: retq
2977;
2978; AVX512BW-LABEL: trunc_packus_v8i32_v8i8:
2979; AVX512BW: # %bb.0:
2980; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
2981; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0
2982; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
2983; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
2984; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +00002985; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002986; AVX512BW-NEXT: vzeroupper
2987; AVX512BW-NEXT: retq
2988;
2989; AVX512BWVL-LABEL: trunc_packus_v8i32_v8i8:
2990; AVX512BWVL: # %bb.0:
2991; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
2992; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
2993; AVX512BWVL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
2994; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
2995; AVX512BWVL-NEXT: vzeroupper
2996; AVX512BWVL-NEXT: retq
; Upper clamp: a lane is kept only if it is signed-less-than 255, else it becomes 255.
2997 %1 = icmp slt <8 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
2998 %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
; Lower clamp: a lane is kept only if it is signed-greater-than 0, else it becomes 0.
2999 %3 = icmp sgt <8 x i32> %2, zeroinitializer
3000 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
; Narrow every clamped lane from i32 to i8.
3001 %5 = trunc <8 x i32> %4 to <8 x i8>
3002 ret <8 x i8> %5
3003}
3004
; trunc_packus_v8i32_v8i8_store: clamps each i32 lane of %a0 to [0, 255] with
; signed compare+select pairs, truncates to <8 x i8>, and stores the eight
; bytes through %p1 (the function returns void).
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
; What the expected code shows: the SSE, AVX1 and AVX2 sequences contain no
; explicit min/max, only packssdw followed by packuswb and a movq store; the
; AVX512F and AVX512BW sequences keep vpminsd/vpmaxsd and then narrow with
; vpmovdw + vpackuswb; the two VL variants narrow and store in one step with
; vpmovdb to memory.
Simon Pilgrim689d8132018-02-15 17:48:34 +00003005define void @trunc_packus_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) {
Simon Pilgrimc302a582018-02-19 13:29:20 +00003006; SSE-LABEL: trunc_packus_v8i32_v8i8_store:
3007; SSE: # %bb.0:
3008; SSE-NEXT: packssdw %xmm1, %xmm0
3009; SSE-NEXT: packuswb %xmm0, %xmm0
3010; SSE-NEXT: movq %xmm0, (%rdi)
3011; SSE-NEXT: retq
Simon Pilgrim689d8132018-02-15 17:48:34 +00003012;
3013; AVX1-LABEL: trunc_packus_v8i32_v8i8_store:
3014; AVX1: # %bb.0:
Simon Pilgrimc302a582018-02-19 13:29:20 +00003015; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3016; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
3017; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
Simon Pilgrim689d8132018-02-15 17:48:34 +00003018; AVX1-NEXT: vmovq %xmm0, (%rdi)
3019; AVX1-NEXT: vzeroupper
3020; AVX1-NEXT: retq
3021;
3022; AVX2-LABEL: trunc_packus_v8i32_v8i8_store:
3023; AVX2: # %bb.0:
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00003024; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3025; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
Simon Pilgrimc302a582018-02-19 13:29:20 +00003026; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
Simon Pilgrim689d8132018-02-15 17:48:34 +00003027; AVX2-NEXT: vmovq %xmm0, (%rdi)
3028; AVX2-NEXT: vzeroupper
3029; AVX2-NEXT: retq
3030;
3031; AVX512F-LABEL: trunc_packus_v8i32_v8i8_store:
3032; AVX512F: # %bb.0:
3033; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
3034; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
3035; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
3036; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
3037; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
Simon Pilgrim70eb5082018-02-19 18:08:16 +00003038; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
Simon Pilgrim689d8132018-02-15 17:48:34 +00003039; AVX512F-NEXT: vmovq %xmm0, (%rdi)
3040; AVX512F-NEXT: vzeroupper
3041; AVX512F-NEXT: retq
3042;
3043; AVX512VL-LABEL: trunc_packus_v8i32_v8i8_store:
3044; AVX512VL: # %bb.0:
3045; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
3046; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
3047; AVX512VL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
3048; AVX512VL-NEXT: vpmovdb %ymm0, (%rdi)
3049; AVX512VL-NEXT: vzeroupper
3050; AVX512VL-NEXT: retq
3051;
3052; AVX512BW-LABEL: trunc_packus_v8i32_v8i8_store:
3053; AVX512BW: # %bb.0:
3054; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
3055; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0
3056; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
3057; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
3058; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
Simon Pilgrim70eb5082018-02-19 18:08:16 +00003059; AVX512BW-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
Simon Pilgrim689d8132018-02-15 17:48:34 +00003060; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
3061; AVX512BW-NEXT: vzeroupper
3062; AVX512BW-NEXT: retq
3063;
3064; AVX512BWVL-LABEL: trunc_packus_v8i32_v8i8_store:
3065; AVX512BWVL: # %bb.0:
3066; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
3067; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
3068; AVX512BWVL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
3069; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rdi)
3070; AVX512BWVL-NEXT: vzeroupper
3071; AVX512BWVL-NEXT: retq
; Upper clamp: a lane is kept only if it is signed-less-than 255, else it becomes 255.
3072 %1 = icmp slt <8 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
3073 %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
; Lower clamp: a lane is kept only if it is signed-greater-than 0, else it becomes 0.
3074 %3 = icmp sgt <8 x i32> %2, zeroinitializer
3075 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
; Narrow to i8 and write the vector out through the pointer argument.
3076 %5 = trunc <8 x i32> %4 to <8 x i8>
3077 store <8 x i8> %5, <8 x i8> *%p1
3078 ret void
3079}
3080
; trunc_packus_v16i32_v16i8: clamps each i32 lane of %a0 to [0, 255] with
; signed compare+select pairs, then truncates the result to <16 x i8>.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
; What the expected code shows: the SSE, AVX1 and AVX2 sequences contain no
; explicit min/max, only packssdw steps followed by one packuswb (AVX2 adds a
; vpermq and a 128-bit extract in between); the AVX512 sequence keeps
; vpminsd/vpmaxsd on zmm and narrows with vpmovdb.
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003081define <16 x i8> @trunc_packus_v16i32_v16i8(<16 x i32> %a0) {
Simon Pilgrim17bb6f02018-02-15 14:37:59 +00003082; SSE-LABEL: trunc_packus_v16i32_v16i8:
3083; SSE: # %bb.0:
3084; SSE-NEXT: packssdw %xmm3, %xmm2
3085; SSE-NEXT: packssdw %xmm1, %xmm0
3086; SSE-NEXT: packuswb %xmm2, %xmm0
3087; SSE-NEXT: retq
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003088;
3089; AVX1-LABEL: trunc_packus_v16i32_v16i8:
3090; AVX1: # %bb.0:
3091; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
Simon Pilgrim17bb6f02018-02-15 14:37:59 +00003092; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
3093; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3094; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003095; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
3096; AVX1-NEXT: vzeroupper
3097; AVX1-NEXT: retq
3098;
3099; AVX2-LABEL: trunc_packus_v16i32_v16i8:
3100; AVX2: # %bb.0:
Simon Pilgrim17bb6f02018-02-15 14:37:59 +00003101; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
3102; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
3103; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3104; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003105; AVX2-NEXT: vzeroupper
3106; AVX2-NEXT: retq
3107;
3108; AVX512-LABEL: trunc_packus_v16i32_v16i8:
3109; AVX512: # %bb.0:
3110; AVX512-NEXT: vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
3111; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
3112; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
3113; AVX512-NEXT: vpmovdb %zmm0, %xmm0
3114; AVX512-NEXT: vzeroupper
3115; AVX512-NEXT: retq
; Upper clamp: a lane is kept only if it is signed-less-than 255, else it becomes 255.
3116 %1 = icmp slt <16 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
3117 %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
; Lower clamp: a lane is kept only if it is signed-greater-than 0, else it becomes 0.
3118 %3 = icmp sgt <16 x i32> %2, zeroinitializer
3119 %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
; Narrow every clamped lane from i32 to i8.
3120 %5 = trunc <16 x i32> %4 to <16 x i8>
3121 ret <16 x i8> %5
3122}
3123
; trunc_packus_v16i16_v16i8: clamps each i16 lane of %a0 to [0, 255] with
; signed compare+select pairs, then truncates the result to <16 x i8>.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
; What the expected code shows: the SSE sequence is a single packuswb, and the
; AVX1 and AVX2 sequences are a 128-bit extract plus vpackuswb; all four
; AVX512 variants keep vpminsw/vpmaxsw, with AVX512F and AVX512VL narrowing
; through vpmovsxwd + vpmovdb and the two BW variants narrowing with vpmovwb.
3124define <16 x i8> @trunc_packus_v16i16_v16i8(<16 x i16> %a0) {
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00003125; SSE-LABEL: trunc_packus_v16i16_v16i8:
3126; SSE: # %bb.0:
3127; SSE-NEXT: packuswb %xmm1, %xmm0
3128; SSE-NEXT: retq
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003129;
3130; AVX1-LABEL: trunc_packus_v16i16_v16i8:
3131; AVX1: # %bb.0:
3132; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Simon Pilgrimae00a712018-02-06 14:07:46 +00003133; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003134; AVX1-NEXT: vzeroupper
3135; AVX1-NEXT: retq
3136;
3137; AVX2-LABEL: trunc_packus_v16i16_v16i8:
3138; AVX2: # %bb.0:
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003139; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimae00a712018-02-06 14:07:46 +00003140; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003141; AVX2-NEXT: vzeroupper
3142; AVX2-NEXT: retq
3143;
3144; AVX512F-LABEL: trunc_packus_v16i16_v16i8:
3145; AVX512F: # %bb.0:
3146; AVX512F-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
3147; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
3148; AVX512F-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
3149; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
3150; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
3151; AVX512F-NEXT: vzeroupper
3152; AVX512F-NEXT: retq
3153;
3154; AVX512VL-LABEL: trunc_packus_v16i16_v16i8:
3155; AVX512VL: # %bb.0:
3156; AVX512VL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
3157; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
3158; AVX512VL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
3159; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0
3160; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
3161; AVX512VL-NEXT: vzeroupper
3162; AVX512VL-NEXT: retq
3163;
3164; AVX512BW-LABEL: trunc_packus_v16i16_v16i8:
3165; AVX512BW: # %bb.0:
3166; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
3167; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
3168; AVX512BW-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
3169; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +00003170; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003171; AVX512BW-NEXT: vzeroupper
3172; AVX512BW-NEXT: retq
3173;
3174; AVX512BWVL-LABEL: trunc_packus_v16i16_v16i8:
3175; AVX512BWVL: # %bb.0:
3176; AVX512BWVL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
3177; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
3178; AVX512BWVL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
3179; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
3180; AVX512BWVL-NEXT: vzeroupper
3181; AVX512BWVL-NEXT: retq
; Upper clamp: a lane is kept only if it is signed-less-than 255, else it becomes 255.
3182 %1 = icmp slt <16 x i16> %a0, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
3183 %2 = select <16 x i1> %1, <16 x i16> %a0, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
; Lower clamp: a lane is kept only if it is signed-greater-than 0, else it becomes 0.
3184 %3 = icmp sgt <16 x i16> %2, zeroinitializer
3185 %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer
; Narrow every clamped lane from i16 to i8.
3186 %5 = trunc <16 x i16> %4 to <16 x i8>
3187 ret <16 x i8> %5
3188}
3189
; trunc_packus_v32i16_v32i8: clamps each i16 lane of %a0 to [0, 255] with
; signed compare+select pairs, then truncates the result to <32 x i8>.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
; What the expected code shows: the SSE, AVX1 and AVX2 sequences contain no
; explicit min/max and narrow with packuswb/vpackuswb (AVX2 follows it with a
; vpermq); AVX512F and AVX512VL clamp each ymm half with vpminsw/vpmaxsw and
; narrow each half through vpmovsxwd + vpmovdb before recombining; the two BW
; variants clamp on zmm and narrow with a single vpmovwb.
3190define <32 x i8> @trunc_packus_v32i16_v32i8(<32 x i16> %a0) {
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00003191; SSE-LABEL: trunc_packus_v32i16_v32i8:
3192; SSE: # %bb.0:
3193; SSE-NEXT: packuswb %xmm1, %xmm0
3194; SSE-NEXT: packuswb %xmm3, %xmm2
3195; SSE-NEXT: movdqa %xmm2, %xmm1
3196; SSE-NEXT: retq
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003197;
3198; AVX1-LABEL: trunc_packus_v32i16_v32i8:
3199; AVX1: # %bb.0:
3200; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
Simon Pilgrimae00a712018-02-06 14:07:46 +00003201; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
3202; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3203; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003204; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3205; AVX1-NEXT: retq
3206;
3207; AVX2-LABEL: trunc_packus_v32i16_v32i8:
3208; AVX2: # %bb.0:
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00003209; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
3210; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003211; AVX2-NEXT: retq
3212;
3213; AVX512F-LABEL: trunc_packus_v32i16_v32i8:
3214; AVX512F: # %bb.0:
3215; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
3216; AVX512F-NEXT: vpminsw %ymm2, %ymm0, %ymm0
3217; AVX512F-NEXT: vpminsw %ymm2, %ymm1, %ymm1
3218; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
3219; AVX512F-NEXT: vpmaxsw %ymm2, %ymm1, %ymm1
3220; AVX512F-NEXT: vpmaxsw %ymm2, %ymm0, %ymm0
3221; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
3222; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
3223; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
3224; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
3225; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3226; AVX512F-NEXT: retq
3227;
3228; AVX512VL-LABEL: trunc_packus_v32i16_v32i8:
3229; AVX512VL: # %bb.0:
3230; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
3231; AVX512VL-NEXT: vpminsw %ymm2, %ymm0, %ymm0
3232; AVX512VL-NEXT: vpminsw %ymm2, %ymm1, %ymm1
3233; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
3234; AVX512VL-NEXT: vpmaxsw %ymm2, %ymm1, %ymm1
3235; AVX512VL-NEXT: vpmaxsw %ymm2, %ymm0, %ymm0
3236; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0
3237; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
3238; AVX512VL-NEXT: vpmovsxwd %ymm1, %zmm1
3239; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
3240; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3241; AVX512VL-NEXT: retq
3242;
3243; AVX512BW-LABEL: trunc_packus_v32i16_v32i8:
3244; AVX512BW: # %bb.0:
3245; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %zmm0, %zmm0
3246; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
3247; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
3248; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
3249; AVX512BW-NEXT: retq
3250;
3251; AVX512BWVL-LABEL: trunc_packus_v32i16_v32i8:
3252; AVX512BWVL: # %bb.0:
3253; AVX512BWVL-NEXT: vpminsw {{.*}}(%rip), %zmm0, %zmm0
3254; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
3255; AVX512BWVL-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
3256; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
3257; AVX512BWVL-NEXT: retq
; Upper clamp: a lane is kept only if it is signed-less-than 255, else it becomes 255.
3258 %1 = icmp slt <32 x i16> %a0, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
3259 %2 = select <32 x i1> %1, <32 x i16> %a0, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
; Lower clamp: a lane is kept only if it is signed-greater-than 0, else it becomes 0.
3260 %3 = icmp sgt <32 x i16> %2, zeroinitializer
3261 %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
; Narrow every clamped lane from i16 to i8.
3262 %5 = trunc <32 x i16> %4 to <32 x i8>
3263 ret <32 x i8> %5
3264}