blob: 1566d9429cea0da024f99b002f65752e154b63c7 [file] [log] [blame]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
12
13;
14; PACKUS saturation truncation to vXi32
15;
16
; Test case: clamp every i64 lane of %a0 to the range [0, 4294967295] with two
; signed compare+select pairs, then truncate the result to <4 x i32>.
; The check lines inside the function record the expected llc output for each
; RUN configuration at the top of the file; per the file's NOTE they are
; autogenerated, so they are left untouched here.
17define <4 x i32> @trunc_packus_v4i64_v4i32(<4 x i64> %a0) {
18; SSE2-LABEL: trunc_packus_v4i64_v4i32:
19; SSE2: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +000020; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [4294967295,4294967295]
Simon Pilgrim65ec9232018-01-26 14:58:50 +000021; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
22; SSE2-NEXT: movdqa %xmm0, %xmm3
23; SSE2-NEXT: pxor %xmm2, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +000024; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483647,2147483647]
25; SSE2-NEXT: movdqa %xmm5, %xmm6
26; SSE2-NEXT: pcmpgtd %xmm3, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +000027; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +000028; SSE2-NEXT: pcmpeqd %xmm5, %xmm3
29; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
30; SSE2-NEXT: pand %xmm7, %xmm4
31; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
32; SSE2-NEXT: por %xmm4, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +000033; SSE2-NEXT: pand %xmm3, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +000034; SSE2-NEXT: pandn %xmm8, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +000035; SSE2-NEXT: por %xmm0, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +000036; SSE2-NEXT: movdqa %xmm1, %xmm0
37; SSE2-NEXT: pxor %xmm2, %xmm0
38; SSE2-NEXT: movdqa %xmm5, %xmm4
39; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
40; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
41; SSE2-NEXT: pcmpeqd %xmm5, %xmm0
42; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
43; SSE2-NEXT: pand %xmm6, %xmm0
44; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
45; SSE2-NEXT: por %xmm0, %xmm4
46; SSE2-NEXT: pand %xmm4, %xmm1
47; SSE2-NEXT: pandn %xmm8, %xmm4
48; SSE2-NEXT: por %xmm1, %xmm4
49; SSE2-NEXT: movdqa %xmm4, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +000050; SSE2-NEXT: pxor %xmm2, %xmm0
51; SSE2-NEXT: movdqa %xmm0, %xmm1
52; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
53; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
54; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +000055; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
56; SSE2-NEXT: pand %xmm5, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +000057; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +000058; SSE2-NEXT: por %xmm0, %xmm1
59; SSE2-NEXT: pand %xmm4, %xmm1
60; SSE2-NEXT: movdqa %xmm3, %xmm0
61; SSE2-NEXT: pxor %xmm2, %xmm0
62; SSE2-NEXT: movdqa %xmm0, %xmm4
63; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
64; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
65; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
66; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
67; SSE2-NEXT: pand %xmm5, %xmm2
68; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
69; SSE2-NEXT: por %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +000070; SSE2-NEXT: pand %xmm3, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +000071; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +000072; SSE2-NEXT: retq
73;
74; SSSE3-LABEL: trunc_packus_v4i64_v4i32:
75; SSSE3: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +000076; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [4294967295,4294967295]
Simon Pilgrim65ec9232018-01-26 14:58:50 +000077; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
78; SSSE3-NEXT: movdqa %xmm0, %xmm3
79; SSSE3-NEXT: pxor %xmm2, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +000080; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147483647,2147483647]
81; SSSE3-NEXT: movdqa %xmm5, %xmm6
82; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +000083; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +000084; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3
85; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
86; SSSE3-NEXT: pand %xmm7, %xmm4
87; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
88; SSSE3-NEXT: por %xmm4, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +000089; SSSE3-NEXT: pand %xmm3, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +000090; SSSE3-NEXT: pandn %xmm8, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +000091; SSSE3-NEXT: por %xmm0, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +000092; SSSE3-NEXT: movdqa %xmm1, %xmm0
93; SSSE3-NEXT: pxor %xmm2, %xmm0
94; SSSE3-NEXT: movdqa %xmm5, %xmm4
95; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4
96; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
97; SSSE3-NEXT: pcmpeqd %xmm5, %xmm0
98; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
99; SSSE3-NEXT: pand %xmm6, %xmm0
100; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
101; SSSE3-NEXT: por %xmm0, %xmm4
102; SSSE3-NEXT: pand %xmm4, %xmm1
103; SSSE3-NEXT: pandn %xmm8, %xmm4
104; SSSE3-NEXT: por %xmm1, %xmm4
105; SSSE3-NEXT: movdqa %xmm4, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000106; SSSE3-NEXT: pxor %xmm2, %xmm0
107; SSSE3-NEXT: movdqa %xmm0, %xmm1
108; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
109; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
110; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +0000111; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
112; SSSE3-NEXT: pand %xmm5, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000113; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000114; SSSE3-NEXT: por %xmm0, %xmm1
115; SSSE3-NEXT: pand %xmm4, %xmm1
116; SSSE3-NEXT: movdqa %xmm3, %xmm0
117; SSSE3-NEXT: pxor %xmm2, %xmm0
118; SSSE3-NEXT: movdqa %xmm0, %xmm4
119; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4
120; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
121; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
122; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
123; SSSE3-NEXT: pand %xmm5, %xmm2
124; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
125; SSSE3-NEXT: por %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000126; SSSE3-NEXT: pand %xmm3, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +0000127; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000128; SSSE3-NEXT: retq
129;
130; SSE41-LABEL: trunc_packus_v4i64_v4i32:
131; SSE41: # %bb.0:
132; SSE41-NEXT: movdqa %xmm0, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000133; SSE41-NEXT: movapd {{.*#+}} xmm4 = [4294967295,4294967295]
134; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0]
135; SSE41-NEXT: pxor %xmm8, %xmm0
136; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147483647,2147483647]
137; SSE41-NEXT: movdqa %xmm6, %xmm5
138; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
139; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
140; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000141; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000142; SSE41-NEXT: pand %xmm7, %xmm3
143; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000144; SSE41-NEXT: por %xmm3, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +0000145; SSE41-NEXT: movapd %xmm4, %xmm5
146; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5
147; SSE41-NEXT: movdqa %xmm1, %xmm0
148; SSE41-NEXT: pxor %xmm8, %xmm0
149; SSE41-NEXT: movdqa %xmm6, %xmm2
150; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000151; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000152; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
153; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
154; SSE41-NEXT: pand %xmm3, %xmm6
155; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
156; SSE41-NEXT: por %xmm6, %xmm0
157; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4
158; SSE41-NEXT: xorpd %xmm1, %xmm1
159; SSE41-NEXT: movapd %xmm4, %xmm0
160; SSE41-NEXT: xorpd %xmm8, %xmm0
161; SSE41-NEXT: movapd %xmm0, %xmm2
162; SSE41-NEXT: pcmpgtd %xmm8, %xmm2
163; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
164; SSE41-NEXT: pcmpeqd %xmm8, %xmm0
165; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
166; SSE41-NEXT: pand %xmm3, %xmm6
167; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
168; SSE41-NEXT: por %xmm6, %xmm0
169; SSE41-NEXT: pxor %xmm2, %xmm2
170; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
171; SSE41-NEXT: movapd %xmm5, %xmm0
172; SSE41-NEXT: xorpd %xmm8, %xmm0
173; SSE41-NEXT: movapd %xmm0, %xmm3
174; SSE41-NEXT: pcmpgtd %xmm8, %xmm3
175; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
176; SSE41-NEXT: pcmpeqd %xmm8, %xmm0
177; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
178; SSE41-NEXT: pand %xmm4, %xmm6
179; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
180; SSE41-NEXT: por %xmm6, %xmm0
181; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1
182; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
183; SSE41-NEXT: movaps %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000184; SSE41-NEXT: retq
185;
186; AVX1-LABEL: trunc_packus_v4i64_v4i32:
187; AVX1: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000188; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
189; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
190; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [4294967295,4294967295]
191; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
192; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
193; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
194; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000195; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
196; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
197; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
198; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
199; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
200; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
201; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
202; AVX1-NEXT: vzeroupper
203; AVX1-NEXT: retq
204;
205; AVX2-SLOW-LABEL: trunc_packus_v4i64_v4i32:
206; AVX2-SLOW: # %bb.0:
207; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
208; AVX2-SLOW-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
209; AVX2-SLOW-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
210; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
211; AVX2-SLOW-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1
212; AVX2-SLOW-NEXT: vpand %ymm0, %ymm1, %ymm0
213; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
214; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000215; AVX2-SLOW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000216; AVX2-SLOW-NEXT: vzeroupper
217; AVX2-SLOW-NEXT: retq
218;
219; AVX2-FAST-LABEL: trunc_packus_v4i64_v4i32:
220; AVX2-FAST: # %bb.0:
221; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
222; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
223; AVX2-FAST-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
224; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1
225; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1
226; AVX2-FAST-NEXT: vpand %ymm0, %ymm1, %ymm0
227; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,2,4,6,4,6,6,7]
228; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000229; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000230; AVX2-FAST-NEXT: vzeroupper
231; AVX2-FAST-NEXT: retq
232;
233; AVX512F-LABEL: trunc_packus_v4i64_v4i32:
234; AVX512F: # %bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000235; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000236; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
237; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
238; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
239; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
240; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000241; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000242; AVX512F-NEXT: vzeroupper
243; AVX512F-NEXT: retq
244;
245; AVX512VL-LABEL: trunc_packus_v4i64_v4i32:
246; AVX512VL: # %bb.0:
247; AVX512VL-NEXT: vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
248; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
249; AVX512VL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
250; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
251; AVX512VL-NEXT: vzeroupper
252; AVX512VL-NEXT: retq
253;
254; AVX512BW-LABEL: trunc_packus_v4i64_v4i32:
255; AVX512BW: # %bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000256; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000257; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
258; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
259; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
260; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
261; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000262; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000263; AVX512BW-NEXT: vzeroupper
264; AVX512BW-NEXT: retq
265;
266; AVX512BWVL-LABEL: trunc_packus_v4i64_v4i32:
267; AVX512BWVL: # %bb.0:
268; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
269; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
270; AVX512BWVL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
271; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
272; AVX512BWVL-NEXT: vzeroupper
273; AVX512BWVL-NEXT: retq
; Upper clamp: keep each lane of %a0 that is signed-less-than 4294967295,
; otherwise substitute 4294967295.
274 %1 = icmp slt <4 x i64> %a0, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
275 %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
; Lower clamp: keep each lane that is signed-greater-than zero, otherwise
; substitute zero (negative inputs therefore become 0).
276 %3 = icmp sgt <4 x i64> %2, zeroinitializer
277 %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> zeroinitializer
; Narrow the clamped i64 lanes to i32 and return them.
278 %5 = trunc <4 x i64> %4 to <4 x i32>
279 ret <4 x i32> %5
280}
281
282
283define <8 x i32> @trunc_packus_v8i64_v8i32(<8 x i64> %a0) {
284; SSE2-LABEL: trunc_packus_v8i64_v8i32:
285; SSE2: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000286; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [4294967295,4294967295]
287; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
288; SSE2-NEXT: movdqa %xmm0, %xmm5
289; SSE2-NEXT: pxor %xmm10, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000290; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483647,2147483647]
291; SSE2-NEXT: movdqa %xmm9, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000292; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
293; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
294; SSE2-NEXT: pcmpeqd %xmm9, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000295; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
296; SSE2-NEXT: pand %xmm7, %xmm4
297; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000298; SSE2-NEXT: por %xmm4, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000299; SSE2-NEXT: pand %xmm5, %xmm0
300; SSE2-NEXT: pandn %xmm8, %xmm5
301; SSE2-NEXT: por %xmm0, %xmm5
302; SSE2-NEXT: movdqa %xmm1, %xmm0
303; SSE2-NEXT: pxor %xmm10, %xmm0
304; SSE2-NEXT: movdqa %xmm9, %xmm4
305; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
306; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
307; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
308; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
309; SSE2-NEXT: pand %xmm6, %xmm7
310; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
311; SSE2-NEXT: por %xmm7, %xmm0
312; SSE2-NEXT: pand %xmm0, %xmm1
313; SSE2-NEXT: pandn %xmm8, %xmm0
314; SSE2-NEXT: por %xmm1, %xmm0
315; SSE2-NEXT: movdqa %xmm2, %xmm1
316; SSE2-NEXT: pxor %xmm10, %xmm1
317; SSE2-NEXT: movdqa %xmm9, %xmm4
318; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
319; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
320; SSE2-NEXT: pcmpeqd %xmm9, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000321; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000322; SSE2-NEXT: pand %xmm6, %xmm1
323; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
324; SSE2-NEXT: por %xmm1, %xmm6
325; SSE2-NEXT: pand %xmm6, %xmm2
326; SSE2-NEXT: pandn %xmm8, %xmm6
327; SSE2-NEXT: por %xmm2, %xmm6
328; SSE2-NEXT: movdqa %xmm3, %xmm1
329; SSE2-NEXT: pxor %xmm10, %xmm1
330; SSE2-NEXT: movdqa %xmm9, %xmm2
331; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
332; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
333; SSE2-NEXT: pcmpeqd %xmm9, %xmm1
334; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
335; SSE2-NEXT: pand %xmm4, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000336; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
337; SSE2-NEXT: por %xmm1, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000338; SSE2-NEXT: pand %xmm2, %xmm3
339; SSE2-NEXT: pandn %xmm8, %xmm2
340; SSE2-NEXT: por %xmm3, %xmm2
341; SSE2-NEXT: movdqa %xmm2, %xmm1
342; SSE2-NEXT: pxor %xmm10, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000343; SSE2-NEXT: movdqa %xmm1, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +0000344; SSE2-NEXT: pcmpgtd %xmm10, %xmm3
345; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
346; SSE2-NEXT: pcmpeqd %xmm10, %xmm1
347; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
348; SSE2-NEXT: pand %xmm4, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000349; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000350; SSE2-NEXT: por %xmm1, %xmm3
351; SSE2-NEXT: pand %xmm2, %xmm3
352; SSE2-NEXT: movdqa %xmm6, %xmm1
353; SSE2-NEXT: pxor %xmm10, %xmm1
354; SSE2-NEXT: movdqa %xmm1, %xmm2
355; SSE2-NEXT: pcmpgtd %xmm10, %xmm2
356; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
357; SSE2-NEXT: pcmpeqd %xmm10, %xmm1
358; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
359; SSE2-NEXT: pand %xmm4, %xmm7
360; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
361; SSE2-NEXT: por %xmm7, %xmm1
362; SSE2-NEXT: pand %xmm6, %xmm1
363; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
364; SSE2-NEXT: movdqa %xmm0, %xmm2
365; SSE2-NEXT: pxor %xmm10, %xmm2
366; SSE2-NEXT: movdqa %xmm2, %xmm3
367; SSE2-NEXT: pcmpgtd %xmm10, %xmm3
368; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
369; SSE2-NEXT: pcmpeqd %xmm10, %xmm2
370; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
371; SSE2-NEXT: pand %xmm4, %xmm2
372; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
373; SSE2-NEXT: por %xmm2, %xmm3
374; SSE2-NEXT: pand %xmm0, %xmm3
375; SSE2-NEXT: movdqa %xmm5, %xmm0
376; SSE2-NEXT: pxor %xmm10, %xmm0
377; SSE2-NEXT: movdqa %xmm0, %xmm2
378; SSE2-NEXT: pcmpgtd %xmm10, %xmm2
379; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
380; SSE2-NEXT: pcmpeqd %xmm10, %xmm0
381; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
382; SSE2-NEXT: pand %xmm4, %xmm6
383; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
384; SSE2-NEXT: por %xmm6, %xmm0
385; SSE2-NEXT: pand %xmm5, %xmm0
386; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000387; SSE2-NEXT: retq
388;
389; SSSE3-LABEL: trunc_packus_v8i64_v8i32:
390; SSSE3: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000391; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [4294967295,4294967295]
392; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
393; SSSE3-NEXT: movdqa %xmm0, %xmm5
394; SSSE3-NEXT: pxor %xmm10, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000395; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483647,2147483647]
396; SSSE3-NEXT: movdqa %xmm9, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000397; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6
398; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
399; SSSE3-NEXT: pcmpeqd %xmm9, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000400; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
401; SSSE3-NEXT: pand %xmm7, %xmm4
402; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000403; SSSE3-NEXT: por %xmm4, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000404; SSSE3-NEXT: pand %xmm5, %xmm0
405; SSSE3-NEXT: pandn %xmm8, %xmm5
406; SSSE3-NEXT: por %xmm0, %xmm5
407; SSSE3-NEXT: movdqa %xmm1, %xmm0
408; SSSE3-NEXT: pxor %xmm10, %xmm0
409; SSSE3-NEXT: movdqa %xmm9, %xmm4
410; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4
411; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
412; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
413; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
414; SSSE3-NEXT: pand %xmm6, %xmm7
415; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
416; SSSE3-NEXT: por %xmm7, %xmm0
417; SSSE3-NEXT: pand %xmm0, %xmm1
418; SSSE3-NEXT: pandn %xmm8, %xmm0
419; SSSE3-NEXT: por %xmm1, %xmm0
420; SSSE3-NEXT: movdqa %xmm2, %xmm1
421; SSSE3-NEXT: pxor %xmm10, %xmm1
422; SSSE3-NEXT: movdqa %xmm9, %xmm4
423; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
424; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
425; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000426; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000427; SSSE3-NEXT: pand %xmm6, %xmm1
428; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
429; SSSE3-NEXT: por %xmm1, %xmm6
430; SSSE3-NEXT: pand %xmm6, %xmm2
431; SSSE3-NEXT: pandn %xmm8, %xmm6
432; SSSE3-NEXT: por %xmm2, %xmm6
433; SSSE3-NEXT: movdqa %xmm3, %xmm1
434; SSSE3-NEXT: pxor %xmm10, %xmm1
435; SSSE3-NEXT: movdqa %xmm9, %xmm2
436; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
437; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
438; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1
439; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
440; SSSE3-NEXT: pand %xmm4, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000441; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
442; SSSE3-NEXT: por %xmm1, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000443; SSSE3-NEXT: pand %xmm2, %xmm3
444; SSSE3-NEXT: pandn %xmm8, %xmm2
445; SSSE3-NEXT: por %xmm3, %xmm2
446; SSSE3-NEXT: movdqa %xmm2, %xmm1
447; SSSE3-NEXT: pxor %xmm10, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000448; SSSE3-NEXT: movdqa %xmm1, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +0000449; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3
450; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
451; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1
452; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
453; SSSE3-NEXT: pand %xmm4, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000454; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000455; SSSE3-NEXT: por %xmm1, %xmm3
456; SSSE3-NEXT: pand %xmm2, %xmm3
457; SSSE3-NEXT: movdqa %xmm6, %xmm1
458; SSSE3-NEXT: pxor %xmm10, %xmm1
459; SSSE3-NEXT: movdqa %xmm1, %xmm2
460; SSSE3-NEXT: pcmpgtd %xmm10, %xmm2
461; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
462; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1
463; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
464; SSSE3-NEXT: pand %xmm4, %xmm7
465; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
466; SSSE3-NEXT: por %xmm7, %xmm1
467; SSSE3-NEXT: pand %xmm6, %xmm1
468; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
469; SSSE3-NEXT: movdqa %xmm0, %xmm2
470; SSSE3-NEXT: pxor %xmm10, %xmm2
471; SSSE3-NEXT: movdqa %xmm2, %xmm3
472; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3
473; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
474; SSSE3-NEXT: pcmpeqd %xmm10, %xmm2
475; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
476; SSSE3-NEXT: pand %xmm4, %xmm2
477; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
478; SSSE3-NEXT: por %xmm2, %xmm3
479; SSSE3-NEXT: pand %xmm0, %xmm3
480; SSSE3-NEXT: movdqa %xmm5, %xmm0
481; SSSE3-NEXT: pxor %xmm10, %xmm0
482; SSSE3-NEXT: movdqa %xmm0, %xmm2
483; SSSE3-NEXT: pcmpgtd %xmm10, %xmm2
484; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
485; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0
486; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
487; SSSE3-NEXT: pand %xmm4, %xmm6
488; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
489; SSSE3-NEXT: por %xmm6, %xmm0
490; SSSE3-NEXT: pand %xmm5, %xmm0
491; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000492; SSSE3-NEXT: retq
493;
494; SSE41-LABEL: trunc_packus_v8i64_v8i32:
495; SSE41: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000496; SSE41-NEXT: movdqa %xmm0, %xmm4
497; SSE41-NEXT: movapd {{.*#+}} xmm7 = [4294967295,4294967295]
498; SSE41-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
499; SSE41-NEXT: pxor %xmm10, %xmm0
500; SSE41-NEXT: movdqa {{.*#+}} xmm11 = [2147483647,2147483647]
501; SSE41-NEXT: movdqa %xmm11, %xmm6
502; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
503; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
504; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
505; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
506; SSE41-NEXT: pand %xmm8, %xmm5
507; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
508; SSE41-NEXT: por %xmm5, %xmm0
509; SSE41-NEXT: movapd %xmm7, %xmm8
510; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm8
511; SSE41-NEXT: movdqa %xmm1, %xmm0
512; SSE41-NEXT: pxor %xmm10, %xmm0
513; SSE41-NEXT: movdqa %xmm11, %xmm4
514; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
515; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000516; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
517; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000518; SSE41-NEXT: pand %xmm5, %xmm6
519; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
520; SSE41-NEXT: por %xmm6, %xmm0
521; SSE41-NEXT: movapd %xmm7, %xmm9
522; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm9
523; SSE41-NEXT: movdqa %xmm2, %xmm0
524; SSE41-NEXT: pxor %xmm10, %xmm0
525; SSE41-NEXT: movdqa %xmm11, %xmm1
526; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
527; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
528; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
529; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
530; SSE41-NEXT: pand %xmm4, %xmm5
531; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
532; SSE41-NEXT: por %xmm5, %xmm0
533; SSE41-NEXT: movapd %xmm7, %xmm4
534; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4
535; SSE41-NEXT: movdqa %xmm3, %xmm0
536; SSE41-NEXT: pxor %xmm10, %xmm0
537; SSE41-NEXT: movdqa %xmm11, %xmm1
538; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
539; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
540; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
541; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
542; SSE41-NEXT: pand %xmm2, %xmm5
543; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
544; SSE41-NEXT: por %xmm5, %xmm0
545; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm7
546; SSE41-NEXT: pxor %xmm2, %xmm2
547; SSE41-NEXT: movapd %xmm7, %xmm0
548; SSE41-NEXT: xorpd %xmm10, %xmm0
549; SSE41-NEXT: movapd %xmm0, %xmm1
550; SSE41-NEXT: pcmpgtd %xmm10, %xmm1
551; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
552; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
553; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
554; SSE41-NEXT: pand %xmm3, %xmm5
555; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
556; SSE41-NEXT: por %xmm5, %xmm0
557; SSE41-NEXT: pxor %xmm3, %xmm3
558; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm3
559; SSE41-NEXT: movapd %xmm4, %xmm0
560; SSE41-NEXT: xorpd %xmm10, %xmm0
561; SSE41-NEXT: movapd %xmm0, %xmm1
562; SSE41-NEXT: pcmpgtd %xmm10, %xmm1
563; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
564; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
565; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
566; SSE41-NEXT: pand %xmm5, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000567; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
568; SSE41-NEXT: por %xmm6, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +0000569; SSE41-NEXT: pxor %xmm1, %xmm1
570; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1
571; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
572; SSE41-NEXT: movapd %xmm9, %xmm0
573; SSE41-NEXT: xorpd %xmm10, %xmm0
574; SSE41-NEXT: movapd %xmm0, %xmm3
575; SSE41-NEXT: pcmpgtd %xmm10, %xmm3
576; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
577; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
578; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
579; SSE41-NEXT: pand %xmm4, %xmm5
580; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
581; SSE41-NEXT: por %xmm5, %xmm0
582; SSE41-NEXT: pxor %xmm3, %xmm3
583; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm3
584; SSE41-NEXT: movapd %xmm8, %xmm0
585; SSE41-NEXT: xorpd %xmm10, %xmm0
586; SSE41-NEXT: movapd %xmm0, %xmm4
587; SSE41-NEXT: pcmpgtd %xmm10, %xmm4
588; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
589; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
590; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
591; SSE41-NEXT: pand %xmm5, %xmm6
592; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
593; SSE41-NEXT: por %xmm6, %xmm0
594; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm2
595; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
596; SSE41-NEXT: movaps %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000597; SSE41-NEXT: retq
598;
599; AVX1-LABEL: trunc_packus_v8i64_v8i32:
600; AVX1: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000601; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,4294967295]
602; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
603; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [4294967295,4294967295]
604; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
605; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm5
606; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
607; AVX1-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
608; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
609; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
610; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm4
611; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
612; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000613; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
614; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm3
615; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
616; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm5
617; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm6
618; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7
619; AVX1-NEXT: vpcmpgtq %xmm2, %xmm7, %xmm2
620; AVX1-NEXT: vpand %xmm7, %xmm2, %xmm2
621; AVX1-NEXT: vpand %xmm1, %xmm6, %xmm1
622; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
623; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm2
624; AVX1-NEXT: vpand %xmm0, %xmm3, %xmm0
625; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
626; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
627; AVX1-NEXT: retq
628;
629; AVX2-SLOW-LABEL: trunc_packus_v8i64_v8i32:
630; AVX2-SLOW: # %bb.0:
631; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,4294967295]
632; AVX2-SLOW-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000633; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim0be55672018-02-11 10:52:37 +0000634; AVX2-SLOW-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
635; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000636; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000637; AVX2-SLOW-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
638; AVX2-SLOW-NEXT: vpand %ymm1, %ymm3, %ymm1
639; AVX2-SLOW-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2
640; AVX2-SLOW-NEXT: vpand %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000641; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
642; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
643; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
644; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
645; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
646; AVX2-SLOW-NEXT: retq
647;
648; AVX2-FAST-LABEL: trunc_packus_v8i64_v8i32:
649; AVX2-FAST: # %bb.0:
650; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,4294967295]
651; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000652; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim0be55672018-02-11 10:52:37 +0000653; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
654; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000655; AVX2-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000656; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
657; AVX2-FAST-NEXT: vpand %ymm1, %ymm3, %ymm1
658; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2
659; AVX2-FAST-NEXT: vpand %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000660; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
661; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
662; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
663; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
664; AVX2-FAST-NEXT: retq
665;
666; AVX512-LABEL: trunc_packus_v8i64_v8i32:
667; AVX512: # %bb.0:
668; AVX512-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
669; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
670; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
671; AVX512-NEXT: vpmovqd %zmm0, %ymm0
672; AVX512-NEXT: retq
673 %1 = icmp slt <8 x i64> %a0, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
674 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
675 %3 = icmp sgt <8 x i64> %2, zeroinitializer
676 %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
677 %5 = trunc <8 x i64> %4 to <8 x i32>
678 ret <8 x i32> %5
679}
680
681;
682; PACKUS saturation truncation to vXi16
683;
684
; trunc_packus_v8i64_v8i16: clamp every i64 lane of %a0 to the range [0, 65535]
; using signed compares (icmp slt against 65535, then icmp sgt against zero),
; then truncate the clamped lanes to i16.
; The expected assembly below is checked per RUN-line prefix (SSE2, SSSE3,
; SSE41, AVX1, AVX2-SLOW, AVX2-FAST, AVX512). Per the NOTE at the top of the
; file these assertions are autogenerated by utils/update_llc_test_checks.py,
; so regenerate them with that script rather than editing them by hand.
685define <8 x i16> @trunc_packus_v8i64_v8i16(<8 x i64> %a0) {
686; SSE2-LABEL: trunc_packus_v8i64_v8i16:
687; SSE2: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000688; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [65535,65535]
689; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
690; SSE2-NEXT: movdqa %xmm1, %xmm5
691; SSE2-NEXT: pxor %xmm10, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000692; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147549183,2147549183]
693; SSE2-NEXT: movdqa %xmm9, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000694; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
695; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
696; SSE2-NEXT: pcmpeqd %xmm9, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000697; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
698; SSE2-NEXT: pand %xmm7, %xmm4
699; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
700; SSE2-NEXT: por %xmm4, %xmm5
701; SSE2-NEXT: pand %xmm5, %xmm1
702; SSE2-NEXT: pandn %xmm8, %xmm5
703; SSE2-NEXT: por %xmm1, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000704; SSE2-NEXT: movdqa %xmm0, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +0000705; SSE2-NEXT: pxor %xmm10, %xmm1
706; SSE2-NEXT: movdqa %xmm9, %xmm4
707; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000708; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000709; SSE2-NEXT: pcmpeqd %xmm9, %xmm1
710; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
711; SSE2-NEXT: pand %xmm6, %xmm7
712; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
713; SSE2-NEXT: por %xmm7, %xmm1
714; SSE2-NEXT: pand %xmm1, %xmm0
715; SSE2-NEXT: pandn %xmm8, %xmm1
716; SSE2-NEXT: por %xmm0, %xmm1
717; SSE2-NEXT: movdqa %xmm3, %xmm0
718; SSE2-NEXT: pxor %xmm10, %xmm0
719; SSE2-NEXT: movdqa %xmm9, %xmm4
720; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
721; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
722; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
723; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
724; SSE2-NEXT: pand %xmm6, %xmm0
725; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
726; SSE2-NEXT: por %xmm0, %xmm6
727; SSE2-NEXT: pand %xmm6, %xmm3
728; SSE2-NEXT: pandn %xmm8, %xmm6
729; SSE2-NEXT: por %xmm3, %xmm6
730; SSE2-NEXT: movdqa %xmm2, %xmm0
731; SSE2-NEXT: pxor %xmm10, %xmm0
732; SSE2-NEXT: movdqa %xmm9, %xmm3
733; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
734; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
735; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
736; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
737; SSE2-NEXT: pand %xmm4, %xmm0
738; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
739; SSE2-NEXT: por %xmm0, %xmm3
740; SSE2-NEXT: pand %xmm3, %xmm2
741; SSE2-NEXT: pandn %xmm8, %xmm3
742; SSE2-NEXT: por %xmm2, %xmm3
743; SSE2-NEXT: movdqa %xmm3, %xmm0
744; SSE2-NEXT: pxor %xmm10, %xmm0
745; SSE2-NEXT: movdqa %xmm0, %xmm2
746; SSE2-NEXT: pcmpgtd %xmm10, %xmm2
747; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
748; SSE2-NEXT: pcmpeqd %xmm10, %xmm0
749; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
750; SSE2-NEXT: pand %xmm4, %xmm7
751; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
752; SSE2-NEXT: por %xmm7, %xmm0
753; SSE2-NEXT: pand %xmm3, %xmm0
754; SSE2-NEXT: movdqa %xmm6, %xmm2
755; SSE2-NEXT: pxor %xmm10, %xmm2
756; SSE2-NEXT: movdqa %xmm2, %xmm3
757; SSE2-NEXT: pcmpgtd %xmm10, %xmm3
758; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
759; SSE2-NEXT: pcmpeqd %xmm10, %xmm2
760; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
761; SSE2-NEXT: pand %xmm4, %xmm7
762; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
763; SSE2-NEXT: por %xmm7, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000764; SSE2-NEXT: pand %xmm6, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000765; SSE2-NEXT: movdqa %xmm1, %xmm3
766; SSE2-NEXT: pxor %xmm10, %xmm3
767; SSE2-NEXT: movdqa %xmm3, %xmm4
768; SSE2-NEXT: pcmpgtd %xmm10, %xmm4
769; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
770; SSE2-NEXT: pcmpeqd %xmm10, %xmm3
771; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
772; SSE2-NEXT: pand %xmm6, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000773; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000774; SSE2-NEXT: por %xmm3, %xmm4
775; SSE2-NEXT: pand %xmm1, %xmm4
776; SSE2-NEXT: movdqa %xmm5, %xmm1
777; SSE2-NEXT: pxor %xmm10, %xmm1
778; SSE2-NEXT: movdqa %xmm1, %xmm3
779; SSE2-NEXT: pcmpgtd %xmm10, %xmm3
780; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
781; SSE2-NEXT: pcmpeqd %xmm10, %xmm1
782; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
783; SSE2-NEXT: pand %xmm6, %xmm1
784; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
785; SSE2-NEXT: por %xmm1, %xmm3
786; SSE2-NEXT: pand %xmm5, %xmm3
787; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000788; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000789; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
790; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
791; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
792; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
793; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,0,2,4,5,6,7]
794; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000795; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000796; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
797; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000798; SSE2-NEXT: retq
799;
800; SSSE3-LABEL: trunc_packus_v8i64_v8i16:
801; SSSE3: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +0000802; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [65535,65535]
803; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
804; SSSE3-NEXT: movdqa %xmm1, %xmm5
805; SSSE3-NEXT: pxor %xmm10, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000806; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147549183,2147549183]
807; SSSE3-NEXT: movdqa %xmm9, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000808; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6
809; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
810; SSSE3-NEXT: pcmpeqd %xmm9, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000811; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
812; SSSE3-NEXT: pand %xmm7, %xmm4
813; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
814; SSSE3-NEXT: por %xmm4, %xmm5
815; SSSE3-NEXT: pand %xmm5, %xmm1
816; SSSE3-NEXT: pandn %xmm8, %xmm5
817; SSSE3-NEXT: por %xmm1, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000818; SSSE3-NEXT: movdqa %xmm0, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +0000819; SSSE3-NEXT: pxor %xmm10, %xmm1
820; SSSE3-NEXT: movdqa %xmm9, %xmm4
821; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000822; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000823; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1
824; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
825; SSSE3-NEXT: pand %xmm6, %xmm7
826; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
827; SSSE3-NEXT: por %xmm7, %xmm1
828; SSSE3-NEXT: pand %xmm1, %xmm0
829; SSSE3-NEXT: pandn %xmm8, %xmm1
830; SSSE3-NEXT: por %xmm0, %xmm1
831; SSSE3-NEXT: movdqa %xmm3, %xmm0
832; SSSE3-NEXT: pxor %xmm10, %xmm0
833; SSSE3-NEXT: movdqa %xmm9, %xmm4
834; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4
835; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
836; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
837; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
838; SSSE3-NEXT: pand %xmm6, %xmm0
839; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
840; SSSE3-NEXT: por %xmm0, %xmm6
841; SSSE3-NEXT: pand %xmm6, %xmm3
842; SSSE3-NEXT: pandn %xmm8, %xmm6
843; SSSE3-NEXT: por %xmm3, %xmm6
844; SSSE3-NEXT: movdqa %xmm2, %xmm0
845; SSSE3-NEXT: pxor %xmm10, %xmm0
846; SSSE3-NEXT: movdqa %xmm9, %xmm3
847; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3
848; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
849; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
850; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
851; SSSE3-NEXT: pand %xmm4, %xmm0
852; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
853; SSSE3-NEXT: por %xmm0, %xmm3
854; SSSE3-NEXT: pand %xmm3, %xmm2
855; SSSE3-NEXT: pandn %xmm8, %xmm3
856; SSSE3-NEXT: por %xmm2, %xmm3
857; SSSE3-NEXT: movdqa %xmm3, %xmm0
858; SSSE3-NEXT: pxor %xmm10, %xmm0
859; SSSE3-NEXT: movdqa %xmm0, %xmm2
860; SSSE3-NEXT: pcmpgtd %xmm10, %xmm2
861; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
862; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0
863; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
864; SSSE3-NEXT: pand %xmm4, %xmm7
865; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
866; SSSE3-NEXT: por %xmm7, %xmm0
867; SSSE3-NEXT: pand %xmm3, %xmm0
868; SSSE3-NEXT: movdqa %xmm6, %xmm2
869; SSSE3-NEXT: pxor %xmm10, %xmm2
870; SSSE3-NEXT: movdqa %xmm2, %xmm3
871; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3
872; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
873; SSSE3-NEXT: pcmpeqd %xmm10, %xmm2
874; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
875; SSSE3-NEXT: pand %xmm4, %xmm7
876; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
877; SSSE3-NEXT: por %xmm7, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000878; SSSE3-NEXT: pand %xmm6, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000879; SSSE3-NEXT: movdqa %xmm1, %xmm3
880; SSSE3-NEXT: pxor %xmm10, %xmm3
881; SSSE3-NEXT: movdqa %xmm3, %xmm4
882; SSSE3-NEXT: pcmpgtd %xmm10, %xmm4
883; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
884; SSSE3-NEXT: pcmpeqd %xmm10, %xmm3
885; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
886; SSSE3-NEXT: pand %xmm6, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000887; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000888; SSSE3-NEXT: por %xmm3, %xmm4
889; SSSE3-NEXT: pand %xmm1, %xmm4
890; SSSE3-NEXT: movdqa %xmm5, %xmm1
891; SSSE3-NEXT: pxor %xmm10, %xmm1
892; SSSE3-NEXT: movdqa %xmm1, %xmm3
893; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3
894; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
895; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1
896; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
897; SSSE3-NEXT: pand %xmm6, %xmm1
898; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
899; SSSE3-NEXT: por %xmm1, %xmm3
900; SSSE3-NEXT: pand %xmm5, %xmm3
901; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000902; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000903; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
904; SSSE3-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
905; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
906; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
907; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,0,2,4,5,6,7]
908; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000909; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
Simon Pilgrim0be55672018-02-11 10:52:37 +0000910; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
911; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000912; SSSE3-NEXT: retq
913;
914; SSE41-LABEL: trunc_packus_v8i64_v8i16:
915; SSE41: # %bb.0:
916; SSE41-NEXT: movdqa %xmm0, %xmm8
Simon Pilgrim0be55672018-02-11 10:52:37 +0000917; SSE41-NEXT: movapd {{.*#+}} xmm7 = [65535,65535]
918; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,0,2147483648,0]
919; SSE41-NEXT: movdqa %xmm3, %xmm0
920; SSE41-NEXT: pxor %xmm9, %xmm0
921; SSE41-NEXT: movdqa {{.*#+}} xmm10 = [2147549183,2147549183]
922; SSE41-NEXT: movdqa %xmm10, %xmm6
923; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
924; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm6[0,0,2,2]
925; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
926; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
927; SSE41-NEXT: pand %xmm5, %xmm4
928; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
929; SSE41-NEXT: por %xmm4, %xmm0
930; SSE41-NEXT: movapd %xmm7, %xmm6
931; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm6
932; SSE41-NEXT: movdqa %xmm2, %xmm0
933; SSE41-NEXT: pxor %xmm9, %xmm0
934; SSE41-NEXT: movdqa %xmm10, %xmm3
935; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
936; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
937; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
938; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000939; SSE41-NEXT: pand %xmm4, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +0000940; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
941; SSE41-NEXT: por %xmm5, %xmm0
942; SSE41-NEXT: movapd %xmm7, %xmm3
943; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm3
944; SSE41-NEXT: movdqa %xmm1, %xmm0
945; SSE41-NEXT: pxor %xmm9, %xmm0
946; SSE41-NEXT: movdqa %xmm10, %xmm2
947; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
948; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
949; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
950; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
951; SSE41-NEXT: pand %xmm4, %xmm5
952; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
953; SSE41-NEXT: por %xmm5, %xmm0
954; SSE41-NEXT: movapd %xmm7, %xmm2
955; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
956; SSE41-NEXT: movdqa %xmm8, %xmm0
957; SSE41-NEXT: pxor %xmm9, %xmm0
958; SSE41-NEXT: movdqa %xmm10, %xmm1
959; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
960; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
961; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
962; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
963; SSE41-NEXT: pand %xmm4, %xmm5
964; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
965; SSE41-NEXT: por %xmm5, %xmm0
966; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm7
967; SSE41-NEXT: movapd %xmm7, %xmm0
968; SSE41-NEXT: xorpd %xmm9, %xmm0
969; SSE41-NEXT: movapd %xmm0, %xmm1
970; SSE41-NEXT: pcmpgtd %xmm9, %xmm1
971; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
972; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
973; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
974; SSE41-NEXT: pand %xmm4, %xmm5
975; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
976; SSE41-NEXT: por %xmm5, %xmm0
977; SSE41-NEXT: pxor %xmm8, %xmm8
978; SSE41-NEXT: pxor %xmm1, %xmm1
979; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm1
980; SSE41-NEXT: movapd %xmm2, %xmm0
981; SSE41-NEXT: xorpd %xmm9, %xmm0
982; SSE41-NEXT: movapd %xmm0, %xmm5
983; SSE41-NEXT: pcmpgtd %xmm9, %xmm5
984; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
985; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
986; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
987; SSE41-NEXT: pand %xmm7, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000988; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
989; SSE41-NEXT: por %xmm4, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +0000990; SSE41-NEXT: pxor %xmm7, %xmm7
991; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm7
992; SSE41-NEXT: movapd %xmm3, %xmm0
993; SSE41-NEXT: xorpd %xmm9, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +0000994; SSE41-NEXT: movapd %xmm0, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +0000995; SSE41-NEXT: pcmpgtd %xmm9, %xmm2
996; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
997; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
998; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
999; SSE41-NEXT: pand %xmm4, %xmm5
1000; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
1001; SSE41-NEXT: por %xmm5, %xmm0
1002; SSE41-NEXT: pxor %xmm2, %xmm2
1003; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2
1004; SSE41-NEXT: movapd %xmm6, %xmm0
1005; SSE41-NEXT: xorpd %xmm9, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001006; SSE41-NEXT: movapd %xmm0, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00001007; SSE41-NEXT: pcmpgtd %xmm9, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001008; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001009; SSE41-NEXT: pcmpeqd %xmm9, %xmm0
1010; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
1011; SSE41-NEXT: pand %xmm4, %xmm5
1012; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
1013; SSE41-NEXT: por %xmm5, %xmm0
1014; SSE41-NEXT: pxor %xmm3, %xmm3
1015; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm3
1016; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm8[1,2,3],xmm3[4],xmm8[5,6,7]
1017; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm8[1,2,3],xmm2[4],xmm8[5,6,7]
1018; SSE41-NEXT: packusdw %xmm3, %xmm2
1019; SSE41-NEXT: pblendw {{.*#+}} xmm7 = xmm7[0],xmm8[1,2,3],xmm7[4],xmm8[5,6,7]
1020; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm8[1,2,3],xmm1[4],xmm8[5,6,7]
1021; SSE41-NEXT: packusdw %xmm7, %xmm1
1022; SSE41-NEXT: packusdw %xmm2, %xmm1
1023; SSE41-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001024; SSE41-NEXT: retq
1025;
1026; AVX1-LABEL: trunc_packus_v8i64_v8i16:
1027; AVX1: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001028; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [65535,65535,65535,65535]
1029; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1030; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [65535,65535]
1031; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
1032; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm5
1033; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
1034; AVX1-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
1035; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1036; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
1037; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm4
1038; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
1039; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001040; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1041; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm8
1042; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
1043; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm5
1044; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm6
1045; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7
1046; AVX1-NEXT: vpcmpgtq %xmm2, %xmm7, %xmm3
1047; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
1048; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3],xmm3[4],xmm2[5,6,7]
1049; AVX1-NEXT: vpand %xmm1, %xmm6, %xmm1
1050; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
1051; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
1052; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm3
1053; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3],xmm3[4],xmm2[5,6,7]
1054; AVX1-NEXT: vpand %xmm0, %xmm8, %xmm0
1055; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
1056; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
1057; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
1058; AVX1-NEXT: vzeroupper
1059; AVX1-NEXT: retq
1060;
1061; AVX2-SLOW-LABEL: trunc_packus_v8i64_v8i16:
1062; AVX2-SLOW: # %bb.0:
1063; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [65535,65535,65535,65535]
1064; AVX2-SLOW-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001065; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001066; AVX2-SLOW-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
1067; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001068; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001069; AVX2-SLOW-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
1070; AVX2-SLOW-NEXT: vpand %ymm1, %ymm3, %ymm1
1071; AVX2-SLOW-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2
1072; AVX2-SLOW-NEXT: vpand %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001073; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
1074; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1075; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
1076; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
1077; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1078; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
1079; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001080; AVX2-SLOW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001081; AVX2-SLOW-NEXT: vzeroupper
1082; AVX2-SLOW-NEXT: retq
1083;
1084; AVX2-FAST-LABEL: trunc_packus_v8i64_v8i16:
1085; AVX2-FAST: # %bb.0:
1086; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [65535,65535,65535,65535]
1087; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001088; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001089; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
1090; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001091; AVX2-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001092; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
1093; AVX2-FAST-NEXT: vpand %ymm1, %ymm3, %ymm1
1094; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2
1095; AVX2-FAST-NEXT: vpand %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001096; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
1097; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
1098; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
1099; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1100; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
1101; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001102; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001103; AVX2-FAST-NEXT: vzeroupper
1104; AVX2-FAST-NEXT: retq
1105;
1106; AVX512-LABEL: trunc_packus_v8i64_v8i16:
1107; AVX512: # %bb.0:
1108; AVX512-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
1109; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
1110; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
1111; AVX512-NEXT: vpmovqw %zmm0, %xmm0
1112; AVX512-NEXT: vzeroupper
1113; AVX512-NEXT: retq
; IR under test. %1/%2 form a signed min of each lane with 65535 (upper clamp).
; %3/%4 form a signed max of that result with 0 (lower clamp).
; %5 truncates the clamped <8 x i64> value to <8 x i16>.
1114 %1 = icmp slt <8 x i64> %a0, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
1115 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
1116 %3 = icmp sgt <8 x i64> %2, zeroinitializer
1117 %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
1118 %5 = trunc <8 x i64> %4 to <8 x i16>
1119 ret <8 x i16> %5
1120}
1121
; trunc_packus_v8i32_v8i16: clamp every i32 lane of %a0 to the range [0, 65535]
; using signed compares (icmp slt against 65535, then icmp sgt against zero),
; then truncate the clamped lanes to i16.
; The SSE41, AVX1 and AVX2 checks below expect the whole clamp+truncate to be
; emitted as a single packusdw/vpackusdw; the SSE2 and SSSE3 checks expect an
; explicit compare/select sequence, and the AVX512* checks expect
; vpminsd/vpmaxsd followed by vpmovdw.
; Per the NOTE at the top of the file these assertions are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them with that script rather
; than editing them by hand.
1122define <8 x i16> @trunc_packus_v8i32_v8i16(<8 x i32> %a0) {
1123; SSE2-LABEL: trunc_packus_v8i32_v8i16:
1124; SSE2: # %bb.0:
1125; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
1126; SSE2-NEXT: movdqa %xmm2, %xmm3
1127; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001128; SSE2-NEXT: pand %xmm3, %xmm1
1129; SSE2-NEXT: pandn %xmm2, %xmm3
1130; SSE2-NEXT: por %xmm1, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00001131; SSE2-NEXT: movdqa %xmm2, %xmm1
1132; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
1133; SSE2-NEXT: pand %xmm1, %xmm0
1134; SSE2-NEXT: pandn %xmm2, %xmm1
1135; SSE2-NEXT: por %xmm0, %xmm1
1136; SSE2-NEXT: pxor %xmm2, %xmm2
1137; SSE2-NEXT: movdqa %xmm1, %xmm0
1138; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
1139; SSE2-NEXT: pand %xmm1, %xmm0
1140; SSE2-NEXT: movdqa %xmm3, %xmm1
1141; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
1142; SSE2-NEXT: pand %xmm3, %xmm1
1143; SSE2-NEXT: pslld $16, %xmm1
1144; SSE2-NEXT: psrad $16, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001145; SSE2-NEXT: pslld $16, %xmm0
1146; SSE2-NEXT: psrad $16, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001147; SSE2-NEXT: packssdw %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001148; SSE2-NEXT: retq
1149;
1150; SSSE3-LABEL: trunc_packus_v8i32_v8i16:
1151; SSSE3: # %bb.0:
1152; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
1153; SSSE3-NEXT: movdqa %xmm2, %xmm3
1154; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001155; SSSE3-NEXT: pand %xmm3, %xmm1
1156; SSSE3-NEXT: pandn %xmm2, %xmm3
1157; SSSE3-NEXT: por %xmm1, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00001158; SSSE3-NEXT: movdqa %xmm2, %xmm1
1159; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
1160; SSSE3-NEXT: pand %xmm1, %xmm0
1161; SSSE3-NEXT: pandn %xmm2, %xmm1
1162; SSSE3-NEXT: por %xmm0, %xmm1
1163; SSSE3-NEXT: pxor %xmm2, %xmm2
1164; SSSE3-NEXT: movdqa %xmm1, %xmm0
1165; SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
1166; SSSE3-NEXT: pand %xmm1, %xmm0
1167; SSSE3-NEXT: movdqa %xmm3, %xmm1
1168; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
1169; SSSE3-NEXT: pand %xmm3, %xmm1
1170; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
1171; SSSE3-NEXT: pshufb %xmm2, %xmm1
1172; SSSE3-NEXT: pshufb %xmm2, %xmm0
1173; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001174; SSSE3-NEXT: retq
1175;
1176; SSE41-LABEL: trunc_packus_v8i32_v8i16:
1177; SSE41: # %bb.0:
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00001178; SSE41-NEXT: packusdw %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001179; SSE41-NEXT: retq
1180;
1181; AVX1-LABEL: trunc_packus_v8i32_v8i16:
1182; AVX1: # %bb.0:
1183; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Simon Pilgrimb4e789e2018-02-07 15:48:44 +00001184; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001185; AVX1-NEXT: vzeroupper
1186; AVX1-NEXT: retq
1187;
1188; AVX2-LABEL: trunc_packus_v8i32_v8i16:
1189; AVX2: # %bb.0:
Simon Pilgrimb4e789e2018-02-07 15:48:44 +00001190; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1191; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001192; AVX2-NEXT: vzeroupper
1193; AVX2-NEXT: retq
1194;
1195; AVX512F-LABEL: trunc_packus_v8i32_v8i16:
1196; AVX512F: # %bb.0:
1197; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535]
1198; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1199; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1200; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1201; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001202; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001203; AVX512F-NEXT: vzeroupper
1204; AVX512F-NEXT: retq
1205;
1206; AVX512VL-LABEL: trunc_packus_v8i32_v8i16:
1207; AVX512VL: # %bb.0:
1208; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
1209; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1210; AVX512VL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1211; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
1212; AVX512VL-NEXT: vzeroupper
1213; AVX512VL-NEXT: retq
1214;
1215; AVX512BW-LABEL: trunc_packus_v8i32_v8i16:
1216; AVX512BW: # %bb.0:
1217; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535]
1218; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1219; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
1220; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1221; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +00001222; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001223; AVX512BW-NEXT: vzeroupper
1224; AVX512BW-NEXT: retq
1225;
1226; AVX512BWVL-LABEL: trunc_packus_v8i32_v8i16:
1227; AVX512BWVL: # %bb.0:
1228; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
1229; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1230; AVX512BWVL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
1231; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
1232; AVX512BWVL-NEXT: vzeroupper
1233; AVX512BWVL-NEXT: retq
; IR under test. %1/%2 form a signed min of each lane with 65535 (upper clamp).
; %3/%4 form a signed max of that result with 0 (lower clamp).
; %5 truncates the clamped <8 x i32> value to <8 x i16>.
1234 %1 = icmp slt <8 x i32> %a0, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
1235 %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
1236 %3 = icmp sgt <8 x i32> %2, zeroinitializer
1237 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
1238 %5 = trunc <8 x i32> %4 to <8 x i16>
1239 ret <8 x i16> %5
1240}
1241
1242define <16 x i16> @trunc_packus_v16i32_v16i16(<16 x i32> %a0) {
1243; SSE2-LABEL: trunc_packus_v16i32_v16i16:
1244; SSE2: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001245; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [65535,65535,65535,65535]
1246; SSE2-NEXT: movdqa %xmm6, %xmm4
1247; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
1248; SSE2-NEXT: pand %xmm4, %xmm1
1249; SSE2-NEXT: pandn %xmm6, %xmm4
1250; SSE2-NEXT: por %xmm1, %xmm4
1251; SSE2-NEXT: movdqa %xmm6, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001252; SSE2-NEXT: pcmpgtd %xmm0, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001253; SSE2-NEXT: pand %xmm5, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001254; SSE2-NEXT: pandn %xmm6, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001255; SSE2-NEXT: por %xmm0, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001256; SSE2-NEXT: movdqa %xmm6, %xmm0
1257; SSE2-NEXT: pcmpgtd %xmm3, %xmm0
1258; SSE2-NEXT: pand %xmm0, %xmm3
1259; SSE2-NEXT: pandn %xmm6, %xmm0
1260; SSE2-NEXT: por %xmm3, %xmm0
1261; SSE2-NEXT: movdqa %xmm6, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001262; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00001263; SSE2-NEXT: pand %xmm3, %xmm2
1264; SSE2-NEXT: pandn %xmm6, %xmm3
1265; SSE2-NEXT: por %xmm2, %xmm3
1266; SSE2-NEXT: pxor %xmm2, %xmm2
1267; SSE2-NEXT: movdqa %xmm3, %xmm1
1268; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
1269; SSE2-NEXT: pand %xmm3, %xmm1
1270; SSE2-NEXT: movdqa %xmm0, %xmm3
1271; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
1272; SSE2-NEXT: pand %xmm0, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001273; SSE2-NEXT: movdqa %xmm5, %xmm0
1274; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001275; SSE2-NEXT: pand %xmm5, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001276; SSE2-NEXT: movdqa %xmm4, %xmm5
1277; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
1278; SSE2-NEXT: pand %xmm4, %xmm5
1279; SSE2-NEXT: pslld $16, %xmm5
1280; SSE2-NEXT: psrad $16, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001281; SSE2-NEXT: pslld $16, %xmm0
1282; SSE2-NEXT: psrad $16, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001283; SSE2-NEXT: packssdw %xmm5, %xmm0
1284; SSE2-NEXT: pslld $16, %xmm3
1285; SSE2-NEXT: psrad $16, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001286; SSE2-NEXT: pslld $16, %xmm1
1287; SSE2-NEXT: psrad $16, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001288; SSE2-NEXT: packssdw %xmm3, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001289; SSE2-NEXT: retq
1290;
1291; SSSE3-LABEL: trunc_packus_v16i32_v16i16:
1292; SSSE3: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001293; SSSE3-NEXT: movdqa {{.*#+}} xmm6 = [65535,65535,65535,65535]
1294; SSSE3-NEXT: movdqa %xmm6, %xmm4
1295; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
1296; SSSE3-NEXT: pand %xmm4, %xmm1
1297; SSSE3-NEXT: pandn %xmm6, %xmm4
1298; SSSE3-NEXT: por %xmm1, %xmm4
1299; SSSE3-NEXT: movdqa %xmm6, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001300; SSSE3-NEXT: pcmpgtd %xmm0, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001301; SSSE3-NEXT: pand %xmm5, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001302; SSSE3-NEXT: pandn %xmm6, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001303; SSSE3-NEXT: por %xmm0, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001304; SSSE3-NEXT: movdqa %xmm6, %xmm0
1305; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
1306; SSSE3-NEXT: pand %xmm0, %xmm3
1307; SSSE3-NEXT: pandn %xmm6, %xmm0
1308; SSSE3-NEXT: por %xmm3, %xmm0
1309; SSSE3-NEXT: movdqa %xmm6, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001310; SSSE3-NEXT: pcmpgtd %xmm2, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00001311; SSSE3-NEXT: pand %xmm3, %xmm2
1312; SSSE3-NEXT: pandn %xmm6, %xmm3
1313; SSSE3-NEXT: por %xmm2, %xmm3
1314; SSSE3-NEXT: pxor %xmm2, %xmm2
1315; SSSE3-NEXT: movdqa %xmm3, %xmm1
1316; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
1317; SSSE3-NEXT: pand %xmm3, %xmm1
1318; SSSE3-NEXT: movdqa %xmm0, %xmm3
1319; SSSE3-NEXT: pcmpgtd %xmm2, %xmm3
1320; SSSE3-NEXT: pand %xmm0, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001321; SSSE3-NEXT: movdqa %xmm5, %xmm0
1322; SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001323; SSSE3-NEXT: pand %xmm5, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001324; SSSE3-NEXT: movdqa %xmm4, %xmm5
1325; SSSE3-NEXT: pcmpgtd %xmm2, %xmm5
1326; SSSE3-NEXT: pand %xmm4, %xmm5
1327; SSSE3-NEXT: pslld $16, %xmm5
1328; SSSE3-NEXT: psrad $16, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001329; SSSE3-NEXT: pslld $16, %xmm0
1330; SSSE3-NEXT: psrad $16, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001331; SSSE3-NEXT: packssdw %xmm5, %xmm0
1332; SSSE3-NEXT: pslld $16, %xmm3
1333; SSSE3-NEXT: psrad $16, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001334; SSSE3-NEXT: pslld $16, %xmm1
1335; SSSE3-NEXT: psrad $16, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001336; SSSE3-NEXT: packssdw %xmm3, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001337; SSSE3-NEXT: retq
1338;
1339; SSE41-LABEL: trunc_packus_v16i32_v16i16:
1340; SSE41: # %bb.0:
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001341; SSE41-NEXT: packusdw %xmm1, %xmm0
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00001342; SSE41-NEXT: packusdw %xmm3, %xmm2
1343; SSE41-NEXT: movdqa %xmm2, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001344; SSE41-NEXT: retq
1345;
1346; AVX1-LABEL: trunc_packus_v16i32_v16i16:
1347; AVX1: # %bb.0:
1348; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001349; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00001350; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001351; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
1352; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1353; AVX1-NEXT: retq
1354;
1355; AVX2-LABEL: trunc_packus_v16i32_v16i16:
1356; AVX2: # %bb.0:
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00001357; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
1358; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001359; AVX2-NEXT: retq
1360;
1361; AVX512-LABEL: trunc_packus_v16i32_v16i16:
1362; AVX512: # %bb.0:
1363; AVX512-NEXT: vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
1364; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
1365; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
1366; AVX512-NEXT: vpmovdw %zmm0, %ymm0
1367; AVX512-NEXT: retq
1368 %1 = icmp slt <16 x i32> %a0, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
1369 %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
1370 %3 = icmp sgt <16 x i32> %2, zeroinitializer
1371 %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
1372 %5 = trunc <16 x i32> %4 to <16 x i16>
1373 ret <16 x i16> %5
1374}
1375
1376;
1377; PACKUS saturation truncation to vXi8
1378;
1379
; Test: clamp each signed i64 lane of %a0 to the range [0, 255] and truncate
; the clamped vector to <8 x i8>.
; The per-prefix CHECK lines inside the function are autogenerated (see the
; NOTE on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1380define <8 x i8> @trunc_packus_v8i64_v8i8(<8 x i64> %a0) {
1381; SSE2-LABEL: trunc_packus_v8i64_v8i8:
1382; SSE2: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001383; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [255,255]
1384; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001385; SSE2-NEXT: movdqa %xmm2, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001386; SSE2-NEXT: pxor %xmm10, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001387; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483903,2147483903]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001388; SSE2-NEXT: movdqa %xmm9, %xmm7
1389; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
1390; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[0,0,2,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001391; SSE2-NEXT: pcmpeqd %xmm9, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001392; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001393; SSE2-NEXT: pand %xmm6, %xmm4
1394; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001395; SSE2-NEXT: por %xmm4, %xmm5
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001396; SSE2-NEXT: pand %xmm5, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001397; SSE2-NEXT: pandn %xmm8, %xmm5
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001398; SSE2-NEXT: por %xmm2, %xmm5
1399; SSE2-NEXT: movdqa %xmm3, %xmm2
1400; SSE2-NEXT: pxor %xmm10, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001401; SSE2-NEXT: movdqa %xmm9, %xmm4
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001402; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001403; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001404; SSE2-NEXT: pcmpeqd %xmm9, %xmm2
1405; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001406; SSE2-NEXT: pand %xmm6, %xmm7
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001407; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1408; SSE2-NEXT: por %xmm7, %xmm2
1409; SSE2-NEXT: pand %xmm2, %xmm3
1410; SSE2-NEXT: pandn %xmm8, %xmm2
1411; SSE2-NEXT: por %xmm3, %xmm2
1412; SSE2-NEXT: movdqa %xmm0, %xmm3
1413; SSE2-NEXT: pxor %xmm10, %xmm3
1414; SSE2-NEXT: movdqa %xmm9, %xmm4
1415; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
1416; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
1417; SSE2-NEXT: pcmpeqd %xmm9, %xmm3
1418; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
1419; SSE2-NEXT: pand %xmm6, %xmm7
1420; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1421; SSE2-NEXT: por %xmm7, %xmm3
1422; SSE2-NEXT: pand %xmm3, %xmm0
1423; SSE2-NEXT: pandn %xmm8, %xmm3
1424; SSE2-NEXT: por %xmm0, %xmm3
1425; SSE2-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001426; SSE2-NEXT: pxor %xmm10, %xmm0
1427; SSE2-NEXT: movdqa %xmm9, %xmm4
1428; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
1429; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
1430; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
1431; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1432; SSE2-NEXT: pand %xmm6, %xmm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001433; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1434; SSE2-NEXT: por %xmm0, %xmm4
1435; SSE2-NEXT: pand %xmm4, %xmm1
1436; SSE2-NEXT: pandn %xmm8, %xmm4
1437; SSE2-NEXT: por %xmm1, %xmm4
1438; SSE2-NEXT: movdqa %xmm4, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001439; SSE2-NEXT: pxor %xmm10, %xmm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001440; SSE2-NEXT: movdqa %xmm0, %xmm1
1441; SSE2-NEXT: pcmpgtd %xmm10, %xmm1
1442; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
1443; SSE2-NEXT: pcmpeqd %xmm10, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001444; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001445; SSE2-NEXT: pand %xmm6, %xmm0
1446; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1447; SSE2-NEXT: por %xmm0, %xmm1
1448; SSE2-NEXT: pand %xmm4, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001449; SSE2-NEXT: movdqa %xmm3, %xmm0
1450; SSE2-NEXT: pxor %xmm10, %xmm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001451; SSE2-NEXT: movdqa %xmm0, %xmm4
Simon Pilgrim0be55672018-02-11 10:52:37 +00001452; SSE2-NEXT: pcmpgtd %xmm10, %xmm4
1453; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001454; SSE2-NEXT: pcmpeqd %xmm10, %xmm0
1455; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
1456; SSE2-NEXT: pand %xmm6, %xmm7
1457; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
1458; SSE2-NEXT: por %xmm7, %xmm0
1459; SSE2-NEXT: pand %xmm3, %xmm0
1460; SSE2-NEXT: packssdw %xmm1, %xmm0
1461; SSE2-NEXT: movdqa %xmm2, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001462; SSE2-NEXT: pxor %xmm10, %xmm1
1463; SSE2-NEXT: movdqa %xmm1, %xmm3
1464; SSE2-NEXT: pcmpgtd %xmm10, %xmm3
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001465; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001466; SSE2-NEXT: pcmpeqd %xmm10, %xmm1
1467; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001468; SSE2-NEXT: pand %xmm4, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001469; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1470; SSE2-NEXT: por %xmm1, %xmm3
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001471; SSE2-NEXT: pand %xmm2, %xmm3
1472; SSE2-NEXT: movdqa %xmm5, %xmm1
1473; SSE2-NEXT: pxor %xmm10, %xmm1
1474; SSE2-NEXT: movdqa %xmm1, %xmm2
1475; SSE2-NEXT: pcmpgtd %xmm10, %xmm2
1476; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
1477; SSE2-NEXT: pcmpeqd %xmm10, %xmm1
1478; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1479; SSE2-NEXT: pand %xmm4, %xmm1
1480; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1481; SSE2-NEXT: por %xmm1, %xmm2
1482; SSE2-NEXT: pand %xmm5, %xmm2
1483; SSE2-NEXT: packssdw %xmm3, %xmm2
1484; SSE2-NEXT: packssdw %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001485; SSE2-NEXT: retq
1486;
1487; SSSE3-LABEL: trunc_packus_v8i64_v8i8:
1488; SSSE3: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001489; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [255,255]
1490; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001491; SSSE3-NEXT: movdqa %xmm2, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001492; SSSE3-NEXT: pxor %xmm10, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001493; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483903,2147483903]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001494; SSSE3-NEXT: movdqa %xmm9, %xmm7
1495; SSSE3-NEXT: pcmpgtd %xmm5, %xmm7
1496; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm7[0,0,2,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001497; SSSE3-NEXT: pcmpeqd %xmm9, %xmm5
Simon Pilgrim0be55672018-02-11 10:52:37 +00001498; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001499; SSSE3-NEXT: pand %xmm6, %xmm4
1500; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001501; SSSE3-NEXT: por %xmm4, %xmm5
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001502; SSSE3-NEXT: pand %xmm5, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001503; SSSE3-NEXT: pandn %xmm8, %xmm5
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001504; SSSE3-NEXT: por %xmm2, %xmm5
1505; SSSE3-NEXT: movdqa %xmm3, %xmm2
1506; SSSE3-NEXT: pxor %xmm10, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001507; SSSE3-NEXT: movdqa %xmm9, %xmm4
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001508; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001509; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001510; SSSE3-NEXT: pcmpeqd %xmm9, %xmm2
1511; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001512; SSSE3-NEXT: pand %xmm6, %xmm7
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001513; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1514; SSSE3-NEXT: por %xmm7, %xmm2
1515; SSSE3-NEXT: pand %xmm2, %xmm3
1516; SSSE3-NEXT: pandn %xmm8, %xmm2
1517; SSSE3-NEXT: por %xmm3, %xmm2
1518; SSSE3-NEXT: movdqa %xmm0, %xmm3
1519; SSSE3-NEXT: pxor %xmm10, %xmm3
1520; SSSE3-NEXT: movdqa %xmm9, %xmm4
1521; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4
1522; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
1523; SSSE3-NEXT: pcmpeqd %xmm9, %xmm3
1524; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
1525; SSSE3-NEXT: pand %xmm6, %xmm7
1526; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1527; SSSE3-NEXT: por %xmm7, %xmm3
1528; SSSE3-NEXT: pand %xmm3, %xmm0
1529; SSSE3-NEXT: pandn %xmm8, %xmm3
1530; SSSE3-NEXT: por %xmm0, %xmm3
1531; SSSE3-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001532; SSSE3-NEXT: pxor %xmm10, %xmm0
1533; SSSE3-NEXT: movdqa %xmm9, %xmm4
1534; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4
1535; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
1536; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
1537; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1538; SSSE3-NEXT: pand %xmm6, %xmm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001539; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1540; SSSE3-NEXT: por %xmm0, %xmm4
1541; SSSE3-NEXT: pand %xmm4, %xmm1
1542; SSSE3-NEXT: pandn %xmm8, %xmm4
1543; SSSE3-NEXT: por %xmm1, %xmm4
1544; SSSE3-NEXT: movdqa %xmm4, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001545; SSSE3-NEXT: pxor %xmm10, %xmm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001546; SSSE3-NEXT: movdqa %xmm0, %xmm1
1547; SSSE3-NEXT: pcmpgtd %xmm10, %xmm1
1548; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
1549; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001550; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001551; SSSE3-NEXT: pand %xmm6, %xmm0
1552; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1553; SSSE3-NEXT: por %xmm0, %xmm1
1554; SSSE3-NEXT: pand %xmm4, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001555; SSSE3-NEXT: movdqa %xmm3, %xmm0
1556; SSSE3-NEXT: pxor %xmm10, %xmm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001557; SSSE3-NEXT: movdqa %xmm0, %xmm4
Simon Pilgrim0be55672018-02-11 10:52:37 +00001558; SSSE3-NEXT: pcmpgtd %xmm10, %xmm4
1559; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001560; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0
1561; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
1562; SSSE3-NEXT: pand %xmm6, %xmm7
1563; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
1564; SSSE3-NEXT: por %xmm7, %xmm0
1565; SSSE3-NEXT: pand %xmm3, %xmm0
1566; SSSE3-NEXT: packssdw %xmm1, %xmm0
1567; SSSE3-NEXT: movdqa %xmm2, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001568; SSSE3-NEXT: pxor %xmm10, %xmm1
1569; SSSE3-NEXT: movdqa %xmm1, %xmm3
1570; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001571; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001572; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1
1573; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001574; SSSE3-NEXT: pand %xmm4, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001575; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1576; SSSE3-NEXT: por %xmm1, %xmm3
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001577; SSSE3-NEXT: pand %xmm2, %xmm3
1578; SSSE3-NEXT: movdqa %xmm5, %xmm1
1579; SSSE3-NEXT: pxor %xmm10, %xmm1
1580; SSSE3-NEXT: movdqa %xmm1, %xmm2
1581; SSSE3-NEXT: pcmpgtd %xmm10, %xmm2
1582; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
1583; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1
1584; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1585; SSSE3-NEXT: pand %xmm4, %xmm1
1586; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1587; SSSE3-NEXT: por %xmm1, %xmm2
1588; SSSE3-NEXT: pand %xmm5, %xmm2
1589; SSSE3-NEXT: packssdw %xmm3, %xmm2
1590; SSSE3-NEXT: packssdw %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001591; SSSE3-NEXT: retq
1592;
1593; SSE41-LABEL: trunc_packus_v8i64_v8i8:
1594; SSE41: # %bb.0:
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001595; SSE41-NEXT: movdqa %xmm0, %xmm9
Simon Pilgrim0be55672018-02-11 10:52:37 +00001596; SSE41-NEXT: movapd {{.*#+}} xmm7 = [255,255]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001597; SSE41-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
Simon Pilgrim0be55672018-02-11 10:52:37 +00001598; SSE41-NEXT: movdqa %xmm2, %xmm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001599; SSE41-NEXT: pxor %xmm10, %xmm0
1600; SSE41-NEXT: movdqa {{.*#+}} xmm11 = [2147483903,2147483903]
1601; SSE41-NEXT: movdqa %xmm11, %xmm4
1602; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
1603; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1604; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
1605; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1606; SSE41-NEXT: pand %xmm5, %xmm6
1607; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
1608; SSE41-NEXT: por %xmm6, %xmm0
1609; SSE41-NEXT: movapd %xmm7, %xmm8
1610; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm8
1611; SSE41-NEXT: movdqa %xmm3, %xmm0
1612; SSE41-NEXT: pxor %xmm10, %xmm0
1613; SSE41-NEXT: movdqa %xmm11, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001614; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
1615; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001616; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001617; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
1618; SSE41-NEXT: pand %xmm4, %xmm5
1619; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
1620; SSE41-NEXT: por %xmm5, %xmm0
1621; SSE41-NEXT: movapd %xmm7, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00001622; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001623; SSE41-NEXT: movdqa %xmm9, %xmm0
1624; SSE41-NEXT: pxor %xmm10, %xmm0
1625; SSE41-NEXT: movdqa %xmm11, %xmm3
1626; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001627; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001628; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00001629; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
1630; SSE41-NEXT: pand %xmm4, %xmm5
1631; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
1632; SSE41-NEXT: por %xmm5, %xmm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001633; SSE41-NEXT: movapd %xmm7, %xmm4
1634; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm4
1635; SSE41-NEXT: movdqa %xmm1, %xmm0
1636; SSE41-NEXT: pxor %xmm10, %xmm0
1637; SSE41-NEXT: movdqa %xmm11, %xmm3
1638; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
1639; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
1640; SSE41-NEXT: pcmpeqd %xmm11, %xmm0
1641; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1642; SSE41-NEXT: pand %xmm5, %xmm6
1643; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
1644; SSE41-NEXT: por %xmm6, %xmm0
1645; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm7
Simon Pilgrim0be55672018-02-11 10:52:37 +00001646; SSE41-NEXT: pxor %xmm3, %xmm3
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001647; SSE41-NEXT: movapd %xmm7, %xmm0
1648; SSE41-NEXT: xorpd %xmm10, %xmm0
1649; SSE41-NEXT: movapd %xmm0, %xmm1
1650; SSE41-NEXT: pcmpgtd %xmm10, %xmm1
1651; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
1652; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
1653; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1654; SSE41-NEXT: pand %xmm5, %xmm6
1655; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1656; SSE41-NEXT: por %xmm6, %xmm0
1657; SSE41-NEXT: pxor %xmm5, %xmm5
1658; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm5
1659; SSE41-NEXT: movapd %xmm4, %xmm0
1660; SSE41-NEXT: xorpd %xmm10, %xmm0
1661; SSE41-NEXT: movapd %xmm0, %xmm1
1662; SSE41-NEXT: pcmpgtd %xmm10, %xmm1
1663; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
1664; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
1665; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
1666; SSE41-NEXT: pand %xmm6, %xmm7
1667; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1668; SSE41-NEXT: por %xmm7, %xmm0
1669; SSE41-NEXT: pxor %xmm1, %xmm1
1670; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1
1671; SSE41-NEXT: packssdw %xmm5, %xmm1
1672; SSE41-NEXT: movapd %xmm2, %xmm0
1673; SSE41-NEXT: xorpd %xmm10, %xmm0
1674; SSE41-NEXT: movapd %xmm0, %xmm4
1675; SSE41-NEXT: pcmpgtd %xmm10, %xmm4
1676; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1677; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
1678; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1679; SSE41-NEXT: pand %xmm5, %xmm6
1680; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
1681; SSE41-NEXT: por %xmm6, %xmm0
1682; SSE41-NEXT: pxor %xmm4, %xmm4
1683; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4
1684; SSE41-NEXT: movapd %xmm8, %xmm0
1685; SSE41-NEXT: xorpd %xmm10, %xmm0
1686; SSE41-NEXT: movapd %xmm0, %xmm2
1687; SSE41-NEXT: pcmpgtd %xmm10, %xmm2
1688; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2]
1689; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
1690; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1691; SSE41-NEXT: pand %xmm5, %xmm6
1692; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
1693; SSE41-NEXT: por %xmm6, %xmm0
1694; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm3
1695; SSE41-NEXT: packssdw %xmm4, %xmm3
1696; SSE41-NEXT: packssdw %xmm3, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00001697; SSE41-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001698; SSE41-NEXT: retq
1699;
1700; AVX1-LABEL: trunc_packus_v8i64_v8i8:
1701; AVX1: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00001702; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [255,255,255,255]
1703; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1704; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255]
1705; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
1706; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm5
1707; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
1708; AVX1-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
1709; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1710; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
1711; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm4
1712; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
1713; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001714; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001715; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001716; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
1717; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm5
1718; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm6
1719; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001720; AVX1-NEXT: vpcmpgtq %xmm2, %xmm7, %xmm2
1721; AVX1-NEXT: vpand %xmm7, %xmm2, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001722; AVX1-NEXT: vpand %xmm1, %xmm6, %xmm1
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001723; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
1724; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm2
1725; AVX1-NEXT: vpand %xmm0, %xmm3, %xmm0
1726; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1727; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001728; AVX1-NEXT: vzeroupper
1729; AVX1-NEXT: retq
1730;
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00001731; AVX2-LABEL: trunc_packus_v8i64_v8i8:
1732; AVX2: # %bb.0:
1733; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [255,255,255,255]
1734; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
1735; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
1736; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
1737; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
1738; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1739; AVX2-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
1740; AVX2-NEXT: vpand %ymm1, %ymm3, %ymm1
1741; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2
1742; AVX2-NEXT: vpand %ymm0, %ymm2, %ymm0
1743; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
1744; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1745; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1746; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
1747; AVX2-NEXT: vzeroupper
1748; AVX2-NEXT: retq
Simon Pilgrim65ec9232018-01-26 14:58:50 +00001749;
1750; AVX512-LABEL: trunc_packus_v8i64_v8i8:
1751; AVX512: # %bb.0:
1752; AVX512-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
1753; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
1754; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
1755; AVX512-NEXT: vpmovqw %zmm0, %xmm0
1756; AVX512-NEXT: vzeroupper
1757; AVX512-NEXT: retq
; Upper clamp (signed): lanes of %a0 that are less than 255 are kept, every
; other lane is replaced with 255.
1758 %1 = icmp slt <8 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
1759 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
; Lower clamp (signed): lanes of %2 that are greater than 0 are kept, every
; other lane (zero or negative) is replaced with 0.
1760 %3 = icmp sgt <8 x i64> %2, zeroinitializer
1761 %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
; Each lane of %4 now lies in [0, 255], so narrowing i64 -> i8 discards only
; zero bits.
1762 %5 = trunc <8 x i64> %4 to <8 x i8>
1763 ret <8 x i8> %5
1764}
1765
Simon Pilgrim689d8132018-02-15 17:48:34 +00001766define void @trunc_packus_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
1767; SSE2-LABEL: trunc_packus_v8i64_v8i8_store:
1768; SSE2: # %bb.0:
1769; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [255,255]
1770; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
1771; SSE2-NEXT: movdqa %xmm3, %xmm4
1772; SSE2-NEXT: pxor %xmm10, %xmm4
1773; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483903,2147483903]
1774; SSE2-NEXT: movdqa %xmm9, %xmm6
1775; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
1776; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1777; SSE2-NEXT: pcmpeqd %xmm9, %xmm4
1778; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1779; SSE2-NEXT: pand %xmm7, %xmm4
1780; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm6[1,1,3,3]
1781; SSE2-NEXT: por %xmm4, %xmm11
1782; SSE2-NEXT: pand %xmm11, %xmm3
1783; SSE2-NEXT: pandn %xmm8, %xmm11
1784; SSE2-NEXT: por %xmm3, %xmm11
1785; SSE2-NEXT: movdqa %xmm2, %xmm3
1786; SSE2-NEXT: pxor %xmm10, %xmm3
1787; SSE2-NEXT: movdqa %xmm9, %xmm4
1788; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
1789; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm4[0,0,2,2]
1790; SSE2-NEXT: pcmpeqd %xmm9, %xmm3
1791; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,1,3,3]
1792; SSE2-NEXT: pand %xmm7, %xmm5
1793; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1794; SSE2-NEXT: por %xmm5, %xmm3
1795; SSE2-NEXT: pand %xmm3, %xmm2
1796; SSE2-NEXT: pandn %xmm8, %xmm3
1797; SSE2-NEXT: por %xmm2, %xmm3
1798; SSE2-NEXT: movdqa %xmm1, %xmm2
1799; SSE2-NEXT: pxor %xmm10, %xmm2
1800; SSE2-NEXT: movdqa %xmm9, %xmm4
1801; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
1802; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1803; SSE2-NEXT: pcmpeqd %xmm9, %xmm2
1804; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
1805; SSE2-NEXT: pand %xmm5, %xmm7
1806; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1807; SSE2-NEXT: por %xmm7, %xmm2
1808; SSE2-NEXT: pand %xmm2, %xmm1
1809; SSE2-NEXT: pandn %xmm8, %xmm2
1810; SSE2-NEXT: por %xmm1, %xmm2
1811; SSE2-NEXT: movdqa %xmm0, %xmm1
1812; SSE2-NEXT: pxor %xmm10, %xmm1
1813; SSE2-NEXT: movdqa %xmm9, %xmm4
1814; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
1815; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1816; SSE2-NEXT: pcmpeqd %xmm9, %xmm1
1817; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
1818; SSE2-NEXT: pand %xmm5, %xmm7
1819; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
1820; SSE2-NEXT: por %xmm7, %xmm1
1821; SSE2-NEXT: pand %xmm1, %xmm0
1822; SSE2-NEXT: pandn %xmm8, %xmm1
1823; SSE2-NEXT: por %xmm0, %xmm1
1824; SSE2-NEXT: movdqa %xmm1, %xmm0
1825; SSE2-NEXT: pxor %xmm10, %xmm0
1826; SSE2-NEXT: movdqa %xmm0, %xmm4
1827; SSE2-NEXT: pcmpgtd %xmm10, %xmm4
1828; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1829; SSE2-NEXT: pcmpeqd %xmm10, %xmm0
1830; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
1831; SSE2-NEXT: pand %xmm5, %xmm7
1832; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
1833; SSE2-NEXT: por %xmm7, %xmm0
1834; SSE2-NEXT: movdqa %xmm2, %xmm4
1835; SSE2-NEXT: pxor %xmm10, %xmm4
1836; SSE2-NEXT: movdqa %xmm4, %xmm5
1837; SSE2-NEXT: pcmpgtd %xmm10, %xmm5
1838; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
1839; SSE2-NEXT: pcmpeqd %xmm10, %xmm4
1840; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1841; SSE2-NEXT: pand %xmm7, %xmm4
1842; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm5[1,1,3,3]
1843; SSE2-NEXT: por %xmm4, %xmm7
1844; SSE2-NEXT: movdqa %xmm3, %xmm4
1845; SSE2-NEXT: pxor %xmm10, %xmm4
1846; SSE2-NEXT: movdqa %xmm4, %xmm5
1847; SSE2-NEXT: pcmpgtd %xmm10, %xmm5
1848; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm5[0,0,2,2]
1849; SSE2-NEXT: pcmpeqd %xmm10, %xmm4
1850; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
1851; SSE2-NEXT: pand %xmm9, %xmm6
1852; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
1853; SSE2-NEXT: por %xmm6, %xmm4
1854; SSE2-NEXT: movdqa %xmm11, %xmm5
1855; SSE2-NEXT: pxor %xmm10, %xmm5
1856; SSE2-NEXT: movdqa %xmm5, %xmm6
1857; SSE2-NEXT: pcmpgtd %xmm10, %xmm6
1858; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm6[0,0,2,2]
1859; SSE2-NEXT: pcmpeqd %xmm10, %xmm5
1860; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
1861; SSE2-NEXT: pand %xmm9, %xmm5
1862; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1863; SSE2-NEXT: por %xmm5, %xmm6
1864; SSE2-NEXT: pand %xmm8, %xmm6
1865; SSE2-NEXT: pand %xmm11, %xmm6
1866; SSE2-NEXT: pand %xmm8, %xmm4
1867; SSE2-NEXT: pand %xmm3, %xmm4
1868; SSE2-NEXT: packuswb %xmm6, %xmm4
1869; SSE2-NEXT: pand %xmm8, %xmm7
1870; SSE2-NEXT: pand %xmm2, %xmm7
1871; SSE2-NEXT: pand %xmm8, %xmm0
1872; SSE2-NEXT: pand %xmm1, %xmm0
1873; SSE2-NEXT: packuswb %xmm7, %xmm0
1874; SSE2-NEXT: packuswb %xmm4, %xmm0
1875; SSE2-NEXT: packuswb %xmm0, %xmm0
1876; SSE2-NEXT: movq %xmm0, (%rdi)
1877; SSE2-NEXT: retq
1878;
1879; SSSE3-LABEL: trunc_packus_v8i64_v8i8_store:
1880; SSSE3: # %bb.0:
1881; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [255,255]
1882; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
1883; SSSE3-NEXT: movdqa %xmm3, %xmm4
1884; SSSE3-NEXT: pxor %xmm10, %xmm4
1885; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483903,2147483903]
1886; SSSE3-NEXT: movdqa %xmm9, %xmm6
1887; SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
1888; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1889; SSSE3-NEXT: pcmpeqd %xmm9, %xmm4
1890; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1891; SSSE3-NEXT: pand %xmm7, %xmm4
1892; SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm6[1,1,3,3]
1893; SSSE3-NEXT: por %xmm4, %xmm11
1894; SSSE3-NEXT: pand %xmm11, %xmm3
1895; SSSE3-NEXT: pandn %xmm8, %xmm11
1896; SSSE3-NEXT: por %xmm3, %xmm11
1897; SSSE3-NEXT: movdqa %xmm2, %xmm3
1898; SSSE3-NEXT: pxor %xmm10, %xmm3
1899; SSSE3-NEXT: movdqa %xmm9, %xmm4
1900; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4
1901; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm4[0,0,2,2]
1902; SSSE3-NEXT: pcmpeqd %xmm9, %xmm3
1903; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,1,3,3]
1904; SSSE3-NEXT: pand %xmm7, %xmm5
1905; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1906; SSSE3-NEXT: por %xmm5, %xmm3
1907; SSSE3-NEXT: pand %xmm3, %xmm2
1908; SSSE3-NEXT: pandn %xmm8, %xmm3
1909; SSSE3-NEXT: por %xmm2, %xmm3
1910; SSSE3-NEXT: movdqa %xmm1, %xmm2
1911; SSSE3-NEXT: pxor %xmm10, %xmm2
1912; SSSE3-NEXT: movdqa %xmm9, %xmm4
1913; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4
1914; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1915; SSSE3-NEXT: pcmpeqd %xmm9, %xmm2
1916; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
1917; SSSE3-NEXT: pand %xmm5, %xmm7
1918; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1919; SSSE3-NEXT: por %xmm7, %xmm2
1920; SSSE3-NEXT: pand %xmm2, %xmm1
1921; SSSE3-NEXT: pandn %xmm8, %xmm2
1922; SSSE3-NEXT: por %xmm1, %xmm2
1923; SSSE3-NEXT: movdqa %xmm0, %xmm1
1924; SSSE3-NEXT: pxor %xmm10, %xmm1
1925; SSSE3-NEXT: movdqa %xmm9, %xmm4
1926; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
1927; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1928; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1
1929; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
1930; SSSE3-NEXT: pand %xmm5, %xmm7
1931; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
1932; SSSE3-NEXT: por %xmm7, %xmm1
1933; SSSE3-NEXT: pand %xmm1, %xmm0
1934; SSSE3-NEXT: pandn %xmm8, %xmm1
1935; SSSE3-NEXT: por %xmm0, %xmm1
1936; SSSE3-NEXT: movdqa %xmm1, %xmm0
1937; SSSE3-NEXT: pxor %xmm10, %xmm0
1938; SSSE3-NEXT: movdqa %xmm0, %xmm4
1939; SSSE3-NEXT: pcmpgtd %xmm10, %xmm4
1940; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1941; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0
1942; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
1943; SSSE3-NEXT: pand %xmm5, %xmm7
1944; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
1945; SSSE3-NEXT: por %xmm7, %xmm0
1946; SSSE3-NEXT: movdqa %xmm2, %xmm4
1947; SSSE3-NEXT: pxor %xmm10, %xmm4
1948; SSSE3-NEXT: movdqa %xmm4, %xmm5
1949; SSSE3-NEXT: pcmpgtd %xmm10, %xmm5
1950; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
1951; SSSE3-NEXT: pcmpeqd %xmm10, %xmm4
1952; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1953; SSSE3-NEXT: pand %xmm7, %xmm4
1954; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm5[1,1,3,3]
1955; SSSE3-NEXT: por %xmm4, %xmm7
1956; SSSE3-NEXT: movdqa %xmm3, %xmm4
1957; SSSE3-NEXT: pxor %xmm10, %xmm4
1958; SSSE3-NEXT: movdqa %xmm4, %xmm5
1959; SSSE3-NEXT: pcmpgtd %xmm10, %xmm5
1960; SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm5[0,0,2,2]
1961; SSSE3-NEXT: pcmpeqd %xmm10, %xmm4
1962; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
1963; SSSE3-NEXT: pand %xmm9, %xmm6
1964; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
1965; SSSE3-NEXT: por %xmm6, %xmm4
1966; SSSE3-NEXT: movdqa %xmm11, %xmm5
1967; SSSE3-NEXT: pxor %xmm10, %xmm5
1968; SSSE3-NEXT: movdqa %xmm5, %xmm6
1969; SSSE3-NEXT: pcmpgtd %xmm10, %xmm6
1970; SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm6[0,0,2,2]
1971; SSSE3-NEXT: pcmpeqd %xmm10, %xmm5
1972; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
1973; SSSE3-NEXT: pand %xmm9, %xmm5
1974; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1975; SSSE3-NEXT: por %xmm5, %xmm6
1976; SSSE3-NEXT: pand %xmm8, %xmm6
1977; SSSE3-NEXT: pand %xmm11, %xmm6
1978; SSSE3-NEXT: pand %xmm8, %xmm4
1979; SSSE3-NEXT: pand %xmm3, %xmm4
1980; SSSE3-NEXT: packuswb %xmm6, %xmm4
1981; SSSE3-NEXT: pand %xmm8, %xmm7
1982; SSSE3-NEXT: pand %xmm2, %xmm7
1983; SSSE3-NEXT: pand %xmm8, %xmm0
1984; SSSE3-NEXT: pand %xmm1, %xmm0
1985; SSSE3-NEXT: packuswb %xmm7, %xmm0
1986; SSSE3-NEXT: packuswb %xmm4, %xmm0
1987; SSSE3-NEXT: packuswb %xmm0, %xmm0
1988; SSSE3-NEXT: movq %xmm0, (%rdi)
1989; SSSE3-NEXT: retq
1990;
1991; SSE41-LABEL: trunc_packus_v8i64_v8i8_store:
1992; SSE41: # %bb.0:
1993; SSE41-NEXT: movdqa %xmm0, %xmm9
1994; SSE41-NEXT: movapd {{.*#+}} xmm8 = [255,255]
1995; SSE41-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
1996; SSE41-NEXT: movdqa %xmm3, %xmm0
1997; SSE41-NEXT: pxor %xmm10, %xmm0
1998; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [2147483903,2147483903]
1999; SSE41-NEXT: movdqa %xmm5, %xmm4
2000; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
2001; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm4[0,0,2,2]
2002; SSE41-NEXT: pcmpeqd %xmm5, %xmm0
2003; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
2004; SSE41-NEXT: pand %xmm7, %xmm6
2005; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
2006; SSE41-NEXT: por %xmm6, %xmm0
2007; SSE41-NEXT: movapd %xmm8, %xmm11
2008; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm11
2009; SSE41-NEXT: movdqa %xmm2, %xmm0
2010; SSE41-NEXT: pxor %xmm10, %xmm0
2011; SSE41-NEXT: movdqa %xmm5, %xmm3
2012; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
2013; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2014; SSE41-NEXT: pcmpeqd %xmm5, %xmm0
2015; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
2016; SSE41-NEXT: pand %xmm4, %xmm6
2017; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2018; SSE41-NEXT: por %xmm6, %xmm0
2019; SSE41-NEXT: movapd %xmm8, %xmm3
2020; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm3
2021; SSE41-NEXT: movdqa %xmm1, %xmm0
2022; SSE41-NEXT: pxor %xmm10, %xmm0
2023; SSE41-NEXT: movdqa %xmm5, %xmm2
2024; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
2025; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
2026; SSE41-NEXT: pcmpeqd %xmm5, %xmm0
2027; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
2028; SSE41-NEXT: pand %xmm4, %xmm6
2029; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
2030; SSE41-NEXT: por %xmm6, %xmm0
2031; SSE41-NEXT: movapd %xmm8, %xmm4
2032; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4
2033; SSE41-NEXT: movdqa %xmm9, %xmm0
2034; SSE41-NEXT: pxor %xmm10, %xmm0
2035; SSE41-NEXT: movdqa %xmm5, %xmm1
2036; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
2037; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
2038; SSE41-NEXT: pcmpeqd %xmm5, %xmm0
2039; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
2040; SSE41-NEXT: pand %xmm2, %xmm5
2041; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2042; SSE41-NEXT: por %xmm5, %xmm0
2043; SSE41-NEXT: movapd %xmm8, %xmm5
2044; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm5
2045; SSE41-NEXT: movapd %xmm5, %xmm0
2046; SSE41-NEXT: xorpd %xmm10, %xmm0
2047; SSE41-NEXT: movapd %xmm0, %xmm1
2048; SSE41-NEXT: pcmpgtd %xmm10, %xmm1
2049; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
2050; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2051; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
2052; SSE41-NEXT: pand %xmm2, %xmm6
2053; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2054; SSE41-NEXT: por %xmm6, %xmm0
2055; SSE41-NEXT: pxor %xmm2, %xmm2
2056; SSE41-NEXT: pxor %xmm1, %xmm1
2057; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1
2058; SSE41-NEXT: movapd %xmm4, %xmm0
2059; SSE41-NEXT: xorpd %xmm10, %xmm0
2060; SSE41-NEXT: movapd %xmm0, %xmm5
2061; SSE41-NEXT: pcmpgtd %xmm10, %xmm5
2062; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
2063; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2064; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2065; SSE41-NEXT: pand %xmm6, %xmm7
2066; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
2067; SSE41-NEXT: por %xmm7, %xmm0
2068; SSE41-NEXT: pxor %xmm5, %xmm5
2069; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm5
2070; SSE41-NEXT: movapd %xmm3, %xmm0
2071; SSE41-NEXT: xorpd %xmm10, %xmm0
2072; SSE41-NEXT: movapd %xmm0, %xmm4
2073; SSE41-NEXT: pcmpgtd %xmm10, %xmm4
2074; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
2075; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2076; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2077; SSE41-NEXT: pand %xmm6, %xmm7
2078; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
2079; SSE41-NEXT: por %xmm7, %xmm0
2080; SSE41-NEXT: pxor %xmm4, %xmm4
2081; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm4
2082; SSE41-NEXT: movapd %xmm11, %xmm0
2083; SSE41-NEXT: xorpd %xmm10, %xmm0
2084; SSE41-NEXT: movapd %xmm0, %xmm3
2085; SSE41-NEXT: pcmpgtd %xmm10, %xmm3
2086; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
2087; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2088; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2089; SSE41-NEXT: pand %xmm6, %xmm7
2090; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2091; SSE41-NEXT: por %xmm7, %xmm0
2092; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm2
2093; SSE41-NEXT: andpd %xmm8, %xmm2
2094; SSE41-NEXT: andpd %xmm8, %xmm4
2095; SSE41-NEXT: packuswb %xmm2, %xmm4
2096; SSE41-NEXT: andpd %xmm8, %xmm5
2097; SSE41-NEXT: andpd %xmm8, %xmm1
2098; SSE41-NEXT: packuswb %xmm5, %xmm1
2099; SSE41-NEXT: packuswb %xmm4, %xmm1
2100; SSE41-NEXT: packuswb %xmm1, %xmm1
2101; SSE41-NEXT: movq %xmm1, (%rdi)
2102; SSE41-NEXT: retq
2103;
2104; AVX1-LABEL: trunc_packus_v8i64_v8i8_store:
2105; AVX1: # %bb.0:
2106; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [255,255,255,255]
2107; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2108; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255]
2109; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
2110; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm5
2111; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
2112; AVX1-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
2113; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2114; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
2115; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm5
2116; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
2117; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
2118; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2119; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm8
2120; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
2121; AVX1-NEXT: vpcmpgtq %xmm2, %xmm5, %xmm6
2122; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm7
2123; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2124; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
2125; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
2126; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
2127; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
2128; AVX1-NEXT: vpand %xmm1, %xmm7, %xmm1
2129; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
2130; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm2
2131; AVX1-NEXT: vpand %xmm2, %xmm6, %xmm2
2132; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
2133; AVX1-NEXT: vpand %xmm0, %xmm8, %xmm0
2134; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
2135; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2136; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
2137; AVX1-NEXT: vmovq %xmm0, (%rdi)
2138; AVX1-NEXT: vzeroupper
2139; AVX1-NEXT: retq
2140;
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00002141; AVX2-LABEL: trunc_packus_v8i64_v8i8_store:
2142; AVX2: # %bb.0:
2143; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [255,255,255,255]
2144; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
2145; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
2146; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
2147; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
2148; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
2149; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm3
2150; AVX2-NEXT: vpand %ymm0, %ymm3, %ymm0
2151; AVX2-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm2
2152; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
2153; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2154; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2155; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
2156; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2157; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
2158; AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
2159; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2160; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2161; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
2162; AVX2-NEXT: vmovq %xmm0, (%rdi)
2163; AVX2-NEXT: vzeroupper
2164; AVX2-NEXT: retq
Simon Pilgrim689d8132018-02-15 17:48:34 +00002165;
2166; AVX512-LABEL: trunc_packus_v8i64_v8i8_store:
2167; AVX512: # %bb.0:
2168; AVX512-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
2169; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
2170; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
2171; AVX512-NEXT: vpmovqb %zmm0, (%rdi)
2172; AVX512-NEXT: vzeroupper
2173; AVX512-NEXT: retq
2174 %1 = icmp slt <8 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
2175 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
2176 %3 = icmp sgt <8 x i64> %2, zeroinitializer
2177 %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
2178 %5 = trunc <8 x i64> %4 to <8 x i8>
2179 store <8 x i8> %5, <8 x i8> *%p1
2180 ret void
2181}
2182
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002183define <16 x i8> @trunc_packus_v16i64_v16i8(<16 x i64> %a0) {
2184; SSE2-LABEL: trunc_packus_v16i64_v16i8:
2185; SSE2: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00002186; SSE2-NEXT: movdqa {{.*#+}} xmm13 = [255,255]
2187; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,0,2147483648,0]
2188; SSE2-NEXT: movdqa %xmm7, %xmm8
2189; SSE2-NEXT: pxor %xmm9, %xmm8
2190; SSE2-NEXT: movdqa {{.*#+}} xmm11 = [2147483903,2147483903]
2191; SSE2-NEXT: movdqa %xmm11, %xmm10
2192; SSE2-NEXT: pcmpgtd %xmm8, %xmm10
2193; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm10[0,0,2,2]
2194; SSE2-NEXT: pcmpeqd %xmm11, %xmm8
2195; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm8[1,1,3,3]
2196; SSE2-NEXT: pand %xmm12, %xmm8
2197; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3]
2198; SSE2-NEXT: por %xmm8, %xmm10
2199; SSE2-NEXT: pand %xmm10, %xmm7
2200; SSE2-NEXT: pandn %xmm13, %xmm10
2201; SSE2-NEXT: por %xmm7, %xmm10
2202; SSE2-NEXT: movdqa %xmm10, -{{[0-9]+}}(%rsp) # 16-byte Spill
2203; SSE2-NEXT: movdqa %xmm6, %xmm7
2204; SSE2-NEXT: pxor %xmm9, %xmm7
2205; SSE2-NEXT: movdqa %xmm11, %xmm8
2206; SSE2-NEXT: pcmpgtd %xmm7, %xmm8
2207; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm8[0,0,2,2]
2208; SSE2-NEXT: pcmpeqd %xmm11, %xmm7
2209; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm7[1,1,3,3]
2210; SSE2-NEXT: pand %xmm12, %xmm10
2211; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm8[1,1,3,3]
2212; SSE2-NEXT: por %xmm10, %xmm14
2213; SSE2-NEXT: pand %xmm14, %xmm6
2214; SSE2-NEXT: pandn %xmm13, %xmm14
2215; SSE2-NEXT: por %xmm6, %xmm14
2216; SSE2-NEXT: movdqa %xmm5, %xmm6
2217; SSE2-NEXT: pxor %xmm9, %xmm6
2218; SSE2-NEXT: movdqa %xmm11, %xmm7
2219; SSE2-NEXT: pcmpgtd %xmm6, %xmm7
2220; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
2221; SSE2-NEXT: pcmpeqd %xmm11, %xmm6
2222; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2223; SSE2-NEXT: pand %xmm8, %xmm6
2224; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
2225; SSE2-NEXT: por %xmm6, %xmm7
2226; SSE2-NEXT: pand %xmm7, %xmm5
2227; SSE2-NEXT: pandn %xmm13, %xmm7
2228; SSE2-NEXT: por %xmm5, %xmm7
2229; SSE2-NEXT: movdqa %xmm7, %xmm10
Quentin Colombet48abac82018-02-17 03:05:33 +00002230; SSE2-NEXT: movdqa %xmm10, -{{[0-9]+}}(%rsp) # 16-byte Spill
Simon Pilgrim0be55672018-02-11 10:52:37 +00002231; SSE2-NEXT: movdqa %xmm4, %xmm5
2232; SSE2-NEXT: pxor %xmm9, %xmm5
2233; SSE2-NEXT: movdqa %xmm11, %xmm6
2234; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
2235; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
2236; SSE2-NEXT: pcmpeqd %xmm11, %xmm5
2237; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2238; SSE2-NEXT: pand %xmm7, %xmm5
2239; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2240; SSE2-NEXT: por %xmm5, %xmm6
2241; SSE2-NEXT: pand %xmm6, %xmm4
2242; SSE2-NEXT: pandn %xmm13, %xmm6
2243; SSE2-NEXT: por %xmm4, %xmm6
2244; SSE2-NEXT: movdqa %xmm6, %xmm7
Quentin Colombet48abac82018-02-17 03:05:33 +00002245; SSE2-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) # 16-byte Spill
Simon Pilgrim0be55672018-02-11 10:52:37 +00002246; SSE2-NEXT: movdqa %xmm3, %xmm4
2247; SSE2-NEXT: pxor %xmm9, %xmm4
2248; SSE2-NEXT: movdqa %xmm11, %xmm5
2249; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
2250; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
2251; SSE2-NEXT: pcmpeqd %xmm11, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002252; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002253; SSE2-NEXT: pand %xmm6, %xmm4
2254; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2255; SSE2-NEXT: por %xmm4, %xmm5
2256; SSE2-NEXT: pand %xmm5, %xmm3
2257; SSE2-NEXT: pandn %xmm13, %xmm5
2258; SSE2-NEXT: por %xmm3, %xmm5
2259; SSE2-NEXT: movdqa %xmm5, %xmm8
Quentin Colombet48abac82018-02-17 03:05:33 +00002260; SSE2-NEXT: movdqa %xmm8, -{{[0-9]+}}(%rsp) # 16-byte Spill
Simon Pilgrim0be55672018-02-11 10:52:37 +00002261; SSE2-NEXT: movdqa %xmm2, %xmm3
2262; SSE2-NEXT: pxor %xmm9, %xmm3
2263; SSE2-NEXT: movdqa %xmm11, %xmm4
2264; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
2265; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2266; SSE2-NEXT: pcmpeqd %xmm11, %xmm3
2267; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2268; SSE2-NEXT: pand %xmm5, %xmm3
2269; SSE2-NEXT: pshufd {{.*#+}} xmm15 = xmm4[1,1,3,3]
2270; SSE2-NEXT: por %xmm3, %xmm15
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002271; SSE2-NEXT: pand %xmm15, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00002272; SSE2-NEXT: pandn %xmm13, %xmm15
2273; SSE2-NEXT: movdqa %xmm13, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002274; SSE2-NEXT: por %xmm2, %xmm15
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002275; SSE2-NEXT: movdqa %xmm1, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00002276; SSE2-NEXT: pxor %xmm9, %xmm2
2277; SSE2-NEXT: movdqa %xmm11, %xmm3
2278; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
2279; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2280; SSE2-NEXT: pcmpeqd %xmm11, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002281; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002282; SSE2-NEXT: pand %xmm4, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00002283; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm3[1,1,3,3]
2284; SSE2-NEXT: por %xmm2, %xmm13
2285; SSE2-NEXT: pand %xmm13, %xmm1
2286; SSE2-NEXT: pandn %xmm5, %xmm13
2287; SSE2-NEXT: por %xmm1, %xmm13
2288; SSE2-NEXT: movdqa %xmm0, %xmm1
2289; SSE2-NEXT: pxor %xmm9, %xmm1
2290; SSE2-NEXT: movdqa %xmm11, %xmm2
2291; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
2292; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
2293; SSE2-NEXT: pcmpeqd %xmm11, %xmm1
2294; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2295; SSE2-NEXT: pand %xmm4, %xmm1
2296; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm2[1,1,3,3]
2297; SSE2-NEXT: por %xmm1, %xmm11
2298; SSE2-NEXT: pand %xmm11, %xmm0
2299; SSE2-NEXT: pandn %xmm5, %xmm11
2300; SSE2-NEXT: por %xmm0, %xmm11
2301; SSE2-NEXT: movdqa %xmm11, %xmm0
2302; SSE2-NEXT: pxor %xmm9, %xmm0
2303; SSE2-NEXT: movdqa %xmm0, %xmm1
2304; SSE2-NEXT: pcmpgtd %xmm9, %xmm1
2305; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
2306; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
2307; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2308; SSE2-NEXT: pand %xmm4, %xmm0
2309; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2310; SSE2-NEXT: por %xmm0, %xmm2
2311; SSE2-NEXT: movdqa %xmm13, %xmm0
2312; SSE2-NEXT: pxor %xmm9, %xmm0
2313; SSE2-NEXT: movdqa %xmm0, %xmm4
2314; SSE2-NEXT: pcmpgtd %xmm9, %xmm4
2315; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2316; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
2317; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2318; SSE2-NEXT: pand %xmm5, %xmm0
2319; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm4[1,1,3,3]
2320; SSE2-NEXT: por %xmm0, %xmm12
2321; SSE2-NEXT: movdqa %xmm15, %xmm0
2322; SSE2-NEXT: pxor %xmm9, %xmm0
2323; SSE2-NEXT: movdqa %xmm0, %xmm4
2324; SSE2-NEXT: pcmpgtd %xmm9, %xmm4
2325; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2326; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
2327; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2328; SSE2-NEXT: pand %xmm5, %xmm0
2329; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
2330; SSE2-NEXT: por %xmm0, %xmm6
2331; SSE2-NEXT: movdqa %xmm8, %xmm0
2332; SSE2-NEXT: pxor %xmm9, %xmm0
2333; SSE2-NEXT: movdqa %xmm0, %xmm4
2334; SSE2-NEXT: pcmpgtd %xmm9, %xmm4
2335; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2336; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
2337; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2338; SSE2-NEXT: pand %xmm5, %xmm0
2339; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm4[1,1,3,3]
2340; SSE2-NEXT: por %xmm0, %xmm8
2341; SSE2-NEXT: movdqa %xmm7, %xmm0
2342; SSE2-NEXT: pxor %xmm9, %xmm0
2343; SSE2-NEXT: movdqa %xmm0, %xmm5
2344; SSE2-NEXT: pcmpgtd %xmm9, %xmm5
2345; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
2346; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2347; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
2348; SSE2-NEXT: pand %xmm7, %xmm0
2349; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2350; SSE2-NEXT: por %xmm0, %xmm5
2351; SSE2-NEXT: movdqa %xmm10, %xmm0
2352; SSE2-NEXT: pxor %xmm9, %xmm0
2353; SSE2-NEXT: movdqa %xmm0, %xmm7
2354; SSE2-NEXT: pcmpgtd %xmm9, %xmm7
2355; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
2356; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2357; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
2358; SSE2-NEXT: pand %xmm10, %xmm0
2359; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm7[1,1,3,3]
2360; SSE2-NEXT: por %xmm0, %xmm10
2361; SSE2-NEXT: movdqa %xmm14, %xmm0
2362; SSE2-NEXT: pxor %xmm9, %xmm0
2363; SSE2-NEXT: movdqa %xmm0, %xmm7
2364; SSE2-NEXT: pcmpgtd %xmm9, %xmm7
2365; SSE2-NEXT: pcmpeqd %xmm9, %xmm0
2366; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
2367; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
2368; SSE2-NEXT: pand %xmm0, %xmm1
2369; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
2370; SSE2-NEXT: por %xmm1, %xmm0
2371; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm4 # 16-byte Reload
2372; SSE2-NEXT: movdqa %xmm4, %xmm1
2373; SSE2-NEXT: pxor %xmm9, %xmm1
2374; SSE2-NEXT: movdqa %xmm1, %xmm7
2375; SSE2-NEXT: pcmpgtd %xmm9, %xmm7
2376; SSE2-NEXT: pcmpeqd %xmm9, %xmm1
2377; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2378; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm7[0,0,2,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002379; SSE2-NEXT: pand %xmm3, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00002380; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
2381; SSE2-NEXT: por %xmm1, %xmm3
2382; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255]
2383; SSE2-NEXT: pand %xmm1, %xmm3
2384; SSE2-NEXT: pand %xmm4, %xmm3
2385; SSE2-NEXT: pand %xmm1, %xmm0
2386; SSE2-NEXT: pand %xmm14, %xmm0
2387; SSE2-NEXT: packuswb %xmm3, %xmm0
2388; SSE2-NEXT: pand %xmm1, %xmm10
2389; SSE2-NEXT: pand -{{[0-9]+}}(%rsp), %xmm10 # 16-byte Folded Reload
2390; SSE2-NEXT: pand %xmm1, %xmm5
2391; SSE2-NEXT: pand -{{[0-9]+}}(%rsp), %xmm5 # 16-byte Folded Reload
2392; SSE2-NEXT: packuswb %xmm10, %xmm5
2393; SSE2-NEXT: packuswb %xmm0, %xmm5
2394; SSE2-NEXT: pand %xmm1, %xmm8
2395; SSE2-NEXT: pand -{{[0-9]+}}(%rsp), %xmm8 # 16-byte Folded Reload
2396; SSE2-NEXT: pand %xmm1, %xmm6
2397; SSE2-NEXT: pand %xmm15, %xmm6
2398; SSE2-NEXT: packuswb %xmm8, %xmm6
2399; SSE2-NEXT: pand %xmm1, %xmm12
2400; SSE2-NEXT: pand %xmm13, %xmm12
2401; SSE2-NEXT: pand %xmm1, %xmm2
2402; SSE2-NEXT: pand %xmm11, %xmm2
2403; SSE2-NEXT: packuswb %xmm12, %xmm2
2404; SSE2-NEXT: packuswb %xmm6, %xmm2
2405; SSE2-NEXT: packuswb %xmm5, %xmm2
2406; SSE2-NEXT: movdqa %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002407; SSE2-NEXT: retq
2408;
2409; SSSE3-LABEL: trunc_packus_v16i64_v16i8:
2410; SSSE3: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00002411; SSSE3-NEXT: movdqa {{.*#+}} xmm13 = [255,255]
2412; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,0,2147483648,0]
2413; SSSE3-NEXT: movdqa %xmm7, %xmm8
2414; SSSE3-NEXT: pxor %xmm9, %xmm8
2415; SSSE3-NEXT: movdqa {{.*#+}} xmm11 = [2147483903,2147483903]
2416; SSSE3-NEXT: movdqa %xmm11, %xmm10
2417; SSSE3-NEXT: pcmpgtd %xmm8, %xmm10
2418; SSSE3-NEXT: pshufd {{.*#+}} xmm12 = xmm10[0,0,2,2]
2419; SSSE3-NEXT: pcmpeqd %xmm11, %xmm8
2420; SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm8[1,1,3,3]
2421; SSSE3-NEXT: pand %xmm12, %xmm8
2422; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3]
2423; SSSE3-NEXT: por %xmm8, %xmm10
2424; SSSE3-NEXT: pand %xmm10, %xmm7
2425; SSSE3-NEXT: pandn %xmm13, %xmm10
2426; SSSE3-NEXT: por %xmm7, %xmm10
2427; SSSE3-NEXT: movdqa %xmm10, -{{[0-9]+}}(%rsp) # 16-byte Spill
2428; SSSE3-NEXT: movdqa %xmm6, %xmm7
2429; SSSE3-NEXT: pxor %xmm9, %xmm7
2430; SSSE3-NEXT: movdqa %xmm11, %xmm8
2431; SSSE3-NEXT: pcmpgtd %xmm7, %xmm8
2432; SSSE3-NEXT: pshufd {{.*#+}} xmm12 = xmm8[0,0,2,2]
2433; SSSE3-NEXT: pcmpeqd %xmm11, %xmm7
2434; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm7[1,1,3,3]
2435; SSSE3-NEXT: pand %xmm12, %xmm10
2436; SSSE3-NEXT: pshufd {{.*#+}} xmm14 = xmm8[1,1,3,3]
2437; SSSE3-NEXT: por %xmm10, %xmm14
2438; SSSE3-NEXT: pand %xmm14, %xmm6
2439; SSSE3-NEXT: pandn %xmm13, %xmm14
2440; SSSE3-NEXT: por %xmm6, %xmm14
2441; SSSE3-NEXT: movdqa %xmm5, %xmm6
2442; SSSE3-NEXT: pxor %xmm9, %xmm6
2443; SSSE3-NEXT: movdqa %xmm11, %xmm7
2444; SSSE3-NEXT: pcmpgtd %xmm6, %xmm7
2445; SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
2446; SSSE3-NEXT: pcmpeqd %xmm11, %xmm6
2447; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2448; SSSE3-NEXT: pand %xmm8, %xmm6
2449; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
2450; SSSE3-NEXT: por %xmm6, %xmm7
2451; SSSE3-NEXT: pand %xmm7, %xmm5
2452; SSSE3-NEXT: pandn %xmm13, %xmm7
2453; SSSE3-NEXT: por %xmm5, %xmm7
2454; SSSE3-NEXT: movdqa %xmm7, %xmm10
Quentin Colombet48abac82018-02-17 03:05:33 +00002455; SSSE3-NEXT: movdqa %xmm10, -{{[0-9]+}}(%rsp) # 16-byte Spill
Simon Pilgrim0be55672018-02-11 10:52:37 +00002456; SSSE3-NEXT: movdqa %xmm4, %xmm5
2457; SSSE3-NEXT: pxor %xmm9, %xmm5
2458; SSSE3-NEXT: movdqa %xmm11, %xmm6
2459; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6
2460; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
2461; SSSE3-NEXT: pcmpeqd %xmm11, %xmm5
2462; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2463; SSSE3-NEXT: pand %xmm7, %xmm5
2464; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
2465; SSSE3-NEXT: por %xmm5, %xmm6
2466; SSSE3-NEXT: pand %xmm6, %xmm4
2467; SSSE3-NEXT: pandn %xmm13, %xmm6
2468; SSSE3-NEXT: por %xmm4, %xmm6
2469; SSSE3-NEXT: movdqa %xmm6, %xmm7
Quentin Colombet48abac82018-02-17 03:05:33 +00002470; SSSE3-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) # 16-byte Spill
Simon Pilgrim0be55672018-02-11 10:52:37 +00002471; SSSE3-NEXT: movdqa %xmm3, %xmm4
2472; SSSE3-NEXT: pxor %xmm9, %xmm4
2473; SSSE3-NEXT: movdqa %xmm11, %xmm5
2474; SSSE3-NEXT: pcmpgtd %xmm4, %xmm5
2475; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
2476; SSSE3-NEXT: pcmpeqd %xmm11, %xmm4
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002477; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002478; SSSE3-NEXT: pand %xmm6, %xmm4
2479; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2480; SSSE3-NEXT: por %xmm4, %xmm5
2481; SSSE3-NEXT: pand %xmm5, %xmm3
2482; SSSE3-NEXT: pandn %xmm13, %xmm5
2483; SSSE3-NEXT: por %xmm3, %xmm5
2484; SSSE3-NEXT: movdqa %xmm5, %xmm8
Quentin Colombet48abac82018-02-17 03:05:33 +00002485; SSSE3-NEXT: movdqa %xmm8, -{{[0-9]+}}(%rsp) # 16-byte Spill
Simon Pilgrim0be55672018-02-11 10:52:37 +00002486; SSSE3-NEXT: movdqa %xmm2, %xmm3
2487; SSSE3-NEXT: pxor %xmm9, %xmm3
2488; SSSE3-NEXT: movdqa %xmm11, %xmm4
2489; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4
2490; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2491; SSSE3-NEXT: pcmpeqd %xmm11, %xmm3
2492; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2493; SSSE3-NEXT: pand %xmm5, %xmm3
2494; SSSE3-NEXT: pshufd {{.*#+}} xmm15 = xmm4[1,1,3,3]
2495; SSSE3-NEXT: por %xmm3, %xmm15
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002496; SSSE3-NEXT: pand %xmm15, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00002497; SSSE3-NEXT: pandn %xmm13, %xmm15
2498; SSSE3-NEXT: movdqa %xmm13, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002499; SSSE3-NEXT: por %xmm2, %xmm15
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002500; SSSE3-NEXT: movdqa %xmm1, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00002501; SSSE3-NEXT: pxor %xmm9, %xmm2
2502; SSSE3-NEXT: movdqa %xmm11, %xmm3
2503; SSSE3-NEXT: pcmpgtd %xmm2, %xmm3
2504; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2505; SSSE3-NEXT: pcmpeqd %xmm11, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002506; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002507; SSSE3-NEXT: pand %xmm4, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00002508; SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm3[1,1,3,3]
2509; SSSE3-NEXT: por %xmm2, %xmm13
2510; SSSE3-NEXT: pand %xmm13, %xmm1
2511; SSSE3-NEXT: pandn %xmm5, %xmm13
2512; SSSE3-NEXT: por %xmm1, %xmm13
2513; SSSE3-NEXT: movdqa %xmm0, %xmm1
2514; SSSE3-NEXT: pxor %xmm9, %xmm1
2515; SSSE3-NEXT: movdqa %xmm11, %xmm2
2516; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
2517; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
2518; SSSE3-NEXT: pcmpeqd %xmm11, %xmm1
2519; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2520; SSSE3-NEXT: pand %xmm4, %xmm1
2521; SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm2[1,1,3,3]
2522; SSSE3-NEXT: por %xmm1, %xmm11
2523; SSSE3-NEXT: pand %xmm11, %xmm0
2524; SSSE3-NEXT: pandn %xmm5, %xmm11
2525; SSSE3-NEXT: por %xmm0, %xmm11
2526; SSSE3-NEXT: movdqa %xmm11, %xmm0
2527; SSSE3-NEXT: pxor %xmm9, %xmm0
2528; SSSE3-NEXT: movdqa %xmm0, %xmm1
2529; SSSE3-NEXT: pcmpgtd %xmm9, %xmm1
2530; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2]
2531; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
2532; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2533; SSSE3-NEXT: pand %xmm4, %xmm0
2534; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2535; SSSE3-NEXT: por %xmm0, %xmm2
2536; SSSE3-NEXT: movdqa %xmm13, %xmm0
2537; SSSE3-NEXT: pxor %xmm9, %xmm0
2538; SSSE3-NEXT: movdqa %xmm0, %xmm4
2539; SSSE3-NEXT: pcmpgtd %xmm9, %xmm4
2540; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2541; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
2542; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2543; SSSE3-NEXT: pand %xmm5, %xmm0
2544; SSSE3-NEXT: pshufd {{.*#+}} xmm12 = xmm4[1,1,3,3]
2545; SSSE3-NEXT: por %xmm0, %xmm12
2546; SSSE3-NEXT: movdqa %xmm15, %xmm0
2547; SSSE3-NEXT: pxor %xmm9, %xmm0
2548; SSSE3-NEXT: movdqa %xmm0, %xmm4
2549; SSSE3-NEXT: pcmpgtd %xmm9, %xmm4
2550; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2551; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
2552; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2553; SSSE3-NEXT: pand %xmm5, %xmm0
2554; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
2555; SSSE3-NEXT: por %xmm0, %xmm6
2556; SSSE3-NEXT: movdqa %xmm8, %xmm0
2557; SSSE3-NEXT: pxor %xmm9, %xmm0
2558; SSSE3-NEXT: movdqa %xmm0, %xmm4
2559; SSSE3-NEXT: pcmpgtd %xmm9, %xmm4
2560; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
2561; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
2562; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2563; SSSE3-NEXT: pand %xmm5, %xmm0
2564; SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm4[1,1,3,3]
2565; SSSE3-NEXT: por %xmm0, %xmm8
2566; SSSE3-NEXT: movdqa %xmm7, %xmm0
2567; SSSE3-NEXT: pxor %xmm9, %xmm0
2568; SSSE3-NEXT: movdqa %xmm0, %xmm5
2569; SSSE3-NEXT: pcmpgtd %xmm9, %xmm5
2570; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
2571; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2572; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
2573; SSSE3-NEXT: pand %xmm7, %xmm0
2574; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2575; SSSE3-NEXT: por %xmm0, %xmm5
2576; SSSE3-NEXT: movdqa %xmm10, %xmm0
2577; SSSE3-NEXT: pxor %xmm9, %xmm0
2578; SSSE3-NEXT: movdqa %xmm0, %xmm7
2579; SSSE3-NEXT: pcmpgtd %xmm9, %xmm7
2580; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
2581; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
2582; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
2583; SSSE3-NEXT: pand %xmm10, %xmm0
2584; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm7[1,1,3,3]
2585; SSSE3-NEXT: por %xmm0, %xmm10
2586; SSSE3-NEXT: movdqa %xmm14, %xmm0
2587; SSSE3-NEXT: pxor %xmm9, %xmm0
2588; SSSE3-NEXT: movdqa %xmm0, %xmm7
2589; SSSE3-NEXT: pcmpgtd %xmm9, %xmm7
2590; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0
2591; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
2592; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2]
2593; SSSE3-NEXT: pand %xmm0, %xmm1
2594; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
2595; SSSE3-NEXT: por %xmm1, %xmm0
2596; SSSE3-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm4 # 16-byte Reload
2597; SSSE3-NEXT: movdqa %xmm4, %xmm1
2598; SSSE3-NEXT: pxor %xmm9, %xmm1
2599; SSSE3-NEXT: movdqa %xmm1, %xmm7
2600; SSSE3-NEXT: pcmpgtd %xmm9, %xmm7
2601; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1
2602; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2603; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm7[0,0,2,2]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002604; SSSE3-NEXT: pand %xmm3, %xmm1
Simon Pilgrim0be55672018-02-11 10:52:37 +00002605; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
2606; SSSE3-NEXT: por %xmm1, %xmm3
2607; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255]
2608; SSSE3-NEXT: pand %xmm1, %xmm3
2609; SSSE3-NEXT: pand %xmm4, %xmm3
2610; SSSE3-NEXT: pand %xmm1, %xmm0
2611; SSSE3-NEXT: pand %xmm14, %xmm0
2612; SSSE3-NEXT: packuswb %xmm3, %xmm0
2613; SSSE3-NEXT: pand %xmm1, %xmm10
2614; SSSE3-NEXT: pand -{{[0-9]+}}(%rsp), %xmm10 # 16-byte Folded Reload
2615; SSSE3-NEXT: pand %xmm1, %xmm5
2616; SSSE3-NEXT: pand -{{[0-9]+}}(%rsp), %xmm5 # 16-byte Folded Reload
2617; SSSE3-NEXT: packuswb %xmm10, %xmm5
2618; SSSE3-NEXT: packuswb %xmm0, %xmm5
2619; SSSE3-NEXT: pand %xmm1, %xmm8
2620; SSSE3-NEXT: pand -{{[0-9]+}}(%rsp), %xmm8 # 16-byte Folded Reload
2621; SSSE3-NEXT: pand %xmm1, %xmm6
2622; SSSE3-NEXT: pand %xmm15, %xmm6
2623; SSSE3-NEXT: packuswb %xmm8, %xmm6
2624; SSSE3-NEXT: pand %xmm1, %xmm12
2625; SSSE3-NEXT: pand %xmm13, %xmm12
2626; SSSE3-NEXT: pand %xmm1, %xmm2
2627; SSSE3-NEXT: pand %xmm11, %xmm2
2628; SSSE3-NEXT: packuswb %xmm12, %xmm2
2629; SSSE3-NEXT: packuswb %xmm6, %xmm2
2630; SSSE3-NEXT: packuswb %xmm5, %xmm2
2631; SSSE3-NEXT: movdqa %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002632; SSSE3-NEXT: retq
2633;
2634; SSE41-LABEL: trunc_packus_v16i64_v16i8:
2635; SSE41: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00002636; SSE41-NEXT: movdqa %xmm0, %xmm8
2637; SSE41-NEXT: movapd {{.*#+}} xmm9 = [255,255]
2638; SSE41-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002639; SSE41-NEXT: movdqa %xmm7, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002640; SSE41-NEXT: pxor %xmm10, %xmm0
2641; SSE41-NEXT: movdqa {{.*#+}} xmm12 = [2147483903,2147483903]
2642; SSE41-NEXT: movdqa %xmm12, %xmm11
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002643; SSE41-NEXT: pcmpgtd %xmm0, %xmm11
Simon Pilgrim0be55672018-02-11 10:52:37 +00002644; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm11[0,0,2,2]
2645; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002646; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm0[1,1,3,3]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002647; SSE41-NEXT: pand %xmm13, %xmm14
2648; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm11[1,1,3,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002649; SSE41-NEXT: por %xmm14, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002650; SSE41-NEXT: movapd %xmm9, %xmm11
2651; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm11
2652; SSE41-NEXT: movdqa %xmm6, %xmm0
2653; SSE41-NEXT: pxor %xmm10, %xmm0
2654; SSE41-NEXT: movdqa %xmm12, %xmm7
2655; SSE41-NEXT: pcmpgtd %xmm0, %xmm7
2656; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm7[0,0,2,2]
2657; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2658; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm0[1,1,3,3]
2659; SSE41-NEXT: pand %xmm13, %xmm14
2660; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
2661; SSE41-NEXT: por %xmm14, %xmm0
2662; SSE41-NEXT: movapd %xmm9, %xmm13
2663; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm13
2664; SSE41-NEXT: movdqa %xmm5, %xmm0
2665; SSE41-NEXT: pxor %xmm10, %xmm0
2666; SSE41-NEXT: movdqa %xmm12, %xmm6
2667; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
2668; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm6[0,0,2,2]
2669; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2670; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2671; SSE41-NEXT: pand %xmm14, %xmm7
2672; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
2673; SSE41-NEXT: por %xmm7, %xmm0
2674; SSE41-NEXT: movapd %xmm9, %xmm14
2675; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm14
2676; SSE41-NEXT: movdqa %xmm4, %xmm0
2677; SSE41-NEXT: pxor %xmm10, %xmm0
2678; SSE41-NEXT: movdqa %xmm12, %xmm5
2679; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
2680; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
2681; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2682; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2683; SSE41-NEXT: pand %xmm6, %xmm7
2684; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
2685; SSE41-NEXT: por %xmm7, %xmm0
2686; SSE41-NEXT: movapd %xmm9, %xmm15
2687; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm15
2688; SSE41-NEXT: movdqa %xmm3, %xmm0
2689; SSE41-NEXT: pxor %xmm10, %xmm0
2690; SSE41-NEXT: movdqa %xmm12, %xmm4
2691; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
2692; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
2693; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2694; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2695; SSE41-NEXT: pand %xmm6, %xmm7
2696; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
2697; SSE41-NEXT: por %xmm7, %xmm0
2698; SSE41-NEXT: movapd %xmm9, %xmm4
2699; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm4
2700; SSE41-NEXT: movdqa %xmm2, %xmm0
2701; SSE41-NEXT: pxor %xmm10, %xmm0
2702; SSE41-NEXT: movdqa %xmm12, %xmm3
2703; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
2704; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
2705; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2706; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2707; SSE41-NEXT: pand %xmm6, %xmm7
2708; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2709; SSE41-NEXT: por %xmm7, %xmm0
2710; SSE41-NEXT: movapd %xmm9, %xmm6
2711; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002712; SSE41-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002713; SSE41-NEXT: pxor %xmm10, %xmm0
2714; SSE41-NEXT: movdqa %xmm12, %xmm2
2715; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
2716; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
2717; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2718; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2719; SSE41-NEXT: pand %xmm3, %xmm7
2720; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
2721; SSE41-NEXT: por %xmm7, %xmm0
2722; SSE41-NEXT: movapd %xmm9, %xmm7
2723; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm7
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002724; SSE41-NEXT: movdqa %xmm8, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002725; SSE41-NEXT: pxor %xmm10, %xmm0
2726; SSE41-NEXT: movdqa %xmm12, %xmm1
2727; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
2728; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2]
2729; SSE41-NEXT: pcmpeqd %xmm12, %xmm0
2730; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2731; SSE41-NEXT: pand %xmm2, %xmm3
2732; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2733; SSE41-NEXT: por %xmm3, %xmm0
2734; SSE41-NEXT: movapd %xmm9, %xmm2
2735; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002736; SSE41-NEXT: movapd %xmm2, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002737; SSE41-NEXT: xorpd %xmm10, %xmm0
2738; SSE41-NEXT: movapd %xmm0, %xmm1
2739; SSE41-NEXT: pcmpgtd %xmm10, %xmm1
2740; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
2741; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2742; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
2743; SSE41-NEXT: pand %xmm3, %xmm5
2744; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
2745; SSE41-NEXT: por %xmm5, %xmm0
2746; SSE41-NEXT: xorpd %xmm8, %xmm8
2747; SSE41-NEXT: pxor %xmm1, %xmm1
2748; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
2749; SSE41-NEXT: movapd %xmm7, %xmm0
2750; SSE41-NEXT: xorpd %xmm10, %xmm0
2751; SSE41-NEXT: movapd %xmm0, %xmm2
2752; SSE41-NEXT: pcmpgtd %xmm10, %xmm2
2753; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2]
2754; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2755; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2756; SSE41-NEXT: pand %xmm5, %xmm3
2757; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
2758; SSE41-NEXT: por %xmm3, %xmm0
2759; SSE41-NEXT: pxor %xmm12, %xmm12
2760; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm12
2761; SSE41-NEXT: movapd %xmm6, %xmm0
2762; SSE41-NEXT: xorpd %xmm10, %xmm0
2763; SSE41-NEXT: movapd %xmm0, %xmm3
2764; SSE41-NEXT: pcmpgtd %xmm10, %xmm3
2765; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
2766; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2767; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
2768; SSE41-NEXT: pand %xmm5, %xmm7
2769; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2770; SSE41-NEXT: por %xmm7, %xmm0
2771; SSE41-NEXT: pxor %xmm7, %xmm7
2772; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm7
2773; SSE41-NEXT: movapd %xmm4, %xmm0
2774; SSE41-NEXT: xorpd %xmm10, %xmm0
2775; SSE41-NEXT: movapd %xmm0, %xmm3
2776; SSE41-NEXT: pcmpgtd %xmm10, %xmm3
2777; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
2778; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2779; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
2780; SSE41-NEXT: pand %xmm5, %xmm6
2781; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2782; SSE41-NEXT: por %xmm6, %xmm0
2783; SSE41-NEXT: pxor %xmm6, %xmm6
2784; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002785; SSE41-NEXT: movapd %xmm15, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002786; SSE41-NEXT: xorpd %xmm10, %xmm0
2787; SSE41-NEXT: movapd %xmm0, %xmm3
2788; SSE41-NEXT: pcmpgtd %xmm10, %xmm3
2789; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
2790; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2791; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
2792; SSE41-NEXT: pand %xmm4, %xmm5
2793; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2794; SSE41-NEXT: por %xmm5, %xmm0
2795; SSE41-NEXT: pxor %xmm4, %xmm4
2796; SSE41-NEXT: blendvpd %xmm0, %xmm15, %xmm4
2797; SSE41-NEXT: movapd %xmm14, %xmm0
2798; SSE41-NEXT: xorpd %xmm10, %xmm0
2799; SSE41-NEXT: movapd %xmm0, %xmm3
2800; SSE41-NEXT: pcmpgtd %xmm10, %xmm3
2801; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
2802; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2803; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
2804; SSE41-NEXT: pand %xmm5, %xmm2
2805; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2806; SSE41-NEXT: por %xmm2, %xmm0
2807; SSE41-NEXT: xorpd %xmm15, %xmm15
2808; SSE41-NEXT: blendvpd %xmm0, %xmm14, %xmm15
2809; SSE41-NEXT: movapd %xmm13, %xmm0
2810; SSE41-NEXT: xorpd %xmm10, %xmm0
2811; SSE41-NEXT: movapd %xmm0, %xmm2
2812; SSE41-NEXT: pcmpgtd %xmm10, %xmm2
2813; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm2[0,0,2,2]
2814; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2815; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2816; SSE41-NEXT: pand %xmm14, %xmm3
2817; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
2818; SSE41-NEXT: por %xmm3, %xmm0
2819; SSE41-NEXT: pxor %xmm2, %xmm2
2820; SSE41-NEXT: blendvpd %xmm0, %xmm13, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002821; SSE41-NEXT: movapd %xmm11, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002822; SSE41-NEXT: xorpd %xmm10, %xmm0
2823; SSE41-NEXT: movapd %xmm0, %xmm3
2824; SSE41-NEXT: pcmpgtd %xmm10, %xmm3
2825; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm3[0,0,2,2]
2826; SSE41-NEXT: pcmpeqd %xmm10, %xmm0
2827; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
2828; SSE41-NEXT: pand %xmm13, %xmm5
2829; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
2830; SSE41-NEXT: por %xmm5, %xmm0
2831; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm8
2832; SSE41-NEXT: andpd %xmm9, %xmm8
2833; SSE41-NEXT: andpd %xmm9, %xmm2
2834; SSE41-NEXT: packuswb %xmm8, %xmm2
2835; SSE41-NEXT: andpd %xmm9, %xmm15
2836; SSE41-NEXT: andpd %xmm9, %xmm4
2837; SSE41-NEXT: packuswb %xmm15, %xmm4
2838; SSE41-NEXT: packuswb %xmm2, %xmm4
2839; SSE41-NEXT: andpd %xmm9, %xmm6
2840; SSE41-NEXT: andpd %xmm9, %xmm7
2841; SSE41-NEXT: packuswb %xmm6, %xmm7
2842; SSE41-NEXT: andpd %xmm9, %xmm12
2843; SSE41-NEXT: andpd %xmm9, %xmm1
2844; SSE41-NEXT: packuswb %xmm12, %xmm1
2845; SSE41-NEXT: packuswb %xmm7, %xmm1
2846; SSE41-NEXT: packuswb %xmm4, %xmm1
2847; SSE41-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002848; SSE41-NEXT: retq
2849;
2850; AVX1-LABEL: trunc_packus_v16i64_v16i8:
2851; AVX1: # %bb.0:
Simon Pilgrim0be55672018-02-11 10:52:37 +00002852; AVX1-NEXT: vmovapd {{.*#+}} ymm5 = [255,255,255,255]
2853; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002854; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255]
Simon Pilgrim0be55672018-02-11 10:52:37 +00002855; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm6
2856; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm7
2857; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm7, %ymm6
2858; AVX1-NEXT: vblendvpd %ymm6, %ymm3, %ymm5, %ymm3
2859; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
2860; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm6
2861; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm7
2862; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm7, %ymm6
2863; AVX1-NEXT: vblendvpd %ymm6, %ymm2, %ymm5, %ymm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002864; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
2865; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm6
2866; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm7
2867; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm7, %ymm6
Simon Pilgrim0be55672018-02-11 10:52:37 +00002868; AVX1-NEXT: vblendvpd %ymm6, %ymm1, %ymm5, %ymm1
2869; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
2870; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm6
2871; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm7
2872; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm7, %ymm6
2873; AVX1-NEXT: vblendvpd %ymm6, %ymm0, %ymm5, %ymm0
2874; AVX1-NEXT: vxorpd %xmm5, %xmm5, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002875; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm6
Simon Pilgrim0be55672018-02-11 10:52:37 +00002876; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm7
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002877; AVX1-NEXT: vpand %xmm4, %xmm6, %xmm6
2878; AVX1-NEXT: vpand %xmm6, %xmm7, %xmm6
Simon Pilgrim0be55672018-02-11 10:52:37 +00002879; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm7
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002880; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
2881; AVX1-NEXT: vpand %xmm3, %xmm7, %xmm3
2882; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7
2883; AVX1-NEXT: vpackuswb %xmm6, %xmm3, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00002884; AVX1-NEXT: vpcmpgtq %xmm5, %xmm7, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002885; AVX1-NEXT: vpand %xmm4, %xmm7, %xmm7
2886; AVX1-NEXT: vpand %xmm7, %xmm6, %xmm6
Simon Pilgrim0be55672018-02-11 10:52:37 +00002887; AVX1-NEXT: vpcmpgtq %xmm5, %xmm2, %xmm7
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002888; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
2889; AVX1-NEXT: vpand %xmm2, %xmm7, %xmm2
2890; AVX1-NEXT: vpackuswb %xmm6, %xmm2, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00002891; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002892; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
Simon Pilgrim0be55672018-02-11 10:52:37 +00002893; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002894; AVX1-NEXT: vpand %xmm4, %xmm6, %xmm6
2895; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
Simon Pilgrim0be55672018-02-11 10:52:37 +00002896; AVX1-NEXT: vpcmpgtq %xmm5, %xmm1, %xmm6
2897; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
2898; AVX1-NEXT: vpand %xmm1, %xmm6, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002899; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
Simon Pilgrim0be55672018-02-11 10:52:37 +00002900; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
2901; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm3
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002902; AVX1-NEXT: vpand %xmm4, %xmm6, %xmm6
Simon Pilgrim0be55672018-02-11 10:52:37 +00002903; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
2904; AVX1-NEXT: vpcmpgtq %xmm5, %xmm0, %xmm5
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002905; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002906; AVX1-NEXT: vpand %xmm0, %xmm5, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002907; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
Simon Pilgrim0be55672018-02-11 10:52:37 +00002908; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002909; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
2910; AVX1-NEXT: vzeroupper
2911; AVX1-NEXT: retq
2912;
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00002913; AVX2-LABEL: trunc_packus_v16i64_v16i8:
2914; AVX2: # %bb.0:
2915; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,255,255,255]
2916; AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm5
2917; AVX2-NEXT: vblendvpd %ymm5, %ymm3, %ymm4, %ymm3
2918; AVX2-NEXT: vpcmpgtq %ymm2, %ymm4, %ymm5
2919; AVX2-NEXT: vblendvpd %ymm5, %ymm2, %ymm4, %ymm2
2920; AVX2-NEXT: vpcmpgtq %ymm1, %ymm4, %ymm5
2921; AVX2-NEXT: vblendvpd %ymm5, %ymm1, %ymm4, %ymm1
2922; AVX2-NEXT: vpcmpgtq %ymm0, %ymm4, %ymm5
2923; AVX2-NEXT: vblendvpd %ymm5, %ymm0, %ymm4, %ymm0
2924; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
2925; AVX2-NEXT: vpcmpgtq %ymm4, %ymm0, %ymm5
2926; AVX2-NEXT: vpand %ymm0, %ymm5, %ymm0
2927; AVX2-NEXT: vpcmpgtq %ymm4, %ymm1, %ymm5
2928; AVX2-NEXT: vpand %ymm1, %ymm5, %ymm1
2929; AVX2-NEXT: vpcmpgtq %ymm4, %ymm2, %ymm5
2930; AVX2-NEXT: vpand %ymm2, %ymm5, %ymm2
2931; AVX2-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm4
2932; AVX2-NEXT: vpand %ymm3, %ymm4, %ymm3
2933; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
2934; AVX2-NEXT: vpackssdw %xmm4, %xmm3, %xmm3
2935; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
2936; AVX2-NEXT: vpackssdw %xmm4, %xmm2, %xmm2
2937; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
2938; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
2939; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
2940; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
2941; AVX2-NEXT: vpackssdw %xmm4, %xmm1, %xmm1
2942; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
2943; AVX2-NEXT: vpackssdw %xmm4, %xmm0, %xmm0
2944; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
2945; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
2946; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2947; AVX2-NEXT: vzeroupper
2948; AVX2-NEXT: retq
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002949;
2950; AVX512-LABEL: trunc_packus_v16i64_v16i8:
2951; AVX512: # %bb.0:
2952; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255]
2953; AVX512-NEXT: vpminsq %zmm2, %zmm0, %zmm0
2954; AVX512-NEXT: vpminsq %zmm2, %zmm1, %zmm1
2955; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
2956; AVX512-NEXT: vpmaxsq %zmm2, %zmm1, %zmm1
2957; AVX512-NEXT: vpmaxsq %zmm2, %zmm0, %zmm0
2958; AVX512-NEXT: vpmovqd %zmm0, %ymm0
2959; AVX512-NEXT: vpmovqd %zmm1, %ymm1
2960; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
2961; AVX512-NEXT: vpmovdb %zmm0, %xmm0
2962; AVX512-NEXT: vzeroupper
2963; AVX512-NEXT: retq
2964 %1 = icmp slt <16 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
2965 %2 = select <16 x i1> %1, <16 x i64> %a0, <16 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
2966 %3 = icmp sgt <16 x i64> %2, zeroinitializer
2967 %4 = select <16 x i1> %3, <16 x i64> %2, <16 x i64> zeroinitializer
2968 %5 = trunc <16 x i64> %4 to <16 x i8>
2969 ret <16 x i8> %5
2970}
2971
; trunc_packus_v8i32_v8i8: clamp every i32 lane of %a0 to the signed range
; [0, 255] (icmp slt/select against 255, then icmp sgt/select against zero),
; truncate the result to <8 x i8> and return it.
; Every expected sequence below keeps an explicit clamp (pcmpgtd/pand/pandn/por
; for SSE2/SSSE3, pminsd/pmaxsd for the other prefixes) ahead of the final
; pack or vpmovdw.
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with that script instead of editing them by hand.
2972define <8 x i8> @trunc_packus_v8i32_v8i8(<8 x i32> %a0) {
2973; SSE2-LABEL: trunc_packus_v8i32_v8i8:
2974; SSE2: # %bb.0:
2975; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255]
2976; SSE2-NEXT: movdqa %xmm2, %xmm3
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00002977; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
2978; SSE2-NEXT: pand %xmm3, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002979; SSE2-NEXT: pandn %xmm2, %xmm3
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00002980; SSE2-NEXT: por %xmm0, %xmm3
2981; SSE2-NEXT: movdqa %xmm2, %xmm0
2982; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
2983; SSE2-NEXT: pand %xmm0, %xmm1
2984; SSE2-NEXT: pandn %xmm2, %xmm0
2985; SSE2-NEXT: por %xmm1, %xmm0
2986; SSE2-NEXT: pxor %xmm1, %xmm1
2987; SSE2-NEXT: movdqa %xmm0, %xmm2
2988; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
2989; SSE2-NEXT: pand %xmm0, %xmm2
2990; SSE2-NEXT: movdqa %xmm3, %xmm0
2991; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
2992; SSE2-NEXT: pand %xmm3, %xmm0
2993; SSE2-NEXT: packssdw %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00002994; SSE2-NEXT: retq
2995;
2996; SSSE3-LABEL: trunc_packus_v8i32_v8i8:
2997; SSSE3: # %bb.0:
2998; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255]
2999; SSSE3-NEXT: movdqa %xmm2, %xmm3
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00003000; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3
3001; SSSE3-NEXT: pand %xmm3, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003002; SSSE3-NEXT: pandn %xmm2, %xmm3
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00003003; SSSE3-NEXT: por %xmm0, %xmm3
3004; SSSE3-NEXT: movdqa %xmm2, %xmm0
3005; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
3006; SSSE3-NEXT: pand %xmm0, %xmm1
3007; SSSE3-NEXT: pandn %xmm2, %xmm0
3008; SSSE3-NEXT: por %xmm1, %xmm0
3009; SSSE3-NEXT: pxor %xmm1, %xmm1
3010; SSSE3-NEXT: movdqa %xmm0, %xmm2
3011; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
3012; SSSE3-NEXT: pand %xmm0, %xmm2
3013; SSSE3-NEXT: movdqa %xmm3, %xmm0
3014; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
3015; SSSE3-NEXT: pand %xmm3, %xmm0
3016; SSSE3-NEXT: packssdw %xmm2, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003017; SSSE3-NEXT: retq
3018;
3019; SSE41-LABEL: trunc_packus_v8i32_v8i8:
3020; SSE41: # %bb.0:
3021; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003022; SSE41-NEXT: pminsd %xmm2, %xmm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00003023; SSE41-NEXT: pminsd %xmm2, %xmm1
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003024; SSE41-NEXT: pxor %xmm2, %xmm2
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003025; SSE41-NEXT: pmaxsd %xmm2, %xmm1
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00003026; SSE41-NEXT: pmaxsd %xmm2, %xmm0
3027; SSE41-NEXT: packssdw %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003028; SSE41-NEXT: retq
3029;
3030; AVX1-LABEL: trunc_packus_v8i32_v8i8:
3031; AVX1: # %bb.0:
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00003032; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255,255,255]
3033; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm2
3034; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3035; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
3036; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
3037; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
3038; AVX1-NEXT: vpmaxsd %xmm1, %xmm2, %xmm1
3039; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003040; AVX1-NEXT: vzeroupper
3041; AVX1-NEXT: retq
3042;
3043; AVX2-LABEL: trunc_packus_v8i32_v8i8:
3044; AVX2: # %bb.0:
3045; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
3046; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
3047; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
3048; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00003049; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3050; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003051; AVX2-NEXT: vzeroupper
3052; AVX2-NEXT: retq
3053;
3054; AVX512F-LABEL: trunc_packus_v8i32_v8i8:
3055; AVX512F: # %bb.0:
3056; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
3057; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
3058; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
3059; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
3060; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +00003061; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003062; AVX512F-NEXT: vzeroupper
3063; AVX512F-NEXT: retq
3064;
3065; AVX512VL-LABEL: trunc_packus_v8i32_v8i8:
3066; AVX512VL: # %bb.0:
3067; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
3068; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
3069; AVX512VL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
3070; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
3071; AVX512VL-NEXT: vzeroupper
3072; AVX512VL-NEXT: retq
3073;
3074; AVX512BW-LABEL: trunc_packus_v8i32_v8i8:
3075; AVX512BW: # %bb.0:
3076; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
3077; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0
3078; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
3079; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
3080; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +00003081; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003082; AVX512BW-NEXT: vzeroupper
3083; AVX512BW-NEXT: retq
3084;
3085; AVX512BWVL-LABEL: trunc_packus_v8i32_v8i8:
3086; AVX512BWVL: # %bb.0:
3087; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
3088; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
3089; AVX512BWVL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
3090; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
3091; AVX512BWVL-NEXT: vzeroupper
3092; AVX512BWVL-NEXT: retq
; %1/%2: signed min with 255; %3/%4: signed max with 0; %5: narrow to i8 lanes.
3093 %1 = icmp slt <8 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
3094 %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
3095 %3 = icmp sgt <8 x i32> %2, zeroinitializer
3096 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
3097 %5 = trunc <8 x i32> %4 to <8 x i8>
3098 ret <8 x i8> %5
3099}
3100
3100
; trunc_packus_v8i32_v8i8_store: same clamp-and-truncate pattern as a returned
; <8 x i8> would use (signed min with 255, signed max with 0, trunc), but the
; <8 x i8> result is stored through %p1 and the function returns void.
; For the SSE/AVX1/AVX2 prefixes the expected code has no explicit min/max:
; only packssdw, packuswb and a 64-bit movq store. The AVX512 prefixes still
; expect vpminsd/vpmaxsd, followed by either vpmovdw+vpshufb+vmovq or a
; vpmovdb directly to memory.
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with that script instead of editing them by hand.
Simon Pilgrim689d8132018-02-15 17:48:34 +00003101define void @trunc_packus_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) {
Simon Pilgrimc302a582018-02-19 13:29:20 +00003102; SSE-LABEL: trunc_packus_v8i32_v8i8_store:
3103; SSE: # %bb.0:
3104; SSE-NEXT: packssdw %xmm1, %xmm0
3105; SSE-NEXT: packuswb %xmm0, %xmm0
3106; SSE-NEXT: movq %xmm0, (%rdi)
3107; SSE-NEXT: retq
Simon Pilgrim689d8132018-02-15 17:48:34 +00003108;
3109; AVX1-LABEL: trunc_packus_v8i32_v8i8_store:
3110; AVX1: # %bb.0:
Simon Pilgrimc302a582018-02-19 13:29:20 +00003111; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3112; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
3113; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
Simon Pilgrim689d8132018-02-15 17:48:34 +00003114; AVX1-NEXT: vmovq %xmm0, (%rdi)
3115; AVX1-NEXT: vzeroupper
3116; AVX1-NEXT: retq
3117;
3118; AVX2-LABEL: trunc_packus_v8i32_v8i8_store:
3119; AVX2: # %bb.0:
Simon Pilgrim7fae42e2018-02-17 22:19:50 +00003120; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3121; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
Simon Pilgrimc302a582018-02-19 13:29:20 +00003122; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
Simon Pilgrim689d8132018-02-15 17:48:34 +00003123; AVX2-NEXT: vmovq %xmm0, (%rdi)
3124; AVX2-NEXT: vzeroupper
3125; AVX2-NEXT: retq
3126;
3127; AVX512F-LABEL: trunc_packus_v8i32_v8i8_store:
3128; AVX512F: # %bb.0:
3129; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
3130; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
3131; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
3132; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
3133; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
3134; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
3135; AVX512F-NEXT: vmovq %xmm0, (%rdi)
3136; AVX512F-NEXT: vzeroupper
3137; AVX512F-NEXT: retq
3138;
3139; AVX512VL-LABEL: trunc_packus_v8i32_v8i8_store:
3140; AVX512VL: # %bb.0:
3141; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
3142; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
3143; AVX512VL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
3144; AVX512VL-NEXT: vpmovdb %ymm0, (%rdi)
3145; AVX512VL-NEXT: vzeroupper
3146; AVX512VL-NEXT: retq
3147;
3148; AVX512BW-LABEL: trunc_packus_v8i32_v8i8_store:
3149; AVX512BW: # %bb.0:
3150; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
3151; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0
3152; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
3153; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
3154; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
3155; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
3156; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
3157; AVX512BW-NEXT: vzeroupper
3158; AVX512BW-NEXT: retq
3159;
3160; AVX512BWVL-LABEL: trunc_packus_v8i32_v8i8_store:
3161; AVX512BWVL: # %bb.0:
3162; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
3163; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
3164; AVX512BWVL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
3165; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rdi)
3166; AVX512BWVL-NEXT: vzeroupper
3167; AVX512BWVL-NEXT: retq
; %1/%2: signed min with 255; %3/%4: signed max with 0; %5: narrow to i8 lanes,
; then write the 8 bytes to %p1.
3168 %1 = icmp slt <8 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
3169 %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
3170 %3 = icmp sgt <8 x i32> %2, zeroinitializer
3171 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
3172 %5 = trunc <8 x i32> %4 to <8 x i8>
3173 store <8 x i8> %5, <8 x i8> *%p1
3174 ret void
3175}
3176
3176
; trunc_packus_v16i32_v16i8: clamp every i32 lane of %a0 to the signed range
; [0, 255] (icmp slt/select against 255, then icmp sgt/select against zero),
; truncate to <16 x i8> and return the result.
; For the SSE/AVX1/AVX2 prefixes the expected code has no explicit min/max:
; only packssdw followed by packuswb (plus lane extraction/permutation on AVX).
; The shared AVX512 prefix expects vpminsd/vpmaxsd followed by vpmovdb.
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with that script instead of editing them by hand.
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003177define <16 x i8> @trunc_packus_v16i32_v16i8(<16 x i32> %a0) {
Simon Pilgrim17bb6f02018-02-15 14:37:59 +00003178; SSE-LABEL: trunc_packus_v16i32_v16i8:
3179; SSE: # %bb.0:
3180; SSE-NEXT: packssdw %xmm3, %xmm2
3181; SSE-NEXT: packssdw %xmm1, %xmm0
3182; SSE-NEXT: packuswb %xmm2, %xmm0
3183; SSE-NEXT: retq
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003184;
3185; AVX1-LABEL: trunc_packus_v16i32_v16i8:
3186; AVX1: # %bb.0:
3187; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
Simon Pilgrim17bb6f02018-02-15 14:37:59 +00003188; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
3189; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3190; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003191; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
3192; AVX1-NEXT: vzeroupper
3193; AVX1-NEXT: retq
3194;
3195; AVX2-LABEL: trunc_packus_v16i32_v16i8:
3196; AVX2: # %bb.0:
Simon Pilgrim17bb6f02018-02-15 14:37:59 +00003197; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
3198; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
3199; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3200; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003201; AVX2-NEXT: vzeroupper
3202; AVX2-NEXT: retq
3203;
3204; AVX512-LABEL: trunc_packus_v16i32_v16i8:
3205; AVX512: # %bb.0:
3206; AVX512-NEXT: vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
3207; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
3208; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
3209; AVX512-NEXT: vpmovdb %zmm0, %xmm0
3210; AVX512-NEXT: vzeroupper
3211; AVX512-NEXT: retq
; %1/%2: signed min with 255; %3/%4: signed max with 0; %5: narrow to i8 lanes.
3212 %1 = icmp slt <16 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
3213 %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
3214 %3 = icmp sgt <16 x i32> %2, zeroinitializer
3215 %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
3216 %5 = trunc <16 x i32> %4 to <16 x i8>
3217 ret <16 x i8> %5
3218}
3219
3219
; trunc_packus_v16i16_v16i8: clamp every i16 lane of %a0 to the signed range
; [0, 255] (icmp slt/select against 255, then icmp sgt/select against zero),
; truncate to <16 x i8> and return the result.
; For the SSE/AVX1/AVX2 prefixes the expected code is a single packuswb (after
; a 128-bit lane extract on AVX). The AVX512 prefixes expect an explicit
; vpminsw/vpmaxsw clamp, then vpmovsxwd+vpmovdb (AVX512F/AVX512VL) or vpmovwb
; (AVX512BW/AVX512BWVL).
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with that script instead of editing them by hand.
3220define <16 x i8> @trunc_packus_v16i16_v16i8(<16 x i16> %a0) {
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00003221; SSE-LABEL: trunc_packus_v16i16_v16i8:
3222; SSE: # %bb.0:
3223; SSE-NEXT: packuswb %xmm1, %xmm0
3224; SSE-NEXT: retq
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003225;
3226; AVX1-LABEL: trunc_packus_v16i16_v16i8:
3227; AVX1: # %bb.0:
3228; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Simon Pilgrimae00a712018-02-06 14:07:46 +00003229; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003230; AVX1-NEXT: vzeroupper
3231; AVX1-NEXT: retq
3232;
3233; AVX2-LABEL: trunc_packus_v16i16_v16i8:
3234; AVX2: # %bb.0:
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003235; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Simon Pilgrimae00a712018-02-06 14:07:46 +00003236; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003237; AVX2-NEXT: vzeroupper
3238; AVX2-NEXT: retq
3239;
3240; AVX512F-LABEL: trunc_packus_v16i16_v16i8:
3241; AVX512F: # %bb.0:
3242; AVX512F-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
3243; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
3244; AVX512F-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
3245; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
3246; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
3247; AVX512F-NEXT: vzeroupper
3248; AVX512F-NEXT: retq
3249;
3250; AVX512VL-LABEL: trunc_packus_v16i16_v16i8:
3251; AVX512VL: # %bb.0:
3252; AVX512VL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
3253; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
3254; AVX512VL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
3255; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0
3256; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
3257; AVX512VL-NEXT: vzeroupper
3258; AVX512VL-NEXT: retq
3259;
3260; AVX512BW-LABEL: trunc_packus_v16i16_v16i8:
3261; AVX512BW: # %bb.0:
3262; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
3263; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
3264; AVX512BW-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
3265; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
Puyan Lotfi43e94b12018-01-31 22:04:26 +00003266; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003267; AVX512BW-NEXT: vzeroupper
3268; AVX512BW-NEXT: retq
3269;
3270; AVX512BWVL-LABEL: trunc_packus_v16i16_v16i8:
3271; AVX512BWVL: # %bb.0:
3272; AVX512BWVL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
3273; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
3274; AVX512BWVL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
3275; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
3276; AVX512BWVL-NEXT: vzeroupper
3277; AVX512BWVL-NEXT: retq
; %1/%2: signed min with 255; %3/%4: signed max with 0; %5: narrow to i8 lanes.
3278 %1 = icmp slt <16 x i16> %a0, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
3279 %2 = select <16 x i1> %1, <16 x i16> %a0, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
3280 %3 = icmp sgt <16 x i16> %2, zeroinitializer
3281 %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer
3282 %5 = trunc <16 x i16> %4 to <16 x i8>
3283 ret <16 x i8> %5
3284}
3285
3285
; Clamps every signed i16 lane of %a0 to the range [0, 255] (signed min against
; 255, then signed max against 0) and truncates the result to <32 x i8>.
; The assertions below expect a bare pack-with-unsigned-saturation
; (packuswb/vpackuswb) for the SSE, AVX1 and AVX2 runs, while the AVX512 runs
; are expected to keep the explicit vpminsw/vpmaxsw clamp before truncating.
3286define <32 x i8> @trunc_packus_v32i16_v32i8(<32 x i16> %a0) {
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00003287; SSE-LABEL: trunc_packus_v32i16_v32i8:
3288; SSE: # %bb.0:
3289; SSE-NEXT: packuswb %xmm1, %xmm0
3290; SSE-NEXT: packuswb %xmm3, %xmm2
3291; SSE-NEXT: movdqa %xmm2, %xmm1
3292; SSE-NEXT: retq
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003293;
3294; AVX1-LABEL: trunc_packus_v32i16_v32i8:
3295; AVX1: # %bb.0:
3296; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
Simon Pilgrimae00a712018-02-06 14:07:46 +00003297; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
3298; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3299; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003300; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3301; AVX1-NEXT: retq
3302;
3303; AVX2-LABEL: trunc_packus_v32i16_v32i8:
3304; AVX2: # %bb.0:
Simon Pilgrim86d15bf2018-02-14 14:14:29 +00003305; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
3306; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
Simon Pilgrim65ec9232018-01-26 14:58:50 +00003307; AVX2-NEXT: retq
3308;
3309; AVX512F-LABEL: trunc_packus_v32i16_v32i8:
3310; AVX512F: # %bb.0:
3311; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
3312; AVX512F-NEXT: vpminsw %ymm2, %ymm0, %ymm0
3313; AVX512F-NEXT: vpminsw %ymm2, %ymm1, %ymm1
3314; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
3315; AVX512F-NEXT: vpmaxsw %ymm2, %ymm1, %ymm1
3316; AVX512F-NEXT: vpmaxsw %ymm2, %ymm0, %ymm0
3317; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
3318; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
3319; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
3320; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
3321; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3322; AVX512F-NEXT: retq
3323;
3324; AVX512VL-LABEL: trunc_packus_v32i16_v32i8:
3325; AVX512VL: # %bb.0:
3326; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
3327; AVX512VL-NEXT: vpminsw %ymm2, %ymm0, %ymm0
3328; AVX512VL-NEXT: vpminsw %ymm2, %ymm1, %ymm1
3329; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
3330; AVX512VL-NEXT: vpmaxsw %ymm2, %ymm1, %ymm1
3331; AVX512VL-NEXT: vpmaxsw %ymm2, %ymm0, %ymm0
3332; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0
3333; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
3334; AVX512VL-NEXT: vpmovsxwd %ymm1, %zmm1
3335; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
3336; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3337; AVX512VL-NEXT: retq
3338;
3339; AVX512BW-LABEL: trunc_packus_v32i16_v32i8:
3340; AVX512BW: # %bb.0:
3341; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %zmm0, %zmm0
3342; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
3343; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
3344; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
3345; AVX512BW-NEXT: retq
3346;
3347; AVX512BWVL-LABEL: trunc_packus_v32i16_v32i8:
3348; AVX512BWVL: # %bb.0:
3349; AVX512BWVL-NEXT: vpminsw {{.*}}(%rip), %zmm0, %zmm0
3350; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
3351; AVX512BWVL-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
3352; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
3353; AVX512BWVL-NEXT: retq
; Upper clamp (signed min): lanes below 255 are kept, all others become 255.
3354 %1 = icmp slt <32 x i16> %a0, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
3355 %2 = select <32 x i1> %1, <32 x i16> %a0, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
; Lower clamp (signed max): lanes above 0 are kept, all others become 0.
3356 %3 = icmp sgt <32 x i16> %2, zeroinitializer
3357 %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
; Every lane is now within [0, 255], so the truncation discards only zero bits.
3358 %5 = trunc <32 x i16> %4 to <32 x i8>
3359 ret <32 x i8> %5
3360}