blob: 1dcfd3223c868fa20c223eb5ee4993c9b4b71fbb [file] [log] [blame]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
Simon Pilgrimc5fde8d2016-12-30 22:43:41 +00007; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512DQ
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00008
9;
10; add
11;
12
Simon Pilgrim85af9732016-12-30 22:40:32 +000013define <4 x i32> @trunc_add_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
14; SSE-LABEL: trunc_add_v4i64_v4i32:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +000015; SSE: # BB#0:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +000016; SSE-NEXT: paddq %xmm3, %xmm1
Sanjay Patela0d8a272016-12-15 18:03:38 +000017; SSE-NEXT: paddq %xmm2, %xmm0
18; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +000019; SSE-NEXT: retq
20;
Simon Pilgrim85af9732016-12-30 22:40:32 +000021; AVX1-LABEL: trunc_add_v4i64_v4i32:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +000022; AVX1: # BB#0:
Sanjay Patela0d8a272016-12-15 18:03:38 +000023; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
24; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
25; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2
Simon Pilgrim5f1326f2016-03-13 19:08:01 +000026; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
Sanjay Patela0d8a272016-12-15 18:03:38 +000027; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +000028; AVX1-NEXT: vzeroupper
29; AVX1-NEXT: retq
30;
Simon Pilgrim85af9732016-12-30 22:40:32 +000031; AVX2-LABEL: trunc_add_v4i64_v4i32:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +000032; AVX2: # BB#0:
33; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
Simon Pilgrim2683ad52016-07-15 09:49:12 +000034; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
35; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Matthias Braun152e7c82016-07-09 00:19:07 +000036; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +000037; AVX2-NEXT: vzeroupper
38; AVX2-NEXT: retq
39;
Simon Pilgrim85af9732016-12-30 22:40:32 +000040; AVX512-LABEL: trunc_add_v4i64_v4i32:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +000041; AVX512: # BB#0:
42; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0
43; AVX512-NEXT: vpmovqd %zmm0, %ymm0
Matthias Braun152e7c82016-07-09 00:19:07 +000044; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +000045; AVX512-NEXT: retq
46 %1 = add <4 x i64> %a0, %a1
47 %2 = trunc <4 x i64> %1 to <4 x i32>
48 ret <4 x i32> %2
49}
50
Simon Pilgrim85af9732016-12-30 22:40:32 +000051define <8 x i16> @trunc_add_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
52; SSE-LABEL: trunc_add_v8i64_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +000053; SSE: # BB#0:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +000054; SSE-NEXT: paddq %xmm4, %xmm0
Simon Pilgrim5f1326f2016-03-13 19:08:01 +000055; SSE-NEXT: paddq %xmm5, %xmm1
Michael Kuperstein7cc21232016-10-06 18:58:24 +000056; SSE-NEXT: paddq %xmm6, %xmm2
57; SSE-NEXT: paddq %xmm7, %xmm3
58; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
59; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
60; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
61; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
62; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
63; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
64; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
65; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
66; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
67; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
68; SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
69; SSE-NEXT: movapd %xmm2, %xmm0
Simon Pilgrim5f1326f2016-03-13 19:08:01 +000070; SSE-NEXT: retq
71;
Simon Pilgrim85af9732016-12-30 22:40:32 +000072; AVX1-LABEL: trunc_add_v8i64_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +000073; AVX1: # BB#0:
74; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm4
75; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
76; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
77; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm0
78; AVX1-NEXT: vpaddq %xmm3, %xmm1, %xmm2
79; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
80; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
81; AVX1-NEXT: vpaddq %xmm3, %xmm1, %xmm1
82; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
83; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
84; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
85; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1
86; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
87; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0],xmm3[1,2,3],xmm4[4],xmm3[5,6,7]
88; AVX1-NEXT: vpackusdw %xmm0, %xmm2, %xmm0
89; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
90; AVX1-NEXT: vzeroupper
91; AVX1-NEXT: retq
92;
Simon Pilgrim85af9732016-12-30 22:40:32 +000093; AVX2-LABEL: trunc_add_v8i64_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +000094; AVX2: # BB#0:
95; AVX2-NEXT: vpaddq %ymm3, %ymm1, %ymm1
96; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
Simon Pilgrim2683ad52016-07-15 09:49:12 +000097; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
98; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
99; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
100; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000101; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
102; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
103; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Matthias Braun152e7c82016-07-09 00:19:07 +0000104; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000105; AVX2-NEXT: vzeroupper
106; AVX2-NEXT: retq
107;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000108; AVX512-LABEL: trunc_add_v8i64_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000109; AVX512: # BB#0:
110; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
111; AVX512-NEXT: vpmovqw %zmm0, %xmm0
112; AVX512-NEXT: retq
113 %1 = add <8 x i64> %a0, %a1
114 %2 = trunc <8 x i64> %1 to <8 x i16>
115 ret <8 x i16> %2
116}
117
Simon Pilgrim85af9732016-12-30 22:40:32 +0000118define <8 x i16> @trunc_add_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
119; SSE-LABEL: trunc_add_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000120; SSE: # BB#0:
121; SSE-NEXT: paddd %xmm2, %xmm0
122; SSE-NEXT: paddd %xmm3, %xmm1
123; SSE-NEXT: pslld $16, %xmm1
124; SSE-NEXT: psrad $16, %xmm1
125; SSE-NEXT: pslld $16, %xmm0
126; SSE-NEXT: psrad $16, %xmm0
127; SSE-NEXT: packssdw %xmm1, %xmm0
128; SSE-NEXT: retq
129;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000130; AVX1-LABEL: trunc_add_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000131; AVX1: # BB#0:
132; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm2
133; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
134; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
135; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
136; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
137; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
138; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm1
139; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
140; AVX1-NEXT: vzeroupper
141; AVX1-NEXT: retq
142;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000143; AVX2-LABEL: trunc_add_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000144; AVX2: # BB#0:
145; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
146; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
147; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Matthias Braun152e7c82016-07-09 00:19:07 +0000148; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000149; AVX2-NEXT: vzeroupper
150; AVX2-NEXT: retq
151;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000152; AVX512-LABEL: trunc_add_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000153; AVX512: # BB#0:
154; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
155; AVX512-NEXT: vpmovdw %zmm0, %ymm0
Matthias Braun152e7c82016-07-09 00:19:07 +0000156; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000157; AVX512-NEXT: retq
158 %1 = add <8 x i32> %a0, %a1
159 %2 = trunc <8 x i32> %1 to <8 x i16>
160 ret <8 x i16> %2
161}
162
163define <16 x i8> @trunc_add_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwind {
164; SSE-LABEL: trunc_add_v16i64_v16i8:
165; SSE: # BB#0:
166; SSE-NEXT: paddq {{[0-9]+}}(%rsp), %xmm0
167; SSE-NEXT: paddq {{[0-9]+}}(%rsp), %xmm1
168; SSE-NEXT: paddq {{[0-9]+}}(%rsp), %xmm2
169; SSE-NEXT: paddq {{[0-9]+}}(%rsp), %xmm3
170; SSE-NEXT: paddq {{[0-9]+}}(%rsp), %xmm4
171; SSE-NEXT: paddq {{[0-9]+}}(%rsp), %xmm5
172; SSE-NEXT: paddq {{[0-9]+}}(%rsp), %xmm6
173; SSE-NEXT: paddq {{[0-9]+}}(%rsp), %xmm7
174; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
175; SSE-NEXT: pand %xmm8, %xmm7
176; SSE-NEXT: pand %xmm8, %xmm6
177; SSE-NEXT: packuswb %xmm7, %xmm6
178; SSE-NEXT: pand %xmm8, %xmm5
179; SSE-NEXT: pand %xmm8, %xmm4
180; SSE-NEXT: packuswb %xmm5, %xmm4
181; SSE-NEXT: packuswb %xmm6, %xmm4
182; SSE-NEXT: pand %xmm8, %xmm3
183; SSE-NEXT: pand %xmm8, %xmm2
184; SSE-NEXT: packuswb %xmm3, %xmm2
185; SSE-NEXT: pand %xmm8, %xmm1
186; SSE-NEXT: pand %xmm8, %xmm0
187; SSE-NEXT: packuswb %xmm1, %xmm0
188; SSE-NEXT: packuswb %xmm2, %xmm0
189; SSE-NEXT: packuswb %xmm4, %xmm0
190; SSE-NEXT: retq
191;
192; AVX1-LABEL: trunc_add_v16i64_v16i8:
193; AVX1: # BB#0:
194; AVX1-NEXT: vpaddq %xmm4, %xmm0, %xmm8
195; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm4
196; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
197; AVX1-NEXT: vpaddq %xmm4, %xmm0, %xmm0
198; AVX1-NEXT: vpaddq %xmm5, %xmm1, %xmm4
199; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm5
200; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
201; AVX1-NEXT: vpaddq %xmm5, %xmm1, %xmm1
202; AVX1-NEXT: vpaddq %xmm6, %xmm2, %xmm5
203; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm6
204; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
205; AVX1-NEXT: vpaddq %xmm6, %xmm2, %xmm2
206; AVX1-NEXT: vpaddq %xmm7, %xmm3, %xmm6
207; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm7
208; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
209; AVX1-NEXT: vpaddq %xmm7, %xmm3, %xmm3
210; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
211; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
212; AVX1-NEXT: vpand %xmm7, %xmm6, %xmm6
213; AVX1-NEXT: vpackuswb %xmm3, %xmm6, %xmm3
214; AVX1-NEXT: vpand %xmm7, %xmm2, %xmm2
215; AVX1-NEXT: vpand %xmm7, %xmm5, %xmm5
216; AVX1-NEXT: vpackuswb %xmm2, %xmm5, %xmm2
217; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
218; AVX1-NEXT: vpand %xmm7, %xmm1, %xmm1
219; AVX1-NEXT: vpand %xmm7, %xmm4, %xmm3
220; AVX1-NEXT: vpackuswb %xmm1, %xmm3, %xmm1
221; AVX1-NEXT: vpand %xmm7, %xmm0, %xmm0
222; AVX1-NEXT: vpand %xmm7, %xmm8, %xmm3
223; AVX1-NEXT: vpackuswb %xmm0, %xmm3, %xmm0
224; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
225; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
226; AVX1-NEXT: vzeroupper
227; AVX1-NEXT: retq
228;
229; AVX2-LABEL: trunc_add_v16i64_v16i8:
230; AVX2: # BB#0:
231; AVX2-NEXT: vpaddq %ymm5, %ymm1, %ymm1
232; AVX2-NEXT: vpaddq %ymm4, %ymm0, %ymm0
233; AVX2-NEXT: vpaddq %ymm7, %ymm3, %ymm3
234; AVX2-NEXT: vpaddq %ymm6, %ymm2, %ymm2
Simon Pilgrim2683ad52016-07-15 09:49:12 +0000235; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
236; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
237; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
238; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000239; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
240; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
241; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
242; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
243; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
244; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
Simon Pilgrim2683ad52016-07-15 09:49:12 +0000245; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
246; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
247; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
248; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000249; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
250; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
251; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
252; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
253; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
254; AVX2-NEXT: vzeroupper
255; AVX2-NEXT: retq
256;
Simon Pilgrimc5fde8d2016-12-30 22:43:41 +0000257; AVX512F-LABEL: trunc_add_v16i64_v16i8:
258; AVX512F: # BB#0:
259; AVX512F-NEXT: vpaddq %zmm3, %zmm1, %zmm1
260; AVX512F-NEXT: vpaddq %zmm2, %zmm0, %zmm0
261; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
262; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
263; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
264; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
265; AVX512F-NEXT: retq
266;
267; AVX512BW-LABEL: trunc_add_v16i64_v16i8:
268; AVX512BW: # BB#0:
269; AVX512BW-NEXT: vpaddq %zmm3, %zmm1, %zmm1
270; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
271; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
272; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
273; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
274; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
275; AVX512BW-NEXT: retq
276;
277; AVX512DQ-LABEL: trunc_add_v16i64_v16i8:
278; AVX512DQ: # BB#0:
279; AVX512DQ-NEXT: vpaddq %zmm3, %zmm1, %zmm1
280; AVX512DQ-NEXT: vpaddq %zmm2, %zmm0, %zmm0
281; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
282; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
283; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
284; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
285; AVX512DQ-NEXT: retq
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000286 %1 = add <16 x i64> %a0, %a1
287 %2 = trunc <16 x i64> %1 to <16 x i8>
288 ret <16 x i8> %2
289}
290
291define <16 x i8> @trunc_add_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwind {
292; SSE-LABEL: trunc_add_v16i32_v16i8:
293; SSE: # BB#0:
294; SSE-NEXT: paddd %xmm4, %xmm0
295; SSE-NEXT: paddd %xmm5, %xmm1
296; SSE-NEXT: paddd %xmm6, %xmm2
297; SSE-NEXT: paddd %xmm7, %xmm3
298; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
299; SSE-NEXT: pand %xmm4, %xmm3
300; SSE-NEXT: pand %xmm4, %xmm2
301; SSE-NEXT: packuswb %xmm3, %xmm2
302; SSE-NEXT: pand %xmm4, %xmm1
303; SSE-NEXT: pand %xmm4, %xmm0
304; SSE-NEXT: packuswb %xmm1, %xmm0
305; SSE-NEXT: packuswb %xmm2, %xmm0
306; SSE-NEXT: retq
307;
308; AVX1-LABEL: trunc_add_v16i32_v16i8:
309; AVX1: # BB#0:
310; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm4
311; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
312; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
313; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
314; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm2
315; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
316; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
317; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
318; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
319; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
320; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
321; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
322; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
323; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm2
324; AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm0
325; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
326; AVX1-NEXT: vzeroupper
327; AVX1-NEXT: retq
328;
329; AVX2-LABEL: trunc_add_v16i32_v16i8:
330; AVX2: # BB#0:
331; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
332; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
333; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
334; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
335; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
336; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
337; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
338; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
339; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
340; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
341; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
342; AVX2-NEXT: vzeroupper
343; AVX2-NEXT: retq
344;
345; AVX512-LABEL: trunc_add_v16i32_v16i8:
346; AVX512: # BB#0:
347; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
348; AVX512-NEXT: vpmovdb %zmm0, %xmm0
349; AVX512-NEXT: retq
350 %1 = add <16 x i32> %a0, %a1
351 %2 = trunc <16 x i32> %1 to <16 x i8>
352 ret <16 x i8> %2
353}
354
355define <16 x i8> @trunc_add_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind {
356; SSE-LABEL: trunc_add_v16i16_v16i8:
357; SSE: # BB#0:
358; SSE-NEXT: paddw %xmm2, %xmm0
359; SSE-NEXT: paddw %xmm3, %xmm1
360; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
361; SSE-NEXT: pand %xmm2, %xmm1
362; SSE-NEXT: pand %xmm2, %xmm0
363; SSE-NEXT: packuswb %xmm1, %xmm0
364; SSE-NEXT: retq
365;
366; AVX1-LABEL: trunc_add_v16i16_v16i8:
367; AVX1: # BB#0:
368; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm2
369; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
370; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
371; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0
372; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
373; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
374; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm1
375; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
376; AVX1-NEXT: vzeroupper
377; AVX1-NEXT: retq
378;
379; AVX2-LABEL: trunc_add_v16i16_v16i8:
380; AVX2: # BB#0:
381; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
382; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
383; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
384; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
385; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
386; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
387; AVX2-NEXT: vzeroupper
388; AVX2-NEXT: retq
389;
390; AVX512F-LABEL: trunc_add_v16i16_v16i8:
391; AVX512F: # BB#0:
392; AVX512F-NEXT: vpaddw %ymm1, %ymm0, %ymm0
393; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
394; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
395; AVX512F-NEXT: retq
396;
397; AVX512BW-LABEL: trunc_add_v16i16_v16i8:
398; AVX512BW: # BB#0:
399; AVX512BW-NEXT: vpaddw %ymm1, %ymm0, %ymm0
400; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
Matthias Braun152e7c82016-07-09 00:19:07 +0000401; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000402; AVX512BW-NEXT: retq
Simon Pilgrimc5fde8d2016-12-30 22:43:41 +0000403;
404; AVX512DQ-LABEL: trunc_add_v16i16_v16i8:
405; AVX512DQ: # BB#0:
406; AVX512DQ-NEXT: vpaddw %ymm1, %ymm0, %ymm0
407; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
408; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
409; AVX512DQ-NEXT: retq
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000410 %1 = add <16 x i16> %a0, %a1
411 %2 = trunc <16 x i16> %1 to <16 x i8>
412 ret <16 x i8> %2
413}
414
415;
416; add to constant
417;
418
Simon Pilgrim85af9732016-12-30 22:40:32 +0000419define <4 x i32> @trunc_add_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
420; SSE-LABEL: trunc_add_const_v4i64_v4i32:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000421; SSE: # BB#0:
422; SSE-NEXT: movl $1, %eax
423; SSE-NEXT: movd %rax, %xmm2
424; SSE-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
Sanjay Patela0d8a272016-12-15 18:03:38 +0000425; SSE-NEXT: paddq %xmm2, %xmm0
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000426; SSE-NEXT: paddq {{.*}}(%rip), %xmm1
Sanjay Patela0d8a272016-12-15 18:03:38 +0000427; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000428; SSE-NEXT: retq
429;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000430; AVX1-LABEL: trunc_add_const_v4i64_v4i32:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000431; AVX1: # BB#0:
432; AVX1-NEXT: movl $1, %eax
433; AVX1-NEXT: vmovq %rax, %xmm1
434; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
435; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm1
436; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
437; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0
Sanjay Patela0d8a272016-12-15 18:03:38 +0000438; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[0,2]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000439; AVX1-NEXT: vzeroupper
440; AVX1-NEXT: retq
441;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000442; AVX2-LABEL: trunc_add_const_v4i64_v4i32:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000443; AVX2: # BB#0:
444; AVX2-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
Simon Pilgrim2683ad52016-07-15 09:49:12 +0000445; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
446; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Matthias Braun152e7c82016-07-09 00:19:07 +0000447; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000448; AVX2-NEXT: vzeroupper
449; AVX2-NEXT: retq
450;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000451; AVX512-LABEL: trunc_add_const_v4i64_v4i32:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000452; AVX512: # BB#0:
453; AVX512-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
454; AVX512-NEXT: vpmovqd %zmm0, %ymm0
Matthias Braun152e7c82016-07-09 00:19:07 +0000455; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000456; AVX512-NEXT: retq
457 %1 = add <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
458 %2 = trunc <4 x i64> %1 to <4 x i32>
459 ret <4 x i32> %2
460}
461
Simon Pilgrim85af9732016-12-30 22:40:32 +0000462define <8 x i16> @trunc_add_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
463; SSE-LABEL: trunc_add_const_v8i64_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000464; SSE: # BB#0:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000465; SSE-NEXT: movl $1, %eax
Michael Kuperstein7cc21232016-10-06 18:58:24 +0000466; SSE-NEXT: movd %rax, %xmm4
467; SSE-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
468; SSE-NEXT: paddq %xmm0, %xmm4
469; SSE-NEXT: paddq {{.*}}(%rip), %xmm1
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000470; SSE-NEXT: paddq {{.*}}(%rip), %xmm2
471; SSE-NEXT: paddq {{.*}}(%rip), %xmm3
Michael Kuperstein7cc21232016-10-06 18:58:24 +0000472; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
473; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm0[0,1,0,2,4,5,6,7]
474; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
475; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
476; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
477; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[0,2,2,3]
478; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
479; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
480; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
481; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
482; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000483; SSE-NEXT: retq
484;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000485; AVX1-LABEL: trunc_add_const_v8i64_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000486; AVX1: # BB#0:
487; AVX1-NEXT: movl $1, %eax
488; AVX1-NEXT: vmovq %rax, %xmm2
489; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
490; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm2
491; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
492; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0
493; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm1, %xmm3
494; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
495; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm1, %xmm1
496; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
497; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
498; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
499; AVX1-NEXT: vpackusdw %xmm1, %xmm3, %xmm1
500; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
501; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
502; AVX1-NEXT: vpackusdw %xmm0, %xmm2, %xmm0
503; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
504; AVX1-NEXT: vzeroupper
505; AVX1-NEXT: retq
506;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000507; AVX2-LABEL: trunc_add_const_v8i64_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000508; AVX2: # BB#0:
509; AVX2-NEXT: vpaddq {{.*}}(%rip), %ymm1, %ymm1
510; AVX2-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
Simon Pilgrim2683ad52016-07-15 09:49:12 +0000511; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
512; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
513; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
514; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000515; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
516; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
517; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Matthias Braun152e7c82016-07-09 00:19:07 +0000518; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000519; AVX2-NEXT: vzeroupper
520; AVX2-NEXT: retq
521;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000522; AVX512-LABEL: trunc_add_const_v8i64_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000523; AVX512: # BB#0:
524; AVX512-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
525; AVX512-NEXT: vpmovqw %zmm0, %xmm0
526; AVX512-NEXT: retq
527 %1 = add <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
528 %2 = trunc <8 x i64> %1 to <8 x i16>
529 ret <8 x i16> %2
530}
531
Simon Pilgrim85af9732016-12-30 22:40:32 +0000532define <8 x i16> @trunc_add_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
533; SSE-LABEL: trunc_add_const_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000534; SSE: # BB#0:
535; SSE-NEXT: paddd {{.*}}(%rip), %xmm0
536; SSE-NEXT: paddd {{.*}}(%rip), %xmm1
537; SSE-NEXT: pslld $16, %xmm1
538; SSE-NEXT: psrad $16, %xmm1
539; SSE-NEXT: pslld $16, %xmm0
540; SSE-NEXT: psrad $16, %xmm0
541; SSE-NEXT: packssdw %xmm1, %xmm0
542; SSE-NEXT: retq
543;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000544; AVX1-LABEL: trunc_add_const_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000545; AVX1: # BB#0:
546; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm1
547; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
548; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
549; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
550; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
551; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
552; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
553; AVX1-NEXT: vzeroupper
554; AVX1-NEXT: retq
555;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000556; AVX2-LABEL: trunc_add_const_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000557; AVX2: # BB#0:
558; AVX2-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
559; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
560; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Matthias Braun152e7c82016-07-09 00:19:07 +0000561; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000562; AVX2-NEXT: vzeroupper
563; AVX2-NEXT: retq
564;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000565; AVX512-LABEL: trunc_add_const_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000566; AVX512: # BB#0:
567; AVX512-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
568; AVX512-NEXT: vpmovdw %zmm0, %ymm0
Matthias Braun152e7c82016-07-09 00:19:07 +0000569; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000570; AVX512-NEXT: retq
571 %1 = add <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
572 %2 = trunc <8 x i32> %1 to <8 x i16>
573 ret <8 x i16> %2
574}
575
576define <16 x i8> @trunc_add_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
577; SSE-LABEL: trunc_add_const_v16i64_v16i8:
578; SSE: # BB#0:
579; SSE-NEXT: movl $1, %eax
580; SSE-NEXT: movd %rax, %xmm8
581; SSE-NEXT: pslldq {{.*#+}} xmm8 = zero,zero,zero,zero,zero,zero,zero,zero,xmm8[0,1,2,3,4,5,6,7]
582; SSE-NEXT: paddq %xmm8, %xmm0
583; SSE-NEXT: paddq {{.*}}(%rip), %xmm1
584; SSE-NEXT: paddq {{.*}}(%rip), %xmm2
585; SSE-NEXT: paddq {{.*}}(%rip), %xmm3
586; SSE-NEXT: paddq {{.*}}(%rip), %xmm4
587; SSE-NEXT: paddq {{.*}}(%rip), %xmm5
588; SSE-NEXT: paddq {{.*}}(%rip), %xmm6
589; SSE-NEXT: paddq {{.*}}(%rip), %xmm7
590; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
591; SSE-NEXT: pand %xmm8, %xmm7
592; SSE-NEXT: pand %xmm8, %xmm6
593; SSE-NEXT: packuswb %xmm7, %xmm6
594; SSE-NEXT: pand %xmm8, %xmm5
595; SSE-NEXT: pand %xmm8, %xmm4
596; SSE-NEXT: packuswb %xmm5, %xmm4
597; SSE-NEXT: packuswb %xmm6, %xmm4
598; SSE-NEXT: pand %xmm8, %xmm3
599; SSE-NEXT: pand %xmm8, %xmm2
600; SSE-NEXT: packuswb %xmm3, %xmm2
601; SSE-NEXT: pand %xmm8, %xmm1
602; SSE-NEXT: pand %xmm8, %xmm0
603; SSE-NEXT: packuswb %xmm1, %xmm0
604; SSE-NEXT: packuswb %xmm2, %xmm0
605; SSE-NEXT: packuswb %xmm4, %xmm0
606; SSE-NEXT: retq
607;
608; AVX1-LABEL: trunc_add_const_v16i64_v16i8:
609; AVX1: # BB#0:
610; AVX1-NEXT: movl $1, %eax
611; AVX1-NEXT: vmovq %rax, %xmm4
612; AVX1-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
613; AVX1-NEXT: vpaddq %xmm4, %xmm0, %xmm8
614; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
615; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0
616; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm1, %xmm5
617; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
618; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm1, %xmm1
619; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm2, %xmm6
620; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
621; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm2, %xmm2
622; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm3, %xmm7
623; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
624; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm3, %xmm3
625; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
626; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
627; AVX1-NEXT: vpand %xmm4, %xmm7, %xmm7
628; AVX1-NEXT: vpackuswb %xmm3, %xmm7, %xmm3
629; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
630; AVX1-NEXT: vpand %xmm4, %xmm6, %xmm6
631; AVX1-NEXT: vpackuswb %xmm2, %xmm6, %xmm2
632; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
633; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
634; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm3
635; AVX1-NEXT: vpackuswb %xmm1, %xmm3, %xmm1
636; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
637; AVX1-NEXT: vpand %xmm4, %xmm8, %xmm3
638; AVX1-NEXT: vpackuswb %xmm0, %xmm3, %xmm0
639; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
640; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
641; AVX1-NEXT: vzeroupper
642; AVX1-NEXT: retq
643;
644; AVX2-LABEL: trunc_add_const_v16i64_v16i8:
645; AVX2: # BB#0:
646; AVX2-NEXT: vpaddq {{.*}}(%rip), %ymm1, %ymm1
647; AVX2-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
648; AVX2-NEXT: vpaddq {{.*}}(%rip), %ymm3, %ymm3
649; AVX2-NEXT: vpaddq {{.*}}(%rip), %ymm2, %ymm2
Simon Pilgrim2683ad52016-07-15 09:49:12 +0000650; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
651; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
652; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
653; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000654; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
655; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
656; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
657; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
658; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
659; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
Simon Pilgrim2683ad52016-07-15 09:49:12 +0000660; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
661; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
662; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
663; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000664; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
665; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
666; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
667; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
668; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
669; AVX2-NEXT: vzeroupper
670; AVX2-NEXT: retq
671;
Simon Pilgrimc5fde8d2016-12-30 22:43:41 +0000672; AVX512F-LABEL: trunc_add_const_v16i64_v16i8:
673; AVX512F: # BB#0:
674; AVX512F-NEXT: vpaddq {{.*}}(%rip), %zmm1, %zmm1
675; AVX512F-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
676; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
677; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
678; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
679; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
680; AVX512F-NEXT: retq
681;
682; AVX512BW-LABEL: trunc_add_const_v16i64_v16i8:
683; AVX512BW: # BB#0:
684; AVX512BW-NEXT: vpaddq {{.*}}(%rip), %zmm1, %zmm1
685; AVX512BW-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
686; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
687; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
688; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
689; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
690; AVX512BW-NEXT: retq
691;
692; AVX512DQ-LABEL: trunc_add_const_v16i64_v16i8:
693; AVX512DQ: # BB#0:
694; AVX512DQ-NEXT: vpaddq {{.*}}(%rip), %zmm1, %zmm1
695; AVX512DQ-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
696; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
697; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
698; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
699; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
700; AVX512DQ-NEXT: retq
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000701 %1 = add <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
702 %2 = trunc <16 x i64> %1 to <16 x i8>
703 ret <16 x i8> %2
704}
705
706define <16 x i8> @trunc_add_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
707; SSE-LABEL: trunc_add_const_v16i32_v16i8:
708; SSE: # BB#0:
709; SSE-NEXT: paddd {{.*}}(%rip), %xmm0
710; SSE-NEXT: paddd {{.*}}(%rip), %xmm1
711; SSE-NEXT: paddd {{.*}}(%rip), %xmm2
712; SSE-NEXT: paddd {{.*}}(%rip), %xmm3
713; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
714; SSE-NEXT: pand %xmm4, %xmm3
715; SSE-NEXT: pand %xmm4, %xmm2
716; SSE-NEXT: packuswb %xmm3, %xmm2
717; SSE-NEXT: pand %xmm4, %xmm1
718; SSE-NEXT: pand %xmm4, %xmm0
719; SSE-NEXT: packuswb %xmm1, %xmm0
720; SSE-NEXT: packuswb %xmm2, %xmm0
721; SSE-NEXT: retq
722;
723; AVX1-LABEL: trunc_add_const_v16i32_v16i8:
724; AVX1: # BB#0:
725; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm2
726; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
727; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
728; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm1, %xmm3
729; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
730; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm1, %xmm1
731; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
732; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
733; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
734; AVX1-NEXT: vpackuswb %xmm1, %xmm3, %xmm1
735; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
736; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
737; AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm0
738; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
739; AVX1-NEXT: vzeroupper
740; AVX1-NEXT: retq
741;
742; AVX2-LABEL: trunc_add_const_v16i32_v16i8:
743; AVX2: # BB#0:
744; AVX2-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
745; AVX2-NEXT: vpaddd {{.*}}(%rip), %ymm1, %ymm1
746; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
747; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
748; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
749; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
750; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
751; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
752; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
753; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
754; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
755; AVX2-NEXT: vzeroupper
756; AVX2-NEXT: retq
757;
758; AVX512-LABEL: trunc_add_const_v16i32_v16i8:
759; AVX512: # BB#0:
760; AVX512-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
761; AVX512-NEXT: vpmovdb %zmm0, %xmm0
762; AVX512-NEXT: retq
763 %1 = add <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
764 %2 = trunc <16 x i32> %1 to <16 x i8>
765 ret <16 x i8> %2
766}
767
768define <16 x i8> @trunc_add_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
769; SSE-LABEL: trunc_add_const_v16i16_v16i8:
770; SSE: # BB#0:
771; SSE-NEXT: paddw {{.*}}(%rip), %xmm0
772; SSE-NEXT: paddw {{.*}}(%rip), %xmm1
773; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
774; SSE-NEXT: pand %xmm2, %xmm1
775; SSE-NEXT: pand %xmm2, %xmm0
776; SSE-NEXT: packuswb %xmm1, %xmm0
777; SSE-NEXT: retq
778;
779; AVX1-LABEL: trunc_add_const_v16i16_v16i8:
780; AVX1: # BB#0:
781; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm1
782; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
783; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
784; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
785; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
786; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
787; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
788; AVX1-NEXT: vzeroupper
789; AVX1-NEXT: retq
790;
791; AVX2-LABEL: trunc_add_const_v16i16_v16i8:
792; AVX2: # BB#0:
793; AVX2-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
794; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
795; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
796; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
797; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
798; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
799; AVX2-NEXT: vzeroupper
800; AVX2-NEXT: retq
801;
802; AVX512F-LABEL: trunc_add_const_v16i16_v16i8:
803; AVX512F: # BB#0:
804; AVX512F-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
805; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
806; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
807; AVX512F-NEXT: retq
808;
809; AVX512BW-LABEL: trunc_add_const_v16i16_v16i8:
810; AVX512BW: # BB#0:
811; AVX512BW-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
812; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
Matthias Braun152e7c82016-07-09 00:19:07 +0000813; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000814; AVX512BW-NEXT: retq
Simon Pilgrimc5fde8d2016-12-30 22:43:41 +0000815;
816; AVX512DQ-LABEL: trunc_add_const_v16i16_v16i8:
817; AVX512DQ: # BB#0:
818; AVX512DQ-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
819; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
820; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
821; AVX512DQ-NEXT: retq
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000822 %1 = add <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
823 %2 = trunc <16 x i16> %1 to <16 x i8>
824 ret <16 x i8> %2
825}
826
827;
828; sub
829;
830
Simon Pilgrim85af9732016-12-30 22:40:32 +0000831define <4 x i32> @trunc_sub_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
832; SSE-LABEL: trunc_sub_v4i64_v4i32:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000833; SSE: # BB#0:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000834; SSE-NEXT: psubq %xmm3, %xmm1
Sanjay Patela0d8a272016-12-15 18:03:38 +0000835; SSE-NEXT: psubq %xmm2, %xmm0
836; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000837; SSE-NEXT: retq
838;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000839; AVX1-LABEL: trunc_sub_v4i64_v4i32:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000840; AVX1: # BB#0:
Sanjay Patela0d8a272016-12-15 18:03:38 +0000841; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
842; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
843; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000844; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm0
Sanjay Patela0d8a272016-12-15 18:03:38 +0000845; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000846; AVX1-NEXT: vzeroupper
847; AVX1-NEXT: retq
848;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000849; AVX2-LABEL: trunc_sub_v4i64_v4i32:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000850; AVX2: # BB#0:
851; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
Simon Pilgrim2683ad52016-07-15 09:49:12 +0000852; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
853; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Matthias Braun152e7c82016-07-09 00:19:07 +0000854; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000855; AVX2-NEXT: vzeroupper
856; AVX2-NEXT: retq
857;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000858; AVX512-LABEL: trunc_sub_v4i64_v4i32:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000859; AVX512: # BB#0:
860; AVX512-NEXT: vpsubq %ymm1, %ymm0, %ymm0
861; AVX512-NEXT: vpmovqd %zmm0, %ymm0
Matthias Braun152e7c82016-07-09 00:19:07 +0000862; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000863; AVX512-NEXT: retq
864 %1 = sub <4 x i64> %a0, %a1
865 %2 = trunc <4 x i64> %1 to <4 x i32>
866 ret <4 x i32> %2
867}
868
Simon Pilgrim85af9732016-12-30 22:40:32 +0000869define <8 x i16> @trunc_sub_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
870; SSE-LABEL: trunc_sub_v8i64_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000871; SSE: # BB#0:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000872; SSE-NEXT: psubq %xmm4, %xmm0
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000873; SSE-NEXT: psubq %xmm5, %xmm1
Michael Kuperstein7cc21232016-10-06 18:58:24 +0000874; SSE-NEXT: psubq %xmm6, %xmm2
875; SSE-NEXT: psubq %xmm7, %xmm3
876; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
877; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
878; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
879; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
880; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
881; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
882; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
883; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
884; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
885; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
886; SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
887; SSE-NEXT: movapd %xmm2, %xmm0
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000888; SSE-NEXT: retq
889;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000890; AVX1-LABEL: trunc_sub_v8i64_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000891; AVX1: # BB#0:
892; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm4
893; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
894; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
895; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
896; AVX1-NEXT: vpsubq %xmm3, %xmm1, %xmm2
897; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
898; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
899; AVX1-NEXT: vpsubq %xmm3, %xmm1, %xmm1
900; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
901; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
902; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
903; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1
904; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
905; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0],xmm3[1,2,3],xmm4[4],xmm3[5,6,7]
906; AVX1-NEXT: vpackusdw %xmm0, %xmm2, %xmm0
907; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
908; AVX1-NEXT: vzeroupper
909; AVX1-NEXT: retq
910;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000911; AVX2-LABEL: trunc_sub_v8i64_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000912; AVX2: # BB#0:
913; AVX2-NEXT: vpsubq %ymm3, %ymm1, %ymm1
914; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
Simon Pilgrim2683ad52016-07-15 09:49:12 +0000915; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
916; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
917; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
918; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000919; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
920; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
921; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Matthias Braun152e7c82016-07-09 00:19:07 +0000922; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000923; AVX2-NEXT: vzeroupper
924; AVX2-NEXT: retq
925;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000926; AVX512-LABEL: trunc_sub_v8i64_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000927; AVX512: # BB#0:
928; AVX512-NEXT: vpsubq %zmm1, %zmm0, %zmm0
929; AVX512-NEXT: vpmovqw %zmm0, %xmm0
930; AVX512-NEXT: retq
931 %1 = sub <8 x i64> %a0, %a1
932 %2 = trunc <8 x i64> %1 to <8 x i16>
933 ret <8 x i16> %2
934}
935
Simon Pilgrim85af9732016-12-30 22:40:32 +0000936define <8 x i16> @trunc_sub_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
937; SSE-LABEL: trunc_sub_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000938; SSE: # BB#0:
939; SSE-NEXT: psubd %xmm2, %xmm0
940; SSE-NEXT: psubd %xmm3, %xmm1
941; SSE-NEXT: pslld $16, %xmm1
942; SSE-NEXT: psrad $16, %xmm1
943; SSE-NEXT: pslld $16, %xmm0
944; SSE-NEXT: psrad $16, %xmm0
945; SSE-NEXT: packssdw %xmm1, %xmm0
946; SSE-NEXT: retq
947;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000948; AVX1-LABEL: trunc_sub_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000949; AVX1: # BB#0:
950; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm2
951; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
952; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
953; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
954; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
955; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
956; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm1
957; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
958; AVX1-NEXT: vzeroupper
959; AVX1-NEXT: retq
960;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000961; AVX2-LABEL: trunc_sub_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000962; AVX2: # BB#0:
963; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0
964; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
965; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Matthias Braun152e7c82016-07-09 00:19:07 +0000966; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000967; AVX2-NEXT: vzeroupper
968; AVX2-NEXT: retq
969;
Simon Pilgrim85af9732016-12-30 22:40:32 +0000970; AVX512-LABEL: trunc_sub_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000971; AVX512: # BB#0:
972; AVX512-NEXT: vpsubd %ymm1, %ymm0, %ymm0
973; AVX512-NEXT: vpmovdw %zmm0, %ymm0
Matthias Braun152e7c82016-07-09 00:19:07 +0000974; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +0000975; AVX512-NEXT: retq
976 %1 = sub <8 x i32> %a0, %a1
977 %2 = trunc <8 x i32> %1 to <8 x i16>
978 ret <8 x i16> %2
979}
980
981define <16 x i8> @trunc_sub_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwind {
982; SSE-LABEL: trunc_sub_v16i64_v16i8:
983; SSE: # BB#0:
984; SSE-NEXT: psubq {{[0-9]+}}(%rsp), %xmm0
985; SSE-NEXT: psubq {{[0-9]+}}(%rsp), %xmm1
986; SSE-NEXT: psubq {{[0-9]+}}(%rsp), %xmm2
987; SSE-NEXT: psubq {{[0-9]+}}(%rsp), %xmm3
988; SSE-NEXT: psubq {{[0-9]+}}(%rsp), %xmm4
989; SSE-NEXT: psubq {{[0-9]+}}(%rsp), %xmm5
990; SSE-NEXT: psubq {{[0-9]+}}(%rsp), %xmm6
991; SSE-NEXT: psubq {{[0-9]+}}(%rsp), %xmm7
992; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
993; SSE-NEXT: pand %xmm8, %xmm7
994; SSE-NEXT: pand %xmm8, %xmm6
995; SSE-NEXT: packuswb %xmm7, %xmm6
996; SSE-NEXT: pand %xmm8, %xmm5
997; SSE-NEXT: pand %xmm8, %xmm4
998; SSE-NEXT: packuswb %xmm5, %xmm4
999; SSE-NEXT: packuswb %xmm6, %xmm4
1000; SSE-NEXT: pand %xmm8, %xmm3
1001; SSE-NEXT: pand %xmm8, %xmm2
1002; SSE-NEXT: packuswb %xmm3, %xmm2
1003; SSE-NEXT: pand %xmm8, %xmm1
1004; SSE-NEXT: pand %xmm8, %xmm0
1005; SSE-NEXT: packuswb %xmm1, %xmm0
1006; SSE-NEXT: packuswb %xmm2, %xmm0
1007; SSE-NEXT: packuswb %xmm4, %xmm0
1008; SSE-NEXT: retq
1009;
1010; AVX1-LABEL: trunc_sub_v16i64_v16i8:
1011; AVX1: # BB#0:
1012; AVX1-NEXT: vpsubq %xmm4, %xmm0, %xmm8
1013; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm4
1014; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1015; AVX1-NEXT: vpsubq %xmm4, %xmm0, %xmm0
1016; AVX1-NEXT: vpsubq %xmm5, %xmm1, %xmm4
1017; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm5
1018; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1019; AVX1-NEXT: vpsubq %xmm5, %xmm1, %xmm1
1020; AVX1-NEXT: vpsubq %xmm6, %xmm2, %xmm5
1021; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm6
1022; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
1023; AVX1-NEXT: vpsubq %xmm6, %xmm2, %xmm2
1024; AVX1-NEXT: vpsubq %xmm7, %xmm3, %xmm6
1025; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm7
1026; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
1027; AVX1-NEXT: vpsubq %xmm7, %xmm3, %xmm3
1028; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
1029; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
1030; AVX1-NEXT: vpand %xmm7, %xmm6, %xmm6
1031; AVX1-NEXT: vpackuswb %xmm3, %xmm6, %xmm3
1032; AVX1-NEXT: vpand %xmm7, %xmm2, %xmm2
1033; AVX1-NEXT: vpand %xmm7, %xmm5, %xmm5
1034; AVX1-NEXT: vpackuswb %xmm2, %xmm5, %xmm2
1035; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
1036; AVX1-NEXT: vpand %xmm7, %xmm1, %xmm1
1037; AVX1-NEXT: vpand %xmm7, %xmm4, %xmm3
1038; AVX1-NEXT: vpackuswb %xmm1, %xmm3, %xmm1
1039; AVX1-NEXT: vpand %xmm7, %xmm0, %xmm0
1040; AVX1-NEXT: vpand %xmm7, %xmm8, %xmm3
1041; AVX1-NEXT: vpackuswb %xmm0, %xmm3, %xmm0
1042; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1043; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
1044; AVX1-NEXT: vzeroupper
1045; AVX1-NEXT: retq
1046;
1047; AVX2-LABEL: trunc_sub_v16i64_v16i8:
1048; AVX2: # BB#0:
1049; AVX2-NEXT: vpsubq %ymm5, %ymm1, %ymm1
1050; AVX2-NEXT: vpsubq %ymm4, %ymm0, %ymm0
1051; AVX2-NEXT: vpsubq %ymm7, %ymm3, %ymm3
1052; AVX2-NEXT: vpsubq %ymm6, %ymm2, %ymm2
Simon Pilgrim2683ad52016-07-15 09:49:12 +00001053; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
1054; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
1055; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
1056; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001057; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
1058; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
1059; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
1060; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
1061; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
1062; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
Simon Pilgrim2683ad52016-07-15 09:49:12 +00001063; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
1064; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1065; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
1066; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001067; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1068; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
1069; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1070; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
1071; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
1072; AVX2-NEXT: vzeroupper
1073; AVX2-NEXT: retq
1074;
Simon Pilgrimc5fde8d2016-12-30 22:43:41 +00001075; AVX512F-LABEL: trunc_sub_v16i64_v16i8:
1076; AVX512F: # BB#0:
1077; AVX512F-NEXT: vpsubq %zmm3, %zmm1, %zmm1
1078; AVX512F-NEXT: vpsubq %zmm2, %zmm0, %zmm0
1079; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
1080; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
1081; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1082; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
1083; AVX512F-NEXT: retq
1084;
1085; AVX512BW-LABEL: trunc_sub_v16i64_v16i8:
1086; AVX512BW: # BB#0:
1087; AVX512BW-NEXT: vpsubq %zmm3, %zmm1, %zmm1
1088; AVX512BW-NEXT: vpsubq %zmm2, %zmm0, %zmm0
1089; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
1090; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
1091; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1092; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
1093; AVX512BW-NEXT: retq
1094;
1095; AVX512DQ-LABEL: trunc_sub_v16i64_v16i8:
1096; AVX512DQ: # BB#0:
1097; AVX512DQ-NEXT: vpsubq %zmm3, %zmm1, %zmm1
1098; AVX512DQ-NEXT: vpsubq %zmm2, %zmm0, %zmm0
1099; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
1100; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
1101; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
1102; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1103; AVX512DQ-NEXT: retq
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001104 %1 = sub <16 x i64> %a0, %a1
1105 %2 = trunc <16 x i64> %1 to <16 x i8>
1106 ret <16 x i8> %2
1107}
1108
1109define <16 x i8> @trunc_sub_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwind {
1110; SSE-LABEL: trunc_sub_v16i32_v16i8:
1111; SSE: # BB#0:
1112; SSE-NEXT: psubd %xmm4, %xmm0
1113; SSE-NEXT: psubd %xmm5, %xmm1
1114; SSE-NEXT: psubd %xmm6, %xmm2
1115; SSE-NEXT: psubd %xmm7, %xmm3
1116; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
1117; SSE-NEXT: pand %xmm4, %xmm3
1118; SSE-NEXT: pand %xmm4, %xmm2
1119; SSE-NEXT: packuswb %xmm3, %xmm2
1120; SSE-NEXT: pand %xmm4, %xmm1
1121; SSE-NEXT: pand %xmm4, %xmm0
1122; SSE-NEXT: packuswb %xmm1, %xmm0
1123; SSE-NEXT: packuswb %xmm2, %xmm0
1124; SSE-NEXT: retq
1125;
1126; AVX1-LABEL: trunc_sub_v16i32_v16i8:
1127; AVX1: # BB#0:
1128; AVX1-NEXT: vpsubd %xmm2, %xmm0, %xmm4
1129; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
1130; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1131; AVX1-NEXT: vpsubd %xmm2, %xmm0, %xmm0
1132; AVX1-NEXT: vpsubd %xmm3, %xmm1, %xmm2
1133; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
1134; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1135; AVX1-NEXT: vpsubd %xmm3, %xmm1, %xmm1
1136; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
1137; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
1138; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
1139; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
1140; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
1141; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm2
1142; AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm0
1143; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1144; AVX1-NEXT: vzeroupper
1145; AVX1-NEXT: retq
1146;
1147; AVX2-LABEL: trunc_sub_v16i32_v16i8:
1148; AVX2: # BB#0:
1149; AVX2-NEXT: vpsubd %ymm2, %ymm0, %ymm0
1150; AVX2-NEXT: vpsubd %ymm3, %ymm1, %ymm1
1151; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
1152; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
1153; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
1154; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
1155; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
1156; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
1157; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1158; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
1159; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1160; AVX2-NEXT: vzeroupper
1161; AVX2-NEXT: retq
1162;
1163; AVX512-LABEL: trunc_sub_v16i32_v16i8:
1164; AVX512: # BB#0:
1165; AVX512-NEXT: vpsubd %zmm1, %zmm0, %zmm0
1166; AVX512-NEXT: vpmovdb %zmm0, %xmm0
1167; AVX512-NEXT: retq
1168 %1 = sub <16 x i32> %a0, %a1
1169 %2 = trunc <16 x i32> %1 to <16 x i8>
1170 ret <16 x i8> %2
1171}
1172
1173define <16 x i8> @trunc_sub_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind {
1174; SSE-LABEL: trunc_sub_v16i16_v16i8:
1175; SSE: # BB#0:
1176; SSE-NEXT: psubw %xmm2, %xmm0
1177; SSE-NEXT: psubw %xmm3, %xmm1
1178; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
1179; SSE-NEXT: pand %xmm2, %xmm1
1180; SSE-NEXT: pand %xmm2, %xmm0
1181; SSE-NEXT: packuswb %xmm1, %xmm0
1182; SSE-NEXT: retq
1183;
1184; AVX1-LABEL: trunc_sub_v16i16_v16i8:
1185; AVX1: # BB#0:
1186; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm2
1187; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1188; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1189; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0
1190; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
1191; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
1192; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm1
1193; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1194; AVX1-NEXT: vzeroupper
1195; AVX1-NEXT: retq
1196;
1197; AVX2-LABEL: trunc_sub_v16i16_v16i8:
1198; AVX2: # BB#0:
1199; AVX2-NEXT: vpsubw %ymm1, %ymm0, %ymm0
1200; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1201; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
1202; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1203; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1204; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1205; AVX2-NEXT: vzeroupper
1206; AVX2-NEXT: retq
1207;
1208; AVX512F-LABEL: trunc_sub_v16i16_v16i8:
1209; AVX512F: # BB#0:
1210; AVX512F-NEXT: vpsubw %ymm1, %ymm0, %ymm0
1211; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
1212; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
1213; AVX512F-NEXT: retq
1214;
1215; AVX512BW-LABEL: trunc_sub_v16i16_v16i8:
1216; AVX512BW: # BB#0:
1217; AVX512BW-NEXT: vpsubw %ymm1, %ymm0, %ymm0
1218; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
Matthias Braun152e7c82016-07-09 00:19:07 +00001219; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001220; AVX512BW-NEXT: retq
Simon Pilgrimc5fde8d2016-12-30 22:43:41 +00001221;
1222; AVX512DQ-LABEL: trunc_sub_v16i16_v16i8:
1223; AVX512DQ: # BB#0:
1224; AVX512DQ-NEXT: vpsubw %ymm1, %ymm0, %ymm0
1225; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
1226; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1227; AVX512DQ-NEXT: retq
  %1 = sub <16 x i16> %a0, %a1
  %2 = trunc <16 x i16> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; sub to constant
;
1236
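; Note (editorial, not part of the autogenerated assertions): the constant
; operands in the tests below are folded into RIP-relative loads from the
; constant pool ("{{.*}}(%rip)"); the lone <i64 0, i64 1> half is instead
; built inline with movl $1 + movd/vmovq + pslldq in the SSE/AVX1 code.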
define <4 x i32> @trunc_sub_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
1238; SSE-LABEL: trunc_sub_const_v4i64_v4i32:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001239; SSE: # BB#0:
1240; SSE-NEXT: movl $1, %eax
1241; SSE-NEXT: movd %rax, %xmm2
1242; SSE-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
1243; SSE-NEXT: psubq %xmm2, %xmm0
1244; SSE-NEXT: psubq {{.*}}(%rip), %xmm1
Sanjay Patela0d8a272016-12-15 18:03:38 +00001245; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001246; SSE-NEXT: retq
1247;
Simon Pilgrim85af9732016-12-30 22:40:32 +00001248; AVX1-LABEL: trunc_sub_const_v4i64_v4i32:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001249; AVX1: # BB#0:
1250; AVX1-NEXT: movl $1, %eax
1251; AVX1-NEXT: vmovq %rax, %xmm1
1252; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
1253; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm1
1254; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1255; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm0, %xmm0
Sanjay Patela0d8a272016-12-15 18:03:38 +00001256; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[0,2]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001257; AVX1-NEXT: vzeroupper
1258; AVX1-NEXT: retq
1259;
Simon Pilgrim85af9732016-12-30 22:40:32 +00001260; AVX2-LABEL: trunc_sub_const_v4i64_v4i32:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001261; AVX2: # BB#0:
1262; AVX2-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
Simon Pilgrim2683ad52016-07-15 09:49:12 +00001263; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
1264; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Matthias Braun152e7c82016-07-09 00:19:07 +00001265; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001266; AVX2-NEXT: vzeroupper
1267; AVX2-NEXT: retq
1268;
Simon Pilgrim85af9732016-12-30 22:40:32 +00001269; AVX512-LABEL: trunc_sub_const_v4i64_v4i32:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001270; AVX512: # BB#0:
1271; AVX512-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
1272; AVX512-NEXT: vpmovqd %zmm0, %ymm0
Matthias Braun152e7c82016-07-09 00:19:07 +00001273; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001274; AVX512-NEXT: retq
  %1 = sub <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @trunc_sub_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
1281; SSE-LABEL: trunc_sub_const_v8i64_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001282; SSE: # BB#0:
1283; SSE-NEXT: movl $1, %eax
1284; SSE-NEXT: movd %rax, %xmm4
1285; SSE-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
1286; SSE-NEXT: psubq %xmm4, %xmm0
Michael Kuperstein7cc21232016-10-06 18:58:24 +00001287; SSE-NEXT: psubq {{.*}}(%rip), %xmm1
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001288; SSE-NEXT: psubq {{.*}}(%rip), %xmm2
1289; SSE-NEXT: psubq {{.*}}(%rip), %xmm3
Michael Kuperstein7cc21232016-10-06 18:58:24 +00001290; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
1291; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
1292; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1293; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
1294; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1295; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1296; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1297; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1298; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
1299; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1300; SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
1301; SSE-NEXT: movapd %xmm2, %xmm0
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001302; SSE-NEXT: retq
1303;
Simon Pilgrim85af9732016-12-30 22:40:32 +00001304; AVX1-LABEL: trunc_sub_const_v8i64_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001305; AVX1: # BB#0:
1306; AVX1-NEXT: movl $1, %eax
1307; AVX1-NEXT: vmovq %rax, %xmm2
1308; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
1309; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm2
1310; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1311; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm0, %xmm0
1312; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm3
1313; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1314; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm1
1315; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
1316; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
1317; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
1318; AVX1-NEXT: vpackusdw %xmm1, %xmm3, %xmm1
1319; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
1320; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
1321; AVX1-NEXT: vpackusdw %xmm0, %xmm2, %xmm0
1322; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
1323; AVX1-NEXT: vzeroupper
1324; AVX1-NEXT: retq
1325;
Simon Pilgrim85af9732016-12-30 22:40:32 +00001326; AVX2-LABEL: trunc_sub_const_v8i64_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001327; AVX2: # BB#0:
1328; AVX2-NEXT: vpsubq {{.*}}(%rip), %ymm1, %ymm1
1329; AVX2-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
Simon Pilgrim2683ad52016-07-15 09:49:12 +00001330; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
1331; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1332; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
1333; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001334; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1335; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
1336; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Matthias Braun152e7c82016-07-09 00:19:07 +00001337; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001338; AVX2-NEXT: vzeroupper
1339; AVX2-NEXT: retq
1340;
Simon Pilgrim85af9732016-12-30 22:40:32 +00001341; AVX512-LABEL: trunc_sub_const_v8i64_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001342; AVX512: # BB#0:
1343; AVX512-NEXT: vpsubq {{.*}}(%rip), %zmm0, %zmm0
1344; AVX512-NEXT: vpmovqw %zmm0, %xmm0
1345; AVX512-NEXT: retq
  %1 = sub <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
  %2 = trunc <8 x i64> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <8 x i16> @trunc_sub_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
1352; SSE-LABEL: trunc_sub_const_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001353; SSE: # BB#0:
1354; SSE-NEXT: psubd {{.*}}(%rip), %xmm0
1355; SSE-NEXT: psubd {{.*}}(%rip), %xmm1
1356; SSE-NEXT: pslld $16, %xmm1
1357; SSE-NEXT: psrad $16, %xmm1
1358; SSE-NEXT: pslld $16, %xmm0
1359; SSE-NEXT: psrad $16, %xmm0
1360; SSE-NEXT: packssdw %xmm1, %xmm0
1361; SSE-NEXT: retq
1362;
Simon Pilgrim85af9732016-12-30 22:40:32 +00001363; AVX1-LABEL: trunc_sub_const_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001364; AVX1: # BB#0:
1365; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm1
1366; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1367; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
1368; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
1369; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1370; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1371; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1372; AVX1-NEXT: vzeroupper
1373; AVX1-NEXT: retq
1374;
Simon Pilgrim85af9732016-12-30 22:40:32 +00001375; AVX2-LABEL: trunc_sub_const_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001376; AVX2: # BB#0:
1377; AVX2-NEXT: vpsubd {{.*}}(%rip), %ymm0, %ymm0
1378; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
1379; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Matthias Braun152e7c82016-07-09 00:19:07 +00001380; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001381; AVX2-NEXT: vzeroupper
1382; AVX2-NEXT: retq
1383;
Simon Pilgrim85af9732016-12-30 22:40:32 +00001384; AVX512-LABEL: trunc_sub_const_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001385; AVX512: # BB#0:
1386; AVX512-NEXT: vpsubd {{.*}}(%rip), %ymm0, %ymm0
1387; AVX512-NEXT: vpmovdw %zmm0, %ymm0
Matthias Braun152e7c82016-07-09 00:19:07 +00001388; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001389; AVX512-NEXT: retq
  %1 = sub <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = trunc <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <16 x i8> @trunc_sub_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
1396; SSE-LABEL: trunc_sub_const_v16i64_v16i8:
1397; SSE: # BB#0:
1398; SSE-NEXT: movl $1, %eax
1399; SSE-NEXT: movd %rax, %xmm8
1400; SSE-NEXT: pslldq {{.*#+}} xmm8 = zero,zero,zero,zero,zero,zero,zero,zero,xmm8[0,1,2,3,4,5,6,7]
1401; SSE-NEXT: psubq %xmm8, %xmm0
1402; SSE-NEXT: psubq {{.*}}(%rip), %xmm1
1403; SSE-NEXT: psubq {{.*}}(%rip), %xmm2
1404; SSE-NEXT: psubq {{.*}}(%rip), %xmm3
1405; SSE-NEXT: psubq {{.*}}(%rip), %xmm4
1406; SSE-NEXT: psubq {{.*}}(%rip), %xmm5
1407; SSE-NEXT: psubq {{.*}}(%rip), %xmm6
1408; SSE-NEXT: psubq {{.*}}(%rip), %xmm7
1409; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
1410; SSE-NEXT: pand %xmm8, %xmm7
1411; SSE-NEXT: pand %xmm8, %xmm6
1412; SSE-NEXT: packuswb %xmm7, %xmm6
1413; SSE-NEXT: pand %xmm8, %xmm5
1414; SSE-NEXT: pand %xmm8, %xmm4
1415; SSE-NEXT: packuswb %xmm5, %xmm4
1416; SSE-NEXT: packuswb %xmm6, %xmm4
1417; SSE-NEXT: pand %xmm8, %xmm3
1418; SSE-NEXT: pand %xmm8, %xmm2
1419; SSE-NEXT: packuswb %xmm3, %xmm2
1420; SSE-NEXT: pand %xmm8, %xmm1
1421; SSE-NEXT: pand %xmm8, %xmm0
1422; SSE-NEXT: packuswb %xmm1, %xmm0
1423; SSE-NEXT: packuswb %xmm2, %xmm0
1424; SSE-NEXT: packuswb %xmm4, %xmm0
1425; SSE-NEXT: retq
1426;
1427; AVX1-LABEL: trunc_sub_const_v16i64_v16i8:
1428; AVX1: # BB#0:
1429; AVX1-NEXT: movl $1, %eax
1430; AVX1-NEXT: vmovq %rax, %xmm4
1431; AVX1-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
1432; AVX1-NEXT: vpsubq %xmm4, %xmm0, %xmm8
1433; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1434; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm0, %xmm0
1435; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm5
1436; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1437; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm1
1438; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm2, %xmm6
1439; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
1440; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm2, %xmm2
1441; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm3, %xmm7
1442; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
1443; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm3, %xmm3
1444; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
1445; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
1446; AVX1-NEXT: vpand %xmm4, %xmm7, %xmm7
1447; AVX1-NEXT: vpackuswb %xmm3, %xmm7, %xmm3
1448; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
1449; AVX1-NEXT: vpand %xmm4, %xmm6, %xmm6
1450; AVX1-NEXT: vpackuswb %xmm2, %xmm6, %xmm2
1451; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
1452; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
1453; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm3
1454; AVX1-NEXT: vpackuswb %xmm1, %xmm3, %xmm1
1455; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
1456; AVX1-NEXT: vpand %xmm4, %xmm8, %xmm3
1457; AVX1-NEXT: vpackuswb %xmm0, %xmm3, %xmm0
1458; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1459; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
1460; AVX1-NEXT: vzeroupper
1461; AVX1-NEXT: retq
1462;
1463; AVX2-LABEL: trunc_sub_const_v16i64_v16i8:
1464; AVX2: # BB#0:
1465; AVX2-NEXT: vpsubq {{.*}}(%rip), %ymm1, %ymm1
1466; AVX2-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
1467; AVX2-NEXT: vpsubq {{.*}}(%rip), %ymm3, %ymm3
1468; AVX2-NEXT: vpsubq {{.*}}(%rip), %ymm2, %ymm2
Simon Pilgrim2683ad52016-07-15 09:49:12 +00001469; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
1470; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
1471; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
1472; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001473; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
1474; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
1475; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
1476; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
1477; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
1478; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
Simon Pilgrim2683ad52016-07-15 09:49:12 +00001479; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
1480; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1481; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
1482; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001483; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1484; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
1485; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1486; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
1487; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
1488; AVX2-NEXT: vzeroupper
1489; AVX2-NEXT: retq
1490;
Simon Pilgrimc5fde8d2016-12-30 22:43:41 +00001491; AVX512F-LABEL: trunc_sub_const_v16i64_v16i8:
1492; AVX512F: # BB#0:
1493; AVX512F-NEXT: vpsubq {{.*}}(%rip), %zmm1, %zmm1
1494; AVX512F-NEXT: vpsubq {{.*}}(%rip), %zmm0, %zmm0
1495; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
1496; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
1497; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1498; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
1499; AVX512F-NEXT: retq
1500;
1501; AVX512BW-LABEL: trunc_sub_const_v16i64_v16i8:
1502; AVX512BW: # BB#0:
1503; AVX512BW-NEXT: vpsubq {{.*}}(%rip), %zmm1, %zmm1
1504; AVX512BW-NEXT: vpsubq {{.*}}(%rip), %zmm0, %zmm0
1505; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
1506; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
1507; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1508; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
1509; AVX512BW-NEXT: retq
1510;
1511; AVX512DQ-LABEL: trunc_sub_const_v16i64_v16i8:
1512; AVX512DQ: # BB#0:
1513; AVX512DQ-NEXT: vpsubq {{.*}}(%rip), %zmm1, %zmm1
1514; AVX512DQ-NEXT: vpsubq {{.*}}(%rip), %zmm0, %zmm0
1515; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
1516; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
1517; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
1518; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1519; AVX512DQ-NEXT: retq
  %1 = sub <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
  %2 = trunc <16 x i64> %1 to <16 x i8>
  ret <16 x i8> %2
}

define <16 x i8> @trunc_sub_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
1526; SSE-LABEL: trunc_sub_const_v16i32_v16i8:
1527; SSE: # BB#0:
1528; SSE-NEXT: psubd {{.*}}(%rip), %xmm0
1529; SSE-NEXT: psubd {{.*}}(%rip), %xmm1
1530; SSE-NEXT: psubd {{.*}}(%rip), %xmm2
1531; SSE-NEXT: psubd {{.*}}(%rip), %xmm3
1532; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
1533; SSE-NEXT: pand %xmm4, %xmm3
1534; SSE-NEXT: pand %xmm4, %xmm2
1535; SSE-NEXT: packuswb %xmm3, %xmm2
1536; SSE-NEXT: pand %xmm4, %xmm1
1537; SSE-NEXT: pand %xmm4, %xmm0
1538; SSE-NEXT: packuswb %xmm1, %xmm0
1539; SSE-NEXT: packuswb %xmm2, %xmm0
1540; SSE-NEXT: retq
1541;
1542; AVX1-LABEL: trunc_sub_const_v16i32_v16i8:
1543; AVX1: # BB#0:
1544; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm2
1545; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1546; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
1547; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm1, %xmm3
1548; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1549; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm1, %xmm1
1550; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
1551; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
1552; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
1553; AVX1-NEXT: vpackuswb %xmm1, %xmm3, %xmm1
1554; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
1555; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
1556; AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm0
1557; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1558; AVX1-NEXT: vzeroupper
1559; AVX1-NEXT: retq
1560;
1561; AVX2-LABEL: trunc_sub_const_v16i32_v16i8:
1562; AVX2: # BB#0:
1563; AVX2-NEXT: vpsubd {{.*}}(%rip), %ymm0, %ymm0
1564; AVX2-NEXT: vpsubd {{.*}}(%rip), %ymm1, %ymm1
1565; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
1566; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
1567; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
1568; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
1569; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
1570; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
1571; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1572; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
1573; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1574; AVX2-NEXT: vzeroupper
1575; AVX2-NEXT: retq
1576;
1577; AVX512-LABEL: trunc_sub_const_v16i32_v16i8:
1578; AVX512: # BB#0:
1579; AVX512-NEXT: vpsubd {{.*}}(%rip), %zmm0, %zmm0
1580; AVX512-NEXT: vpmovdb %zmm0, %xmm0
1581; AVX512-NEXT: retq
  %1 = sub <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %2 = trunc <16 x i32> %1 to <16 x i8>
  ret <16 x i8> %2
}

define <16 x i8> @trunc_sub_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
1588; SSE-LABEL: trunc_sub_const_v16i16_v16i8:
1589; SSE: # BB#0:
1590; SSE-NEXT: psubw {{.*}}(%rip), %xmm0
1591; SSE-NEXT: psubw {{.*}}(%rip), %xmm1
1592; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
1593; SSE-NEXT: pand %xmm2, %xmm1
1594; SSE-NEXT: pand %xmm2, %xmm0
1595; SSE-NEXT: packuswb %xmm1, %xmm0
1596; SSE-NEXT: retq
1597;
1598; AVX1-LABEL: trunc_sub_const_v16i16_v16i8:
1599; AVX1: # BB#0:
1600; AVX1-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm1
1601; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1602; AVX1-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
1603; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
1604; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1605; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1606; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1607; AVX1-NEXT: vzeroupper
1608; AVX1-NEXT: retq
1609;
1610; AVX2-LABEL: trunc_sub_const_v16i16_v16i8:
1611; AVX2: # BB#0:
1612; AVX2-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
1613; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1614; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
1615; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1616; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1617; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1618; AVX2-NEXT: vzeroupper
1619; AVX2-NEXT: retq
1620;
1621; AVX512F-LABEL: trunc_sub_const_v16i16_v16i8:
1622; AVX512F: # BB#0:
1623; AVX512F-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
1624; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
1625; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
1626; AVX512F-NEXT: retq
1627;
1628; AVX512BW-LABEL: trunc_sub_const_v16i16_v16i8:
1629; AVX512BW: # BB#0:
1630; AVX512BW-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
1631; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
Matthias Braun152e7c82016-07-09 00:19:07 +00001632; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001633; AVX512BW-NEXT: retq
Simon Pilgrimc5fde8d2016-12-30 22:43:41 +00001634;
1635; AVX512DQ-LABEL: trunc_sub_const_v16i16_v16i8:
1636; AVX512DQ: # BB#0:
1637; AVX512DQ-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
1638; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
1639; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1640; AVX512DQ-NEXT: retq
  %1 = sub <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  %2 = trunc <16 x i16> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; mul
;
1649
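; Note (editorial, not part of the autogenerated assertions): targets without a
; native 64-bit vector multiply expand each i64 product from 32-bit halves,
; roughly a*b = lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32) (mod 2^64),
; using pmuludq/vpmuludq for the 32x32->64 partial products plus shifts and
; adds; AVX512DQ can emit vpmullq directly.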
define <4 x i32> @trunc_mul_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
1651; SSE-LABEL: trunc_mul_v4i64_v4i32:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001652; SSE: # BB#0:
Sanjay Patela0d8a272016-12-15 18:03:38 +00001653; SSE-NEXT: movdqa %xmm1, %xmm4
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001654; SSE-NEXT: psrlq $32, %xmm4
Sanjay Patela0d8a272016-12-15 18:03:38 +00001655; SSE-NEXT: pmuludq %xmm3, %xmm4
1656; SSE-NEXT: movdqa %xmm3, %xmm5
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001657; SSE-NEXT: psrlq $32, %xmm5
Sanjay Patela0d8a272016-12-15 18:03:38 +00001658; SSE-NEXT: pmuludq %xmm1, %xmm5
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001659; SSE-NEXT: paddq %xmm4, %xmm5
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001660; SSE-NEXT: psllq $32, %xmm5
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001661; SSE-NEXT: pmuludq %xmm3, %xmm1
Sanjay Patela0d8a272016-12-15 18:03:38 +00001662; SSE-NEXT: paddq %xmm5, %xmm1
Sanjay Patela0d8a272016-12-15 18:03:38 +00001663; SSE-NEXT: movdqa %xmm0, %xmm3
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001664; SSE-NEXT: psrlq $32, %xmm3
Sanjay Patela0d8a272016-12-15 18:03:38 +00001665; SSE-NEXT: pmuludq %xmm2, %xmm3
1666; SSE-NEXT: movdqa %xmm2, %xmm4
1667; SSE-NEXT: psrlq $32, %xmm4
1668; SSE-NEXT: pmuludq %xmm0, %xmm4
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001669; SSE-NEXT: paddq %xmm3, %xmm4
Sanjay Patela0d8a272016-12-15 18:03:38 +00001670; SSE-NEXT: psllq $32, %xmm4
Sanjay Patela0d8a272016-12-15 18:03:38 +00001671; SSE-NEXT: pmuludq %xmm2, %xmm0
Sanjay Patela0d8a272016-12-15 18:03:38 +00001672; SSE-NEXT: paddq %xmm4, %xmm0
Sanjay Patela0d8a272016-12-15 18:03:38 +00001673; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001674; SSE-NEXT: retq
1675;
Simon Pilgrim85af9732016-12-30 22:40:32 +00001676; AVX1-LABEL: trunc_mul_v4i64_v4i32:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001677; AVX1: # BB#0:
Sanjay Patela0d8a272016-12-15 18:03:38 +00001678; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1679; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001680; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm4
1681; AVX1-NEXT: vpmuludq %xmm2, %xmm4, %xmm4
Sanjay Patela0d8a272016-12-15 18:03:38 +00001682; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm5
1683; AVX1-NEXT: vpmuludq %xmm5, %xmm3, %xmm5
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001684; AVX1-NEXT: vpaddq %xmm4, %xmm5, %xmm4
1685; AVX1-NEXT: vpsllq $32, %xmm4, %xmm4
Sanjay Patela0d8a272016-12-15 18:03:38 +00001686; AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001687; AVX1-NEXT: vpaddq %xmm4, %xmm2, %xmm2
1688; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm3
1689; AVX1-NEXT: vpmuludq %xmm1, %xmm3, %xmm3
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001690; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm4
1691; AVX1-NEXT: vpmuludq %xmm4, %xmm0, %xmm4
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001692; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm3
1693; AVX1-NEXT: vpsllq $32, %xmm3, %xmm3
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001694; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001695; AVX1-NEXT: vpaddq %xmm3, %xmm0, %xmm0
Sanjay Patela0d8a272016-12-15 18:03:38 +00001696; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001697; AVX1-NEXT: vzeroupper
1698; AVX1-NEXT: retq
1699;
Simon Pilgrim85af9732016-12-30 22:40:32 +00001700; AVX2-LABEL: trunc_mul_v4i64_v4i32:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001701; AVX2: # BB#0:
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001702; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm2
1703; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001704; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3
1705; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001706; AVX2-NEXT: vpaddq %ymm2, %ymm3, %ymm2
1707; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001708; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001709; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
Simon Pilgrim2683ad52016-07-15 09:49:12 +00001710; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
1711; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Matthias Braun152e7c82016-07-09 00:19:07 +00001712; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001713; AVX2-NEXT: vzeroupper
1714; AVX2-NEXT: retq
1715;
Simon Pilgrimc5fde8d2016-12-30 22:43:41 +00001716; AVX512F-LABEL: trunc_mul_v4i64_v4i32:
1717; AVX512F: # BB#0:
1718; AVX512F-NEXT: vpsrlq $32, %ymm0, %ymm2
1719; AVX512F-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
1720; AVX512F-NEXT: vpsrlq $32, %ymm1, %ymm3
1721; AVX512F-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
1722; AVX512F-NEXT: vpaddq %ymm2, %ymm3, %ymm2
1723; AVX512F-NEXT: vpsllq $32, %ymm2, %ymm2
1724; AVX512F-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
1725; AVX512F-NEXT: vpaddq %ymm2, %ymm0, %ymm0
1726; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
1727; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1728; AVX512F-NEXT: retq
1729;
1730; AVX512BW-LABEL: trunc_mul_v4i64_v4i32:
1731; AVX512BW: # BB#0:
1732; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm2
1733; AVX512BW-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
1734; AVX512BW-NEXT: vpsrlq $32, %ymm1, %ymm3
1735; AVX512BW-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
1736; AVX512BW-NEXT: vpaddq %ymm2, %ymm3, %ymm2
1737; AVX512BW-NEXT: vpsllq $32, %ymm2, %ymm2
1738; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
1739; AVX512BW-NEXT: vpaddq %ymm2, %ymm0, %ymm0
1740; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
1741; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1742; AVX512BW-NEXT: retq
1743;
1744; AVX512DQ-LABEL: trunc_mul_v4i64_v4i32:
1745; AVX512DQ: # BB#0:
1746; AVX512DQ-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
1747; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
1748; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0
1749; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
1750; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1751; AVX512DQ-NEXT: retq
  %1 = mul <4 x i64> %a0, %a1
  %2 = trunc <4 x i64> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @trunc_mul_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
1758; SSE-LABEL: trunc_mul_v8i64_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001759; SSE: # BB#0:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001760; SSE-NEXT: movdqa %xmm0, %xmm8
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001761; SSE-NEXT: psrlq $32, %xmm8
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001762; SSE-NEXT: pmuludq %xmm4, %xmm8
Michael Kuperstein7cc21232016-10-06 18:58:24 +00001763; SSE-NEXT: movdqa %xmm4, %xmm9
1764; SSE-NEXT: psrlq $32, %xmm9
1765; SSE-NEXT: pmuludq %xmm0, %xmm9
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001766; SSE-NEXT: paddq %xmm8, %xmm9
Michael Kuperstein7cc21232016-10-06 18:58:24 +00001767; SSE-NEXT: psllq $32, %xmm9
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001768; SSE-NEXT: pmuludq %xmm4, %xmm0
Michael Kuperstein7cc21232016-10-06 18:58:24 +00001769; SSE-NEXT: paddq %xmm9, %xmm0
Michael Kuperstein7cc21232016-10-06 18:58:24 +00001770; SSE-NEXT: movdqa %xmm1, %xmm8
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001771; SSE-NEXT: psrlq $32, %xmm8
Michael Kuperstein7cc21232016-10-06 18:58:24 +00001772; SSE-NEXT: pmuludq %xmm5, %xmm8
1773; SSE-NEXT: movdqa %xmm5, %xmm4
1774; SSE-NEXT: psrlq $32, %xmm4
1775; SSE-NEXT: pmuludq %xmm1, %xmm4
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001776; SSE-NEXT: paddq %xmm8, %xmm4
Michael Kuperstein7cc21232016-10-06 18:58:24 +00001777; SSE-NEXT: psllq $32, %xmm4
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001778; SSE-NEXT: pmuludq %xmm5, %xmm1
Craig Topper1af6cc02016-07-18 06:14:54 +00001779; SSE-NEXT: paddq %xmm4, %xmm1
Michael Kuperstein7cc21232016-10-06 18:58:24 +00001780; SSE-NEXT: movdqa %xmm2, %xmm4
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001781; SSE-NEXT: psrlq $32, %xmm4
Michael Kuperstein7cc21232016-10-06 18:58:24 +00001782; SSE-NEXT: pmuludq %xmm6, %xmm4
1783; SSE-NEXT: movdqa %xmm6, %xmm5
1784; SSE-NEXT: psrlq $32, %xmm5
1785; SSE-NEXT: pmuludq %xmm2, %xmm5
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001786; SSE-NEXT: paddq %xmm4, %xmm5
Michael Kuperstein7cc21232016-10-06 18:58:24 +00001787; SSE-NEXT: psllq $32, %xmm5
Michael Kuperstein7cc21232016-10-06 18:58:24 +00001788; SSE-NEXT: pmuludq %xmm6, %xmm2
Michael Kuperstein7cc21232016-10-06 18:58:24 +00001789; SSE-NEXT: paddq %xmm5, %xmm2
Michael Kuperstein7cc21232016-10-06 18:58:24 +00001790; SSE-NEXT: movdqa %xmm3, %xmm4
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001791; SSE-NEXT: psrlq $32, %xmm4
Michael Kuperstein7cc21232016-10-06 18:58:24 +00001792; SSE-NEXT: pmuludq %xmm7, %xmm4
1793; SSE-NEXT: movdqa %xmm7, %xmm5
1794; SSE-NEXT: psrlq $32, %xmm5
1795; SSE-NEXT: pmuludq %xmm3, %xmm5
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001796; SSE-NEXT: paddq %xmm4, %xmm5
Michael Kuperstein7cc21232016-10-06 18:58:24 +00001797; SSE-NEXT: psllq $32, %xmm5
Michael Kuperstein7cc21232016-10-06 18:58:24 +00001798; SSE-NEXT: pmuludq %xmm7, %xmm3
Michael Kuperstein7cc21232016-10-06 18:58:24 +00001799; SSE-NEXT: paddq %xmm5, %xmm3
Michael Kuperstein7cc21232016-10-06 18:58:24 +00001800; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
1801; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
1802; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1803; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
1804; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1805; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1806; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
1807; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1808; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1809; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1810; SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
1811; SSE-NEXT: movapd %xmm2, %xmm0
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001812; SSE-NEXT: retq
1813;
Simon Pilgrim85af9732016-12-30 22:40:32 +00001814; AVX1-LABEL: trunc_mul_v8i64_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001815; AVX1: # BB#0:
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001816; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm4
1817; AVX1-NEXT: vpmuludq %xmm2, %xmm4, %xmm4
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001818; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm5
1819; AVX1-NEXT: vpmuludq %xmm5, %xmm0, %xmm5
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001820; AVX1-NEXT: vpaddq %xmm4, %xmm5, %xmm4
1821; AVX1-NEXT: vpsllq $32, %xmm4, %xmm4
1822; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm5
1823; AVX1-NEXT: vpaddq %xmm4, %xmm5, %xmm4
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001824; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
1825; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001826; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm5
1827; AVX1-NEXT: vpmuludq %xmm2, %xmm5, %xmm5
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001828; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm6
1829; AVX1-NEXT: vpmuludq %xmm6, %xmm0, %xmm6
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001830; AVX1-NEXT: vpaddq %xmm5, %xmm6, %xmm5
1831; AVX1-NEXT: vpsllq $32, %xmm5, %xmm5
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001832; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001833; AVX1-NEXT: vpaddq %xmm5, %xmm0, %xmm0
1834; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm2
1835; AVX1-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001836; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm5
1837; AVX1-NEXT: vpmuludq %xmm5, %xmm1, %xmm5
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001838; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2
1839; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2
1840; AVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm5
1841; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001842; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
1843; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001844; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm5
1845; AVX1-NEXT: vpmuludq %xmm3, %xmm5, %xmm5
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001846; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm6
1847; AVX1-NEXT: vpmuludq %xmm6, %xmm1, %xmm6
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001848; AVX1-NEXT: vpaddq %xmm5, %xmm6, %xmm5
1849; AVX1-NEXT: vpsllq $32, %xmm5, %xmm5
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001850; AVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001851; AVX1-NEXT: vpaddq %xmm5, %xmm1, %xmm1
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001852; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1853; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
1854; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
1855; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1
1856; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
1857; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0],xmm3[1,2,3],xmm4[4],xmm3[5,6,7]
1858; AVX1-NEXT: vpackusdw %xmm0, %xmm2, %xmm0
1859; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
1860; AVX1-NEXT: vzeroupper
1861; AVX1-NEXT: retq
1862;
Simon Pilgrim85af9732016-12-30 22:40:32 +00001863; AVX2-LABEL: trunc_mul_v8i64_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001864; AVX2: # BB#0:
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001865; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm4
1866; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm4
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001867; AVX2-NEXT: vpsrlq $32, %ymm3, %ymm5
1868; AVX2-NEXT: vpmuludq %ymm5, %ymm1, %ymm5
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001869; AVX2-NEXT: vpaddq %ymm4, %ymm5, %ymm4
1870; AVX2-NEXT: vpsllq $32, %ymm4, %ymm4
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001871; AVX2-NEXT: vpmuludq %ymm3, %ymm1, %ymm1
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001872; AVX2-NEXT: vpaddq %ymm4, %ymm1, %ymm1
1873; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm3
1874; AVX2-NEXT: vpmuludq %ymm2, %ymm3, %ymm3
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001875; AVX2-NEXT: vpsrlq $32, %ymm2, %ymm4
1876; AVX2-NEXT: vpmuludq %ymm4, %ymm0, %ymm4
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001877; AVX2-NEXT: vpaddq %ymm3, %ymm4, %ymm3
1878; AVX2-NEXT: vpsllq $32, %ymm3, %ymm3
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001879; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm0
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001880; AVX2-NEXT: vpaddq %ymm3, %ymm0, %ymm0
Simon Pilgrim2683ad52016-07-15 09:49:12 +00001881; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
1882; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1883; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
1884; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001885; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1886; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
1887; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Matthias Braun152e7c82016-07-09 00:19:07 +00001888; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001889; AVX2-NEXT: vzeroupper
1890; AVX2-NEXT: retq
1891;
Simon Pilgrimc5fde8d2016-12-30 22:43:41 +00001892; AVX512F-LABEL: trunc_mul_v8i64_v8i16:
1893; AVX512F: # BB#0:
1894; AVX512F-NEXT: vpsrlq $32, %zmm0, %zmm2
1895; AVX512F-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
1896; AVX512F-NEXT: vpsrlq $32, %zmm1, %zmm3
1897; AVX512F-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
1898; AVX512F-NEXT: vpaddq %zmm2, %zmm3, %zmm2
1899; AVX512F-NEXT: vpsllq $32, %zmm2, %zmm2
1900; AVX512F-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
1901; AVX512F-NEXT: vpaddq %zmm2, %zmm0, %zmm0
1902; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
1903; AVX512F-NEXT: retq
1904;
1905; AVX512BW-LABEL: trunc_mul_v8i64_v8i16:
1906; AVX512BW: # BB#0:
1907; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2
1908; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
1909; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
1910; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
1911; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
1912; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
1913; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
1914; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
1915; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
1916; AVX512BW-NEXT: retq
1917;
1918; AVX512DQ-LABEL: trunc_mul_v8i64_v8i16:
1919; AVX512DQ: # BB#0:
1920; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0
1921; AVX512DQ-NEXT: vpmovqw %zmm0, %xmm0
1922; AVX512DQ-NEXT: retq
  %1 = mul <8 x i64> %a0, %a1
  %2 = trunc <8 x i64> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <8 x i16> @trunc_mul_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
1929; SSE-LABEL: trunc_mul_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001930; SSE: # BB#0:
1931; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
1932; SSE-NEXT: pmuludq %xmm2, %xmm0
1933; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1934; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1935; SSE-NEXT: pmuludq %xmm4, %xmm2
1936; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1937; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1938; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1939; SSE-NEXT: pmuludq %xmm3, %xmm1
1940; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1941; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1942; SSE-NEXT: pmuludq %xmm2, %xmm3
1943; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
1944; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1945; SSE-NEXT: pslld $16, %xmm1
1946; SSE-NEXT: psrad $16, %xmm1
1947; SSE-NEXT: pslld $16, %xmm0
1948; SSE-NEXT: psrad $16, %xmm0
1949; SSE-NEXT: packssdw %xmm1, %xmm0
1950; SSE-NEXT: retq
1951;
Simon Pilgrim85af9732016-12-30 22:40:32 +00001952; AVX1-LABEL: trunc_mul_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001953; AVX1: # BB#0:
1954; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm2
1955; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1956; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1957; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0
1958; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
1959; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
1960; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm1
1961; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1962; AVX1-NEXT: vzeroupper
1963; AVX1-NEXT: retq
1964;
Simon Pilgrim85af9732016-12-30 22:40:32 +00001965; AVX2-LABEL: trunc_mul_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001966; AVX2: # BB#0:
1967; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
1968; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
1969; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
Matthias Braun152e7c82016-07-09 00:19:07 +00001970; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001971; AVX2-NEXT: vzeroupper
1972; AVX2-NEXT: retq
1973;
Simon Pilgrim85af9732016-12-30 22:40:32 +00001974; AVX512-LABEL: trunc_mul_v8i32_v8i16:
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001975; AVX512: # BB#0:
1976; AVX512-NEXT: vpmulld %ymm1, %ymm0, %ymm0
1977; AVX512-NEXT: vpmovdw %zmm0, %ymm0
Matthias Braun152e7c82016-07-09 00:19:07 +00001978; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001979; AVX512-NEXT: retq
  %1 = mul <8 x i32> %a0, %a1
  %2 = trunc <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <16 x i8> @trunc_mul_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwind {
1986; SSE-LABEL: trunc_mul_v16i64_v16i8:
1987; SSE: # BB#0:
1988; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
1989; SSE-NEXT: movdqa %xmm0, %xmm9
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001990; SSE-NEXT: psrlq $32, %xmm9
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00001991; SSE-NEXT: pmuludq %xmm8, %xmm9
1992; SSE-NEXT: movdqa %xmm8, %xmm10
1993; SSE-NEXT: psrlq $32, %xmm10
1994; SSE-NEXT: pmuludq %xmm0, %xmm10
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001995; SSE-NEXT: paddq %xmm9, %xmm10
1996; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
Craig Topper1af6cc02016-07-18 06:14:54 +00001997; SSE-NEXT: psllq $32, %xmm10
Simon Pilgrim081abbb2016-12-21 20:00:10 +00001998; SSE-NEXT: pmuludq %xmm8, %xmm0
Craig Topper1af6cc02016-07-18 06:14:54 +00001999; SSE-NEXT: paddq %xmm10, %xmm0
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002000; SSE-NEXT: movdqa %xmm1, %xmm8
2001; SSE-NEXT: psrlq $32, %xmm8
2002; SSE-NEXT: pmuludq %xmm9, %xmm8
2003; SSE-NEXT: movdqa %xmm9, %xmm10
Craig Topper1af6cc02016-07-18 06:14:54 +00002004; SSE-NEXT: psrlq $32, %xmm10
2005; SSE-NEXT: pmuludq %xmm1, %xmm10
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002006; SSE-NEXT: paddq %xmm8, %xmm10
Craig Topper1af6cc02016-07-18 06:14:54 +00002007; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
2008; SSE-NEXT: psllq $32, %xmm10
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002009; SSE-NEXT: pmuludq %xmm9, %xmm1
Craig Topper1af6cc02016-07-18 06:14:54 +00002010; SSE-NEXT: paddq %xmm10, %xmm1
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002011; SSE-NEXT: movdqa %xmm2, %xmm9
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002012; SSE-NEXT: psrlq $32, %xmm9
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002013; SSE-NEXT: pmuludq %xmm8, %xmm9
2014; SSE-NEXT: movdqa %xmm8, %xmm10
2015; SSE-NEXT: psrlq $32, %xmm10
2016; SSE-NEXT: pmuludq %xmm2, %xmm10
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002017; SSE-NEXT: paddq %xmm9, %xmm10
2018; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
Craig Topper1af6cc02016-07-18 06:14:54 +00002019; SSE-NEXT: psllq $32, %xmm10
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002020; SSE-NEXT: pmuludq %xmm8, %xmm2
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002021; SSE-NEXT: paddq %xmm10, %xmm2
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002022; SSE-NEXT: movdqa %xmm3, %xmm8
2023; SSE-NEXT: psrlq $32, %xmm8
2024; SSE-NEXT: pmuludq %xmm9, %xmm8
2025; SSE-NEXT: movdqa %xmm9, %xmm10
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002026; SSE-NEXT: psrlq $32, %xmm10
2027; SSE-NEXT: pmuludq %xmm3, %xmm10
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002028; SSE-NEXT: paddq %xmm8, %xmm10
Craig Topper1af6cc02016-07-18 06:14:54 +00002029; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
2030; SSE-NEXT: psllq $32, %xmm10
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002031; SSE-NEXT: pmuludq %xmm9, %xmm3
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002032; SSE-NEXT: paddq %xmm10, %xmm3
2033; SSE-NEXT: movdqa %xmm4, %xmm9
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002034; SSE-NEXT: psrlq $32, %xmm9
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002035; SSE-NEXT: pmuludq %xmm8, %xmm9
2036; SSE-NEXT: movdqa %xmm8, %xmm10
2037; SSE-NEXT: psrlq $32, %xmm10
2038; SSE-NEXT: pmuludq %xmm4, %xmm10
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002039; SSE-NEXT: paddq %xmm9, %xmm10
2040; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
Craig Topper1af6cc02016-07-18 06:14:54 +00002041; SSE-NEXT: psllq $32, %xmm10
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002042; SSE-NEXT: pmuludq %xmm8, %xmm4
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002043; SSE-NEXT: paddq %xmm10, %xmm4
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002044; SSE-NEXT: movdqa %xmm5, %xmm8
2045; SSE-NEXT: psrlq $32, %xmm8
2046; SSE-NEXT: pmuludq %xmm9, %xmm8
2047; SSE-NEXT: movdqa %xmm9, %xmm10
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002048; SSE-NEXT: psrlq $32, %xmm10
2049; SSE-NEXT: pmuludq %xmm5, %xmm10
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002050; SSE-NEXT: paddq %xmm8, %xmm10
Craig Topper1af6cc02016-07-18 06:14:54 +00002051; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8
2052; SSE-NEXT: psllq $32, %xmm10
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002053; SSE-NEXT: pmuludq %xmm9, %xmm5
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002054; SSE-NEXT: paddq %xmm10, %xmm5
2055; SSE-NEXT: movdqa %xmm6, %xmm9
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002056; SSE-NEXT: psrlq $32, %xmm9
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002057; SSE-NEXT: pmuludq %xmm8, %xmm9
2058; SSE-NEXT: movdqa %xmm8, %xmm10
2059; SSE-NEXT: psrlq $32, %xmm10
2060; SSE-NEXT: pmuludq %xmm6, %xmm10
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002061; SSE-NEXT: paddq %xmm9, %xmm10
2062; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
Craig Topper1af6cc02016-07-18 06:14:54 +00002063; SSE-NEXT: psllq $32, %xmm10
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002064; SSE-NEXT: pmuludq %xmm8, %xmm6
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002065; SSE-NEXT: paddq %xmm10, %xmm6
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002066; SSE-NEXT: movdqa %xmm7, %xmm8
2067; SSE-NEXT: psrlq $32, %xmm8
2068; SSE-NEXT: pmuludq %xmm9, %xmm8
2069; SSE-NEXT: movdqa %xmm9, %xmm10
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002070; SSE-NEXT: psrlq $32, %xmm10
2071; SSE-NEXT: pmuludq %xmm7, %xmm10
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002072; SSE-NEXT: paddq %xmm8, %xmm10
2073; SSE-NEXT: pmuludq %xmm9, %xmm7
Craig Topper1af6cc02016-07-18 06:14:54 +00002074; SSE-NEXT: psllq $32, %xmm10
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002075; SSE-NEXT: paddq %xmm10, %xmm7
2076; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
2077; SSE-NEXT: pand %xmm8, %xmm7
2078; SSE-NEXT: pand %xmm8, %xmm6
2079; SSE-NEXT: packuswb %xmm7, %xmm6
2080; SSE-NEXT: pand %xmm8, %xmm5
2081; SSE-NEXT: pand %xmm8, %xmm4
2082; SSE-NEXT: packuswb %xmm5, %xmm4
2083; SSE-NEXT: packuswb %xmm6, %xmm4
2084; SSE-NEXT: pand %xmm8, %xmm3
2085; SSE-NEXT: pand %xmm8, %xmm2
2086; SSE-NEXT: packuswb %xmm3, %xmm2
2087; SSE-NEXT: pand %xmm8, %xmm1
2088; SSE-NEXT: pand %xmm8, %xmm0
2089; SSE-NEXT: packuswb %xmm1, %xmm0
2090; SSE-NEXT: packuswb %xmm2, %xmm0
2091; SSE-NEXT: packuswb %xmm4, %xmm0
2092; SSE-NEXT: retq
2093;
2094; AVX1-LABEL: trunc_mul_v16i64_v16i8:
2095; AVX1: # BB#0:
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002096; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm8
2097; AVX1-NEXT: vpmuludq %xmm4, %xmm8, %xmm8
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002098; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm9
2099; AVX1-NEXT: vpmuludq %xmm9, %xmm0, %xmm9
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002100; AVX1-NEXT: vpaddq %xmm8, %xmm9, %xmm8
2101; AVX1-NEXT: vpsllq $32, %xmm8, %xmm8
2102; AVX1-NEXT: vpmuludq %xmm4, %xmm0, %xmm9
2103; AVX1-NEXT: vpaddq %xmm8, %xmm9, %xmm8
2104; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm9
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002105; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002106; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm4
2107; AVX1-NEXT: vpmuludq %xmm9, %xmm4, %xmm10
2108; AVX1-NEXT: vpsrlq $32, %xmm9, %xmm4
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002109; AVX1-NEXT: vpmuludq %xmm4, %xmm0, %xmm4
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002110; AVX1-NEXT: vpaddq %xmm10, %xmm4, %xmm4
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002111; AVX1-NEXT: vpsllq $32, %xmm4, %xmm4
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002112; AVX1-NEXT: vpmuludq %xmm9, %xmm0, %xmm0
2113; AVX1-NEXT: vpaddq %xmm4, %xmm0, %xmm9
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002114; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm4
2115; AVX1-NEXT: vpmuludq %xmm5, %xmm4, %xmm4
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002116; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm0
2117; AVX1-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
Craig Topper1af6cc02016-07-18 06:14:54 +00002118; AVX1-NEXT: vpaddq %xmm4, %xmm0, %xmm0
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002119; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
2120; AVX1-NEXT: vpmuludq %xmm5, %xmm1, %xmm4
2121; AVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm10
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002122; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm0
2123; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002124; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm5
2125; AVX1-NEXT: vpmuludq %xmm0, %xmm5, %xmm5
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002126; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm4
2127; AVX1-NEXT: vpmuludq %xmm4, %xmm1, %xmm4
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002128; AVX1-NEXT: vpaddq %xmm5, %xmm4, %xmm4
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002129; AVX1-NEXT: vpsllq $32, %xmm4, %xmm4
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002130; AVX1-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002131; AVX1-NEXT: vpaddq %xmm4, %xmm0, %xmm1
2132; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm0
2133; AVX1-NEXT: vpmuludq %xmm6, %xmm0, %xmm0
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002134; AVX1-NEXT: vpsrlq $32, %xmm6, %xmm4
2135; AVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm4
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002136; AVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm0
2137; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
2138; AVX1-NEXT: vpmuludq %xmm6, %xmm2, %xmm4
2139; AVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm5
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002140; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm0
2141; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002142; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm4
2143; AVX1-NEXT: vpmuludq %xmm0, %xmm4, %xmm4
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002144; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm6
2145; AVX1-NEXT: vpmuludq %xmm6, %xmm2, %xmm6
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002146; AVX1-NEXT: vpaddq %xmm4, %xmm6, %xmm4
2147; AVX1-NEXT: vpsllq $32, %xmm4, %xmm4
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002148; AVX1-NEXT: vpmuludq %xmm0, %xmm2, %xmm0
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002149; AVX1-NEXT: vpaddq %xmm4, %xmm0, %xmm0
2150; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm2
2151; AVX1-NEXT: vpmuludq %xmm7, %xmm2, %xmm2
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002152; AVX1-NEXT: vpsrlq $32, %xmm7, %xmm4
2153; AVX1-NEXT: vpmuludq %xmm4, %xmm3, %xmm4
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002154; AVX1-NEXT: vpaddq %xmm2, %xmm4, %xmm2
2155; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2
2156; AVX1-NEXT: vpmuludq %xmm7, %xmm3, %xmm4
2157; AVX1-NEXT: vpaddq %xmm2, %xmm4, %xmm2
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002158; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm4
2159; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002160; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm6
2161; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm6
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002162; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm7
2163; AVX1-NEXT: vpmuludq %xmm7, %xmm3, %xmm7
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002164; AVX1-NEXT: vpaddq %xmm6, %xmm7, %xmm6
2165; AVX1-NEXT: vpsllq $32, %xmm6, %xmm6
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002166; AVX1-NEXT: vpmuludq %xmm4, %xmm3, %xmm3
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002167; AVX1-NEXT: vpaddq %xmm6, %xmm3, %xmm3
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002168; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
2169; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
2170; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
2171; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
2172; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
2173; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm3
2174; AVX1-NEXT: vpackuswb %xmm0, %xmm3, %xmm0
2175; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
2176; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
2177; AVX1-NEXT: vpand %xmm4, %xmm10, %xmm2
2178; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
2179; AVX1-NEXT: vpand %xmm4, %xmm9, %xmm2
2180; AVX1-NEXT: vpand %xmm4, %xmm8, %xmm3
2181; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2
2182; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
2183; AVX1-NEXT: vpackuswb %xmm0, %xmm1, %xmm0
2184; AVX1-NEXT: vzeroupper
2185; AVX1-NEXT: retq
2186;
2187; AVX2-LABEL: trunc_mul_v16i64_v16i8:
2188; AVX2: # BB#0:
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002189; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm8
2190; AVX2-NEXT: vpmuludq %ymm5, %ymm8, %ymm8
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002191; AVX2-NEXT: vpsrlq $32, %ymm5, %ymm9
2192; AVX2-NEXT: vpmuludq %ymm9, %ymm1, %ymm9
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002193; AVX2-NEXT: vpaddq %ymm8, %ymm9, %ymm8
2194; AVX2-NEXT: vpsllq $32, %ymm8, %ymm8
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002195; AVX2-NEXT: vpmuludq %ymm5, %ymm1, %ymm1
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002196; AVX2-NEXT: vpaddq %ymm8, %ymm1, %ymm1
2197; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm5
2198; AVX2-NEXT: vpmuludq %ymm4, %ymm5, %ymm5
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002199; AVX2-NEXT: vpsrlq $32, %ymm4, %ymm8
2200; AVX2-NEXT: vpmuludq %ymm8, %ymm0, %ymm8
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002201; AVX2-NEXT: vpaddq %ymm5, %ymm8, %ymm5
2202; AVX2-NEXT: vpsllq $32, %ymm5, %ymm5
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002203; AVX2-NEXT: vpmuludq %ymm4, %ymm0, %ymm0
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002204; AVX2-NEXT: vpaddq %ymm5, %ymm0, %ymm0
2205; AVX2-NEXT: vpsrlq $32, %ymm3, %ymm4
2206; AVX2-NEXT: vpmuludq %ymm7, %ymm4, %ymm4
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002207; AVX2-NEXT: vpsrlq $32, %ymm7, %ymm5
2208; AVX2-NEXT: vpmuludq %ymm5, %ymm3, %ymm5
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002209; AVX2-NEXT: vpaddq %ymm4, %ymm5, %ymm4
2210; AVX2-NEXT: vpsllq $32, %ymm4, %ymm4
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002211; AVX2-NEXT: vpmuludq %ymm7, %ymm3, %ymm3
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002212; AVX2-NEXT: vpaddq %ymm4, %ymm3, %ymm3
2213; AVX2-NEXT: vpsrlq $32, %ymm2, %ymm4
2214; AVX2-NEXT: vpmuludq %ymm6, %ymm4, %ymm4
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002215; AVX2-NEXT: vpsrlq $32, %ymm6, %ymm5
2216; AVX2-NEXT: vpmuludq %ymm5, %ymm2, %ymm5
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002217; AVX2-NEXT: vpaddq %ymm4, %ymm5, %ymm4
2218; AVX2-NEXT: vpsllq $32, %ymm4, %ymm4
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002219; AVX2-NEXT: vpmuludq %ymm6, %ymm2, %ymm2
Simon Pilgrim081abbb2016-12-21 20:00:10 +00002220; AVX2-NEXT: vpaddq %ymm4, %ymm2, %ymm2
Simon Pilgrim2683ad52016-07-15 09:49:12 +00002221; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
2222; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
2223; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
2224; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002225; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
2226; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
2227; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
2228; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
2229; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
2230; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
Simon Pilgrim2683ad52016-07-15 09:49:12 +00002231; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
2232; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2233; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
2234; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00002235; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2236; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
2237; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2238; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
2239; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2240; AVX2-NEXT: vzeroupper
2241; AVX2-NEXT: retq
2242;
2243; AVX512F-LABEL: trunc_mul_v16i64_v16i8:
2244; AVX512F: # BB#0:
2245; AVX512F-NEXT: vpsrlq $32, %zmm1, %zmm4
2246; AVX512F-NEXT: vpmuludq %zmm3, %zmm4, %zmm4
2247; AVX512F-NEXT: vpsrlq $32, %zmm3, %zmm5
2248; AVX512F-NEXT: vpmuludq %zmm5, %zmm1, %zmm5
2249; AVX512F-NEXT: vpaddq %zmm4, %zmm5, %zmm4
2250; AVX512F-NEXT: vpsllq $32, %zmm4, %zmm4
2251; AVX512F-NEXT: vpmuludq %zmm3, %zmm1, %zmm1
2252; AVX512F-NEXT: vpaddq %zmm4, %zmm1, %zmm1
2253; AVX512F-NEXT: vpsrlq $32, %zmm0, %zmm3
2254; AVX512F-NEXT: vpmuludq %zmm2, %zmm3, %zmm3
2255; AVX512F-NEXT: vpsrlq $32, %zmm2, %zmm4
2256; AVX512F-NEXT: vpmuludq %zmm4, %zmm0, %zmm4
2257; AVX512F-NEXT: vpaddq %zmm3, %zmm4, %zmm3
2258; AVX512F-NEXT: vpsllq $32, %zmm3, %zmm3
2259; AVX512F-NEXT: vpmuludq %zmm2, %zmm0, %zmm0
2260; AVX512F-NEXT: vpaddq %zmm3, %zmm0, %zmm0
2261; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
2262; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
2263; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
2264; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
2265; AVX512F-NEXT: retq
2266;
2267; AVX512BW-LABEL: trunc_mul_v16i64_v16i8:
2268; AVX512BW: # BB#0:
2269; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm4
2270; AVX512BW-NEXT: vpmuludq %zmm3, %zmm4, %zmm4
2271; AVX512BW-NEXT: vpsrlq $32, %zmm3, %zmm5
2272; AVX512BW-NEXT: vpmuludq %zmm5, %zmm1, %zmm5
2273; AVX512BW-NEXT: vpaddq %zmm4, %zmm5, %zmm4
2274; AVX512BW-NEXT: vpsllq $32, %zmm4, %zmm4
2275; AVX512BW-NEXT: vpmuludq %zmm3, %zmm1, %zmm1
2276; AVX512BW-NEXT: vpaddq %zmm4, %zmm1, %zmm1
2277; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm3
2278; AVX512BW-NEXT: vpmuludq %zmm2, %zmm3, %zmm3
2279; AVX512BW-NEXT: vpsrlq $32, %zmm2, %zmm4
2280; AVX512BW-NEXT: vpmuludq %zmm4, %zmm0, %zmm4
2281; AVX512BW-NEXT: vpaddq %zmm3, %zmm4, %zmm3
2282; AVX512BW-NEXT: vpsllq $32, %zmm3, %zmm3
2283; AVX512BW-NEXT: vpmuludq %zmm2, %zmm0, %zmm0
2284; AVX512BW-NEXT: vpaddq %zmm3, %zmm0, %zmm0
2285; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
2286; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
2287; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
2288; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
2289; AVX512BW-NEXT: retq
2290;
2291; AVX512DQ-LABEL: trunc_mul_v16i64_v16i8:
2292; AVX512DQ: # BB#0:
2293; AVX512DQ-NEXT: vpmullq %zmm3, %zmm1, %zmm1
2294; AVX512DQ-NEXT: vpmullq %zmm2, %zmm0, %zmm0
2295; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
2296; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
2297; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
2298; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
2299; AVX512DQ-NEXT: retq
2300 %1 = mul <16 x i64> %a0, %a1
2301 %2 = trunc <16 x i64> %1 to <16 x i8>
2302 ret <16 x i8> %2
2303}
2304
2305define <16 x i8> @trunc_mul_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwind {
2306; SSE-LABEL: trunc_mul_v16i32_v16i8:
2307; SSE: # BB#0:
2308; SSE-NEXT: pshufd {{.*#+}} xmm8 = xmm0[1,1,3,3]
2309; SSE-NEXT: pmuludq %xmm4, %xmm0
2310; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2311; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2312; SSE-NEXT: pmuludq %xmm8, %xmm4
2313; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
2314; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
2315; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
2316; SSE-NEXT: pmuludq %xmm5, %xmm1
2317; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2318; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2319; SSE-NEXT: pmuludq %xmm4, %xmm5
2320; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[0,2,2,3]
2321; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
2322; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
2323; SSE-NEXT: pmuludq %xmm6, %xmm2
2324; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2325; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
2326; SSE-NEXT: pmuludq %xmm4, %xmm5
2327; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[0,2,2,3]
2328; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
2329; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
2330; SSE-NEXT: pmuludq %xmm7, %xmm3
2331; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
2332; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
2333; SSE-NEXT: pmuludq %xmm4, %xmm5
2334; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[0,2,2,3]
2335; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
2336; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
2337; SSE-NEXT: pand %xmm4, %xmm3
2338; SSE-NEXT: pand %xmm4, %xmm2
2339; SSE-NEXT: packuswb %xmm3, %xmm2
2340; SSE-NEXT: pand %xmm4, %xmm1
2341; SSE-NEXT: pand %xmm4, %xmm0
2342; SSE-NEXT: packuswb %xmm1, %xmm0
2343; SSE-NEXT: packuswb %xmm2, %xmm0
2344; SSE-NEXT: retq
2345;
2346; AVX1-LABEL: trunc_mul_v16i32_v16i8:
2347; AVX1: # BB#0:
2348; AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm4
2349; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
2350; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2351; AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm0
2352; AVX1-NEXT: vpmulld %xmm3, %xmm1, %xmm2
2353; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
2354; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2355; AVX1-NEXT: vpmulld %xmm3, %xmm1, %xmm1
2356; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
2357; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
2358; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
2359; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
2360; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
2361; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm2
2362; AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm0
2363; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2364; AVX1-NEXT: vzeroupper
2365; AVX1-NEXT: retq
2366;
2367; AVX2-LABEL: trunc_mul_v16i32_v16i8:
2368; AVX2: # BB#0:
2369; AVX2-NEXT: vpmulld %ymm2, %ymm0, %ymm0
2370; AVX2-NEXT: vpmulld %ymm3, %ymm1, %ymm1
2371; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
2372; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
2373; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
2374; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
2375; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
2376; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
2377; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2378; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
2379; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2380; AVX2-NEXT: vzeroupper
2381; AVX2-NEXT: retq
2382;
2383; AVX512-LABEL: trunc_mul_v16i32_v16i8:
2384; AVX512: # BB#0:
2385; AVX512-NEXT: vpmulld %zmm1, %zmm0, %zmm0
2386; AVX512-NEXT: vpmovdb %zmm0, %xmm0
2387; AVX512-NEXT: retq
2388 %1 = mul <16 x i32> %a0, %a1
2389 %2 = trunc <16 x i32> %1 to <16 x i8>
2390 ret <16 x i8> %2
2391}
2392
2393define <16 x i8> @trunc_mul_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind {
2394; SSE-LABEL: trunc_mul_v16i16_v16i8:
2395; SSE: # BB#0:
2396; SSE-NEXT: pmullw %xmm2, %xmm0
2397; SSE-NEXT: pmullw %xmm3, %xmm1
2398; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
2399; SSE-NEXT: pand %xmm2, %xmm1
2400; SSE-NEXT: pand %xmm2, %xmm0
2401; SSE-NEXT: packuswb %xmm1, %xmm0
2402; SSE-NEXT: retq
2403;
2404; AVX1-LABEL: trunc_mul_v16i16_v16i8:
2405; AVX1: # BB#0:
2406; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm2
2407; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2408; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2409; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
2410; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
2411; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
2412; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm1
2413; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2414; AVX1-NEXT: vzeroupper
2415; AVX1-NEXT: retq
2416;
2417; AVX2-LABEL: trunc_mul_v16i16_v16i8:
2418; AVX2: # BB#0:
2419; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
2420; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2421; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
2422; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2423; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2424; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2425; AVX2-NEXT: vzeroupper
2426; AVX2-NEXT: retq
2427;
2428; AVX512F-LABEL: trunc_mul_v16i16_v16i8:
2429; AVX512F: # BB#0:
2430; AVX512F-NEXT: vpmullw %ymm1, %ymm0, %ymm0
2431; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
2432; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
2433; AVX512F-NEXT: retq
2434;
2435; AVX512BW-LABEL: trunc_mul_v16i16_v16i8:
2436; AVX512BW: # BB#0:
2437; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0
2438; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
2439; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2440; AVX512BW-NEXT: retq
2441;
2442; AVX512DQ-LABEL: trunc_mul_v16i16_v16i8:
2443; AVX512DQ: # BB#0:
2444; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
2445; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
2446; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
2447; AVX512DQ-NEXT: retq
2448 %1 = mul <16 x i16> %a0, %a1
2449 %2 = trunc <16 x i16> %1 to <16 x i8>
2450 ret <16 x i8> %2
2451}
2452
2453;
2454; mul to constant
2455;
2456
2457define <4 x i32> @trunc_mul_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
2458; SSE-LABEL: trunc_mul_const_v4i64_v4i32:
2459; SSE: # BB#0:
2460; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2,3]
2461; SSE-NEXT: movdqa %xmm1, %xmm3
2462; SSE-NEXT: pmuludq %xmm2, %xmm3
2463; SSE-NEXT: psrlq $32, %xmm1
2464; SSE-NEXT: pmuludq %xmm2, %xmm1
2465; SSE-NEXT: psllq $32, %xmm1
2466; SSE-NEXT: paddq %xmm3, %xmm1
2467; SSE-NEXT: movl $1, %eax
2468; SSE-NEXT: movd %rax, %xmm2
2469; SSE-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
2470; SSE-NEXT: movdqa %xmm0, %xmm3
2471; SSE-NEXT: pmuludq %xmm2, %xmm3
2472; SSE-NEXT: psrlq $32, %xmm0
2473; SSE-NEXT: pmuludq %xmm2, %xmm0
2474; SSE-NEXT: psllq $32, %xmm0
2475; SSE-NEXT: paddq %xmm3, %xmm0
2476; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
2477; SSE-NEXT: retq
2478;
2479; AVX1-LABEL: trunc_mul_const_v4i64_v4i32:
2480; AVX1: # BB#0:
2481; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2482; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2,3]
2483; AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm3
2484; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1
2485; AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
2486; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1
2487; AVX1-NEXT: vpaddq %xmm1, %xmm3, %xmm1
2488; AVX1-NEXT: movl $1, %eax
2489; AVX1-NEXT: vmovq %rax, %xmm2
2490; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
2491; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm3
2492; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
2493; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm0
2494; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
2495; AVX1-NEXT: vpaddq %xmm0, %xmm3, %xmm0
2496; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
2497; AVX1-NEXT: vzeroupper
2498; AVX1-NEXT: retq
2499;
2500; AVX2-LABEL: trunc_mul_const_v4i64_v4i32:
2501; AVX2: # BB#0:
2502; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3]
2503; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm2
2504; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
2505; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
2506; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
2507; AVX2-NEXT: vpaddq %ymm0, %ymm2, %ymm0
2508; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
2509; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2510; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2511; AVX2-NEXT: vzeroupper
2512; AVX2-NEXT: retq
2513;
2514; AVX512F-LABEL: trunc_mul_const_v4i64_v4i32:
2515; AVX512F: # BB#0:
2516; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3]
2517; AVX512F-NEXT: vpmuludq %ymm1, %ymm0, %ymm2
2518; AVX512F-NEXT: vpsrlq $32, %ymm0, %ymm0
2519; AVX512F-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
2520; AVX512F-NEXT: vpsllq $32, %ymm0, %ymm0
2521; AVX512F-NEXT: vpaddq %ymm0, %ymm2, %ymm0
2522; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
2523; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2524; AVX512F-NEXT: retq
2525;
2526; AVX512BW-LABEL: trunc_mul_const_v4i64_v4i32:
2527; AVX512BW: # BB#0:
2528; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3]
2529; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm2
2530; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm0
2531; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
2532; AVX512BW-NEXT: vpsllq $32, %ymm0, %ymm0
2533; AVX512BW-NEXT: vpaddq %ymm0, %ymm2, %ymm0
2534; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
2535; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2536; AVX512BW-NEXT: retq
2537;
2538; AVX512DQ-LABEL: trunc_mul_const_v4i64_v4i32:
2539; AVX512DQ: # BB#0:
2540; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
2541; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3]
2542; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0
2543; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
2544; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2545; AVX512DQ-NEXT: retq
2546 %1 = mul <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
2547 %2 = trunc <4 x i64> %1 to <4 x i32>
2548 ret <4 x i32> %2
2549}
2550
2551define <8 x i16> @trunc_mul_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
2552; SSE-LABEL: trunc_mul_const_v8i64_v8i16:
2553; SSE: # BB#0:
2554; SSE-NEXT: movl $1, %eax
2555; SSE-NEXT: movd %rax, %xmm4
2556; SSE-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
2557; SSE-NEXT: movdqa %xmm0, %xmm5
2558; SSE-NEXT: pmuludq %xmm4, %xmm5
2559; SSE-NEXT: psrlq $32, %xmm0
2560; SSE-NEXT: pmuludq %xmm4, %xmm0
2561; SSE-NEXT: psllq $32, %xmm0
2562; SSE-NEXT: paddq %xmm5, %xmm0
2563; SSE-NEXT: movdqa {{.*#+}} xmm4 = [2,3]
2564; SSE-NEXT: movdqa %xmm1, %xmm5
2565; SSE-NEXT: pmuludq %xmm4, %xmm5
2566; SSE-NEXT: psrlq $32, %xmm1
2567; SSE-NEXT: pmuludq %xmm4, %xmm1
2568; SSE-NEXT: psllq $32, %xmm1
2569; SSE-NEXT: paddq %xmm5, %xmm1
2570; SSE-NEXT: movdqa {{.*#+}} xmm4 = [4,5]
2571; SSE-NEXT: movdqa %xmm2, %xmm5
2572; SSE-NEXT: pmuludq %xmm4, %xmm5
2573; SSE-NEXT: psrlq $32, %xmm2
2574; SSE-NEXT: pmuludq %xmm4, %xmm2
2575; SSE-NEXT: psllq $32, %xmm2
2576; SSE-NEXT: paddq %xmm5, %xmm2
2577; SSE-NEXT: movdqa {{.*#+}} xmm4 = [6,7]
2578; SSE-NEXT: movdqa %xmm3, %xmm5
2579; SSE-NEXT: pmuludq %xmm4, %xmm5
2580; SSE-NEXT: psrlq $32, %xmm3
2581; SSE-NEXT: pmuludq %xmm4, %xmm3
2582; SSE-NEXT: psllq $32, %xmm3
2583; SSE-NEXT: paddq %xmm5, %xmm3
2584; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
2585; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
2586; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2587; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
2588; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
2589; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2590; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
2591; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2592; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2593; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2594; SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
2595; SSE-NEXT: movapd %xmm2, %xmm0
2596; SSE-NEXT: retq
2597;
2598; AVX1-LABEL: trunc_mul_const_v8i64_v8i16:
2599; AVX1: # BB#0:
2600; AVX1-NEXT: movl $1, %eax
2601; AVX1-NEXT: vmovq %rax, %xmm2
2602; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
2603; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm3
2604; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm4
2605; AVX1-NEXT: vpmuludq %xmm2, %xmm4, %xmm2
2606; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2
2607; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2
2608; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2609; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2,3]
2610; AVX1-NEXT: vpmuludq %xmm3, %xmm0, %xmm4
2611; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
2612; AVX1-NEXT: vpmuludq %xmm3, %xmm0, %xmm0
2613; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
2614; AVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm0
2615; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [4,5]
2616; AVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm4
2617; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm5
2618; AVX1-NEXT: vpmuludq %xmm3, %xmm5, %xmm3
2619; AVX1-NEXT: vpsllq $32, %xmm3, %xmm3
2620; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm3
2621; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2622; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [6,7]
2623; AVX1-NEXT: vpmuludq %xmm4, %xmm1, %xmm5
2624; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1
2625; AVX1-NEXT: vpmuludq %xmm4, %xmm1, %xmm1
2626; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1
2627; AVX1-NEXT: vpaddq %xmm1, %xmm5, %xmm1
2628; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
2629; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
2630; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
2631; AVX1-NEXT: vpackusdw %xmm1, %xmm3, %xmm1
2632; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
2633; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
2634; AVX1-NEXT: vpackusdw %xmm0, %xmm2, %xmm0
2635; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2636; AVX1-NEXT: vzeroupper
2637; AVX1-NEXT: retq
2638;
2639; AVX2-LABEL: trunc_mul_const_v8i64_v8i16:
2640; AVX2: # BB#0:
2641; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [4,5,6,7]
2642; AVX2-NEXT: vpmuludq %ymm2, %ymm1, %ymm3
2643; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm1
2644; AVX2-NEXT: vpmuludq %ymm2, %ymm1, %ymm1
2645; AVX2-NEXT: vpsllq $32, %ymm1, %ymm1
2646; AVX2-NEXT: vpaddq %ymm1, %ymm3, %ymm1
2647; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3]
2648; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm3
2649; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
2650; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm0
2651; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
2652; AVX2-NEXT: vpaddq %ymm0, %ymm3, %ymm0
2653; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
2654; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2655; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
2656; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
2657; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2658; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
2659; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2660; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2661; AVX2-NEXT: vzeroupper
2662; AVX2-NEXT: retq
2663;
2664; AVX512F-LABEL: trunc_mul_const_v8i64_v8i16:
2665; AVX512F: # BB#0:
2666; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7]
2667; AVX512F-NEXT: vpmuludq %zmm1, %zmm0, %zmm2
2668; AVX512F-NEXT: vpsrlq $32, %zmm0, %zmm0
2669; AVX512F-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
2670; AVX512F-NEXT: vpsllq $32, %zmm0, %zmm0
2671; AVX512F-NEXT: vpaddq %zmm0, %zmm2, %zmm0
2672; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
2673; AVX512F-NEXT: retq
2674;
2675; AVX512BW-LABEL: trunc_mul_const_v8i64_v8i16:
2676; AVX512BW: # BB#0:
2677; AVX512BW-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7]
2678; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm2
2679; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm0
2680; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
2681; AVX512BW-NEXT: vpsllq $32, %zmm0, %zmm0
2682; AVX512BW-NEXT: vpaddq %zmm0, %zmm2, %zmm0
2683; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
2684; AVX512BW-NEXT: retq
2685;
2686; AVX512DQ-LABEL: trunc_mul_const_v8i64_v8i16:
2687; AVX512DQ: # BB#0:
2688; AVX512DQ-NEXT: vpmullq {{.*}}(%rip), %zmm0, %zmm0
2689; AVX512DQ-NEXT: vpmovqw %zmm0, %xmm0
2690; AVX512DQ-NEXT: retq
2691 %1 = mul <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
2692 %2 = trunc <8 x i64> %1 to <8 x i16>
2693 ret <8 x i16> %2
2694}
2695
2696define <8 x i16> @trunc_mul_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
2697; SSE-LABEL: trunc_mul_const_v8i32_v8i16:
2698; SSE: # BB#0:
2699; SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,1,2,3]
2700; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2701; SSE-NEXT: pmuludq %xmm2, %xmm0
2702; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2703; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2704; SSE-NEXT: pmuludq %xmm3, %xmm2
2705; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2706; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2707; SSE-NEXT: movdqa {{.*#+}} xmm2 = [4,5,6,7]
2708; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
2709; SSE-NEXT: pmuludq %xmm2, %xmm1
2710; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2711; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2712; SSE-NEXT: pmuludq %xmm3, %xmm2
2713; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2714; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
2715; SSE-NEXT: pslld $16, %xmm1
2716; SSE-NEXT: psrad $16, %xmm1
2717; SSE-NEXT: pslld $16, %xmm0
2718; SSE-NEXT: psrad $16, %xmm0
2719; SSE-NEXT: packssdw %xmm1, %xmm0
2720; SSE-NEXT: retq
2721;
2722; AVX1-LABEL: trunc_mul_const_v8i32_v8i16:
2723; AVX1: # BB#0:
2724; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm1
2725; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2726; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
2727; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
2728; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2729; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2730; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2731; AVX1-NEXT: vzeroupper
2732; AVX1-NEXT: retq
2733;
2734; AVX2-LABEL: trunc_mul_const_v8i32_v8i16:
2735; AVX2: # BB#0:
2736; AVX2-NEXT: vpmulld {{.*}}(%rip), %ymm0, %ymm0
2737; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
2738; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2739; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2740; AVX2-NEXT: vzeroupper
2741; AVX2-NEXT: retq
2742;
2743; AVX512-LABEL: trunc_mul_const_v8i32_v8i16:
2744; AVX512: # BB#0:
2745; AVX512-NEXT: vpmulld {{.*}}(%rip), %ymm0, %ymm0
2746; AVX512-NEXT: vpmovdw %zmm0, %ymm0
2747; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2748; AVX512-NEXT: retq
2749 %1 = mul <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2750 %2 = trunc <8 x i32> %1 to <8 x i16>
2751 ret <8 x i16> %2
2752}
2753
2754define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
2755; SSE-LABEL: trunc_mul_const_v16i64_v16i8:
2756; SSE: # BB#0:
2757; SSE-NEXT: movl $1, %eax
2758; SSE-NEXT: movd %rax, %xmm8
2759; SSE-NEXT: pslldq {{.*#+}} xmm8 = zero,zero,zero,zero,zero,zero,zero,zero,xmm8[0,1,2,3,4,5,6,7]
2760; SSE-NEXT: movdqa %xmm0, %xmm9
2761; SSE-NEXT: pmuludq %xmm8, %xmm9
2762; SSE-NEXT: psrlq $32, %xmm0
2763; SSE-NEXT: pmuludq %xmm8, %xmm0
2764; SSE-NEXT: psllq $32, %xmm0
2765; SSE-NEXT: paddq %xmm9, %xmm0
2766; SSE-NEXT: movdqa {{.*#+}} xmm8 = [2,3]
2767; SSE-NEXT: movdqa %xmm1, %xmm9
2768; SSE-NEXT: pmuludq %xmm8, %xmm9
2769; SSE-NEXT: psrlq $32, %xmm1
2770; SSE-NEXT: pmuludq %xmm8, %xmm1
2771; SSE-NEXT: psllq $32, %xmm1
2772; SSE-NEXT: paddq %xmm9, %xmm1
2773; SSE-NEXT: movdqa {{.*#+}} xmm8 = [4,5]
2774; SSE-NEXT: movdqa %xmm2, %xmm9
2775; SSE-NEXT: pmuludq %xmm8, %xmm9
2776; SSE-NEXT: psrlq $32, %xmm2
2777; SSE-NEXT: pmuludq %xmm8, %xmm2
2778; SSE-NEXT: psllq $32, %xmm2
2779; SSE-NEXT: paddq %xmm9, %xmm2
2780; SSE-NEXT: movdqa {{.*#+}} xmm8 = [6,7]
2781; SSE-NEXT: movdqa %xmm3, %xmm9
2782; SSE-NEXT: pmuludq %xmm8, %xmm9
2783; SSE-NEXT: psrlq $32, %xmm3
2784; SSE-NEXT: pmuludq %xmm8, %xmm3
2785; SSE-NEXT: psllq $32, %xmm3
2786; SSE-NEXT: paddq %xmm9, %xmm3
2787; SSE-NEXT: movdqa {{.*#+}} xmm8 = [8,9]
2788; SSE-NEXT: movdqa %xmm4, %xmm9
2789; SSE-NEXT: pmuludq %xmm8, %xmm9
2790; SSE-NEXT: psrlq $32, %xmm4
2791; SSE-NEXT: pmuludq %xmm8, %xmm4
2792; SSE-NEXT: psllq $32, %xmm4
2793; SSE-NEXT: paddq %xmm9, %xmm4
2794; SSE-NEXT: movdqa {{.*#+}} xmm8 = [10,11]
2795; SSE-NEXT: movdqa %xmm5, %xmm9
2796; SSE-NEXT: pmuludq %xmm8, %xmm9
2797; SSE-NEXT: psrlq $32, %xmm5
2798; SSE-NEXT: pmuludq %xmm8, %xmm5
2799; SSE-NEXT: psllq $32, %xmm5
2800; SSE-NEXT: paddq %xmm9, %xmm5
2801; SSE-NEXT: movdqa {{.*#+}} xmm8 = [12,13]
2802; SSE-NEXT: movdqa %xmm6, %xmm9
2803; SSE-NEXT: pmuludq %xmm8, %xmm9
2804; SSE-NEXT: psrlq $32, %xmm6
2805; SSE-NEXT: pmuludq %xmm8, %xmm6
2806; SSE-NEXT: psllq $32, %xmm6
2807; SSE-NEXT: paddq %xmm9, %xmm6
2808; SSE-NEXT: movdqa {{.*#+}} xmm8 = [14,15]
2809; SSE-NEXT: movdqa %xmm7, %xmm9
2810; SSE-NEXT: pmuludq %xmm8, %xmm9
2811; SSE-NEXT: psrlq $32, %xmm7
2812; SSE-NEXT: pmuludq %xmm8, %xmm7
2813; SSE-NEXT: psllq $32, %xmm7
2814; SSE-NEXT: paddq %xmm9, %xmm7
2815; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
2816; SSE-NEXT: pand %xmm8, %xmm7
2817; SSE-NEXT: pand %xmm8, %xmm6
2818; SSE-NEXT: packuswb %xmm7, %xmm6
2819; SSE-NEXT: pand %xmm8, %xmm5
2820; SSE-NEXT: pand %xmm8, %xmm4
2821; SSE-NEXT: packuswb %xmm5, %xmm4
2822; SSE-NEXT: packuswb %xmm6, %xmm4
2823; SSE-NEXT: pand %xmm8, %xmm3
2824; SSE-NEXT: pand %xmm8, %xmm2
2825; SSE-NEXT: packuswb %xmm3, %xmm2
2826; SSE-NEXT: pand %xmm8, %xmm1
2827; SSE-NEXT: pand %xmm8, %xmm0
2828; SSE-NEXT: packuswb %xmm1, %xmm0
2829; SSE-NEXT: packuswb %xmm2, %xmm0
2830; SSE-NEXT: packuswb %xmm4, %xmm0
2831; SSE-NEXT: retq
2832;
2833; AVX1-LABEL: trunc_mul_const_v16i64_v16i8:
2834; AVX1: # BB#0:
2835; AVX1-NEXT: movl $1, %eax
2836; AVX1-NEXT: vmovq %rax, %xmm4
2837; AVX1-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
2838; AVX1-NEXT: vpmuludq %xmm4, %xmm0, %xmm5
2839; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm6
2840; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm4
2841; AVX1-NEXT: vpsllq $32, %xmm4, %xmm4
2842; AVX1-NEXT: vpaddq %xmm4, %xmm5, %xmm8
2843; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2844; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [2,3]
2845; AVX1-NEXT: vpmuludq %xmm5, %xmm0, %xmm6
2846; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
2847; AVX1-NEXT: vpmuludq %xmm5, %xmm0, %xmm0
2848; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
2849; AVX1-NEXT: vpaddq %xmm0, %xmm6, %xmm9
2850; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [4,5]
2851; AVX1-NEXT: vpmuludq %xmm5, %xmm1, %xmm6
2852; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm7
2853; AVX1-NEXT: vpmuludq %xmm5, %xmm7, %xmm5
2854; AVX1-NEXT: vpsllq $32, %xmm5, %xmm5
2855; AVX1-NEXT: vpaddq %xmm5, %xmm6, %xmm5
2856; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2857; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [6,7]
2858; AVX1-NEXT: vpmuludq %xmm6, %xmm1, %xmm7
2859; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1
2860; AVX1-NEXT: vpmuludq %xmm6, %xmm1, %xmm1
2861; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1
2862; AVX1-NEXT: vpaddq %xmm1, %xmm7, %xmm1
2863; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [8,9]
2864; AVX1-NEXT: vpmuludq %xmm6, %xmm2, %xmm7
2865; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm4
2866; AVX1-NEXT: vpmuludq %xmm6, %xmm4, %xmm4
2867; AVX1-NEXT: vpsllq $32, %xmm4, %xmm4
2868; AVX1-NEXT: vpaddq %xmm4, %xmm7, %xmm4
2869; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
2870; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [10,11]
2871; AVX1-NEXT: vpmuludq %xmm6, %xmm2, %xmm7
2872; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm2
2873; AVX1-NEXT: vpmuludq %xmm6, %xmm2, %xmm2
2874; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2
2875; AVX1-NEXT: vpaddq %xmm2, %xmm7, %xmm2
2876; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [12,13]
2877; AVX1-NEXT: vpmuludq %xmm6, %xmm3, %xmm7
2878; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm0
2879; AVX1-NEXT: vpmuludq %xmm6, %xmm0, %xmm0
2880; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
2881; AVX1-NEXT: vpaddq %xmm0, %xmm7, %xmm0
2882; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
2883; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [14,15]
2884; AVX1-NEXT: vpmuludq %xmm6, %xmm3, %xmm7
2885; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm3
2886; AVX1-NEXT: vpmuludq %xmm6, %xmm3, %xmm3
2887; AVX1-NEXT: vpsllq $32, %xmm3, %xmm3
2888; AVX1-NEXT: vpaddq %xmm3, %xmm7, %xmm3
2889; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
2890; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
2891; AVX1-NEXT: vpand %xmm6, %xmm0, %xmm0
2892; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
2893; AVX1-NEXT: vpand %xmm6, %xmm2, %xmm2
2894; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm3
2895; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2
2896; AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm0
2897; AVX1-NEXT: vpand %xmm6, %xmm1, %xmm1
2898; AVX1-NEXT: vpand %xmm6, %xmm5, %xmm2
2899; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
2900; AVX1-NEXT: vpand %xmm6, %xmm9, %xmm2
2901; AVX1-NEXT: vpand %xmm6, %xmm8, %xmm3
2902; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2
2903; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
2904; AVX1-NEXT: vpackuswb %xmm0, %xmm1, %xmm0
2905; AVX1-NEXT: vzeroupper
2906; AVX1-NEXT: retq
2907;
2908; AVX2-LABEL: trunc_mul_const_v16i64_v16i8:
2909; AVX2: # BB#0:
2910; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [4,5,6,7]
2911; AVX2-NEXT: vpmuludq %ymm4, %ymm1, %ymm5
2912; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm1
2913; AVX2-NEXT: vpmuludq %ymm4, %ymm1, %ymm1
2914; AVX2-NEXT: vpsllq $32, %ymm1, %ymm1
2915; AVX2-NEXT: vpaddq %ymm1, %ymm5, %ymm1
2916; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,2,3]
2917; AVX2-NEXT: vpmuludq %ymm4, %ymm0, %ymm5
2918; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
2919; AVX2-NEXT: vpmuludq %ymm4, %ymm0, %ymm0
2920; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
2921; AVX2-NEXT: vpaddq %ymm0, %ymm5, %ymm0
2922; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [12,13,14,15]
2923; AVX2-NEXT: vpmuludq %ymm4, %ymm3, %ymm5
2924; AVX2-NEXT: vpsrlq $32, %ymm3, %ymm3
2925; AVX2-NEXT: vpmuludq %ymm4, %ymm3, %ymm3
2926; AVX2-NEXT: vpsllq $32, %ymm3, %ymm3
2927; AVX2-NEXT: vpaddq %ymm3, %ymm5, %ymm3
2928; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [8,9,10,11]
2929; AVX2-NEXT: vpmuludq %ymm4, %ymm2, %ymm5
2930; AVX2-NEXT: vpsrlq $32, %ymm2, %ymm2
2931; AVX2-NEXT: vpmuludq %ymm4, %ymm2, %ymm2
2932; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
2933; AVX2-NEXT: vpaddq %ymm2, %ymm5, %ymm2
2934; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
2935; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
2936; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
2937; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
2938; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
2939; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
2940; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
2941; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
2942; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
2943; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
2944; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
2945; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2946; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
2947; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
2948; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2949; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
2950; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2951; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
2952; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2953; AVX2-NEXT: vzeroupper
2954; AVX2-NEXT: retq
2955;
2956; AVX512F-LABEL: trunc_mul_const_v16i64_v16i8:
2957; AVX512F: # BB#0:
2958; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm2 = [8,9,10,11,12,13,14,15]
2959; AVX512F-NEXT: vpmuludq %zmm2, %zmm1, %zmm3
2960; AVX512F-NEXT: vpsrlq $32, %zmm1, %zmm1
2961; AVX512F-NEXT: vpmuludq %zmm2, %zmm1, %zmm1
2962; AVX512F-NEXT: vpsllq $32, %zmm1, %zmm1
2963; AVX512F-NEXT: vpaddq %zmm1, %zmm3, %zmm1
2964; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7]
2965; AVX512F-NEXT: vpmuludq %zmm2, %zmm0, %zmm3
2966; AVX512F-NEXT: vpsrlq $32, %zmm0, %zmm0
2967; AVX512F-NEXT: vpmuludq %zmm2, %zmm0, %zmm0
2968; AVX512F-NEXT: vpsllq $32, %zmm0, %zmm0
2969; AVX512F-NEXT: vpaddq %zmm0, %zmm3, %zmm0
2970; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
2971; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
2972; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
2973; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
2974; AVX512F-NEXT: retq
2975;
2976; AVX512BW-LABEL: trunc_mul_const_v16i64_v16i8:
2977; AVX512BW: # BB#0:
2978; AVX512BW-NEXT: vmovdqa32 {{.*#+}} zmm2 = [8,9,10,11,12,13,14,15]
2979; AVX512BW-NEXT: vpmuludq %zmm2, %zmm1, %zmm3
2980; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm1
2981; AVX512BW-NEXT: vpmuludq %zmm2, %zmm1, %zmm1
2982; AVX512BW-NEXT: vpsllq $32, %zmm1, %zmm1
2983; AVX512BW-NEXT: vpaddq %zmm1, %zmm3, %zmm1
2984; AVX512BW-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7]
2985; AVX512BW-NEXT: vpmuludq %zmm2, %zmm0, %zmm3
2986; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm0
2987; AVX512BW-NEXT: vpmuludq %zmm2, %zmm0, %zmm0
2988; AVX512BW-NEXT: vpsllq $32, %zmm0, %zmm0
2989; AVX512BW-NEXT: vpaddq %zmm0, %zmm3, %zmm0
2990; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
2991; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
2992; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
2993; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
2994; AVX512BW-NEXT: retq
2995;
2996; AVX512DQ-LABEL: trunc_mul_const_v16i64_v16i8:
2997; AVX512DQ: # BB#0:
2998; AVX512DQ-NEXT: vpmullq {{.*}}(%rip), %zmm1, %zmm1
2999; AVX512DQ-NEXT: vpmullq {{.*}}(%rip), %zmm0, %zmm0
3000; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
3001; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
3002; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
3003; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
3004; AVX512DQ-NEXT: retq
3005 %1 = mul <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
3006 %2 = trunc <16 x i64> %1 to <16 x i8>
3007 ret <16 x i8> %2
3008}
3009
3010define <16 x i8> @trunc_mul_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
3011; SSE-LABEL: trunc_mul_const_v16i32_v16i8:
3012; SSE: # BB#0:
3013; SSE-NEXT: movdqa {{.*#+}} xmm4 = [0,1,2,3]
3014; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
3015; SSE-NEXT: pmuludq %xmm4, %xmm0
3016; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
3017; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
3018; SSE-NEXT: pmuludq %xmm5, %xmm4
3019; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
3020; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
3021; SSE-NEXT: movdqa {{.*#+}} xmm4 = [4,5,6,7]
3022; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,3,3]
3023; SSE-NEXT: pmuludq %xmm4, %xmm1
3024; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
3025; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
3026; SSE-NEXT: pmuludq %xmm5, %xmm4
3027; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
3028; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
3029; SSE-NEXT: movdqa {{.*#+}} xmm4 = [8,9,10,11]
3030; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
3031; SSE-NEXT: pmuludq %xmm4, %xmm2
3032; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
3033; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
3034; SSE-NEXT: pmuludq %xmm5, %xmm4
3035; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
3036; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
3037; SSE-NEXT: movdqa {{.*#+}} xmm4 = [12,13,14,15]
3038; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,1,3,3]
3039; SSE-NEXT: pmuludq %xmm4, %xmm3
3040; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
3041; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
3042; SSE-NEXT: pmuludq %xmm5, %xmm4
3043; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
3044; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
3045; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
3046; SSE-NEXT: pand %xmm4, %xmm3
3047; SSE-NEXT: pand %xmm4, %xmm2
3048; SSE-NEXT: packuswb %xmm3, %xmm2
3049; SSE-NEXT: pand %xmm4, %xmm1
3050; SSE-NEXT: pand %xmm4, %xmm0
3051; SSE-NEXT: packuswb %xmm1, %xmm0
3052; SSE-NEXT: packuswb %xmm2, %xmm0
3053; SSE-NEXT: retq
3054;
3055; AVX1-LABEL: trunc_mul_const_v16i32_v16i8:
3056; AVX1: # BB#0:
3057; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
3058; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3059; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
3060; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm3
3061; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
3062; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
3063; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
3064; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
3065; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
3066; AVX1-NEXT: vpackuswb %xmm1, %xmm3, %xmm1
3067; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
3068; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
3069; AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm0
3070; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
3071; AVX1-NEXT: vzeroupper
3072; AVX1-NEXT: retq
3073;
3074; AVX2-LABEL: trunc_mul_const_v16i32_v16i8:
3075; AVX2: # BB#0:
3076; AVX2-NEXT: vpmulld {{.*}}(%rip), %ymm0, %ymm0
3077; AVX2-NEXT: vpmulld {{.*}}(%rip), %ymm1, %ymm1
3078; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
3079; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
3080; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
3081; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
3082; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
3083; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
3084; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3085; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
3086; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3087; AVX2-NEXT: vzeroupper
3088; AVX2-NEXT: retq
3089;
3090; AVX512-LABEL: trunc_mul_const_v16i32_v16i8:
3091; AVX512: # BB#0:
3092; AVX512-NEXT: vpmulld {{.*}}(%rip), %zmm0, %zmm0
3093; AVX512-NEXT: vpmovdb %zmm0, %xmm0
3094; AVX512-NEXT: retq
3095 %1 = mul <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3096 %2 = trunc <16 x i32> %1 to <16 x i8>
3097 ret <16 x i8> %2
3098}
3099
3100define <16 x i8> @trunc_mul_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
3101; SSE-LABEL: trunc_mul_const_v16i16_v16i8:
3102; SSE: # BB#0:
3103; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
3104; SSE-NEXT: pmullw {{.*}}(%rip), %xmm1
3105; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
3106; SSE-NEXT: pand %xmm2, %xmm1
3107; SSE-NEXT: pand %xmm2, %xmm0
3108; SSE-NEXT: packuswb %xmm1, %xmm0
3109; SSE-NEXT: retq
3110;
3111; AVX1-LABEL: trunc_mul_const_v16i16_v16i8:
3112; AVX1: # BB#0:
3113; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm1
3114; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3115; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
3116; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
3117; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
3118; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
3119; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
3120; AVX1-NEXT: vzeroupper
3121; AVX1-NEXT: retq
3122;
3123; AVX2-LABEL: trunc_mul_const_v16i16_v16i8:
3124; AVX2: # BB#0:
3125; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
3126; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3127; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
3128; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
3129; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
3130; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3131; AVX2-NEXT: vzeroupper
3132; AVX2-NEXT: retq
3133;
3134; AVX512F-LABEL: trunc_mul_const_v16i16_v16i8:
3135; AVX512F: # BB#0:
3136; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
3137; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
3138; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
3139; AVX512F-NEXT: retq
3140;
3141; AVX512BW-LABEL: trunc_mul_const_v16i16_v16i8:
3142; AVX512BW: # BB#0:
3143; AVX512BW-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
3144; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
3145; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
3146; AVX512BW-NEXT: retq
3147;
3148; AVX512DQ-LABEL: trunc_mul_const_v16i16_v16i8:
3149; AVX512DQ: # BB#0:
3150; AVX512DQ-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
3151; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
3152; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
3153; AVX512DQ-NEXT: retq
3154 %1 = mul <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
3155 %2 = trunc <16 x i16> %1 to <16 x i8>
3156 ret <16 x i8> %2
3157}
3158
3159;
3160; and
3161;
3162
3163define <4 x i32> @trunc_and_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
3164; SSE-LABEL: trunc_and_v4i64_v4i32:
3165; SSE: # BB#0:
3166; SSE-NEXT: andps %xmm3, %xmm1
3167; SSE-NEXT: andps %xmm2, %xmm0
3168; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
3169; SSE-NEXT: retq
3170;
3171; AVX1-LABEL: trunc_and_v4i64_v4i32:
3172; AVX1: # BB#0:
3173; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
3174; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3175; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
3176; AVX1-NEXT: vzeroupper
3177; AVX1-NEXT: retq
3178;
3179; AVX2-LABEL: trunc_and_v4i64_v4i32:
3180; AVX2: # BB#0:
3181; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
3182; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
3183; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3184; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
3185; AVX2-NEXT: vzeroupper
3186; AVX2-NEXT: retq
3187;
3188; AVX512-LABEL: trunc_and_v4i64_v4i32:
3189; AVX512: # BB#0:
3190; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
3191; AVX512-NEXT: vpmovqd %zmm0, %ymm0
3192; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
3193; AVX512-NEXT: retq
3194 %1 = and <4 x i64> %a0, %a1
3195 %2 = trunc <4 x i64> %1 to <4 x i32>
3196 ret <4 x i32> %2
3197}
3198
3199define <8 x i16> @trunc_and_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
3200; SSE-LABEL: trunc_and_v8i64_v8i16:
3201; SSE: # BB#0:
3202; SSE-NEXT: pand %xmm4, %xmm0
3203; SSE-NEXT: pand %xmm5, %xmm1
3204; SSE-NEXT: pand %xmm6, %xmm2
3205; SSE-NEXT: pand %xmm7, %xmm3
3206; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
3207; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
3208; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
3209; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
3210; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
3211; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
3212; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
3213; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
3214; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
3215; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3216; SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
3217; SSE-NEXT: movapd %xmm2, %xmm0
3218; SSE-NEXT: retq
3219;
3220; AVX1-LABEL: trunc_and_v8i64_v8i16:
3221; AVX1: # BB#0:
3222; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
3223; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
3224; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3225; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
3226; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
3227; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
3228; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
3229; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3230; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
3231; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
3232; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
3233; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
3234; AVX1-NEXT: vzeroupper
3235; AVX1-NEXT: retq
3236;
3237; AVX2-LABEL: trunc_and_v8i64_v8i16:
3238; AVX2: # BB#0:
3239; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
3240; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
3241; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
3242; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3243; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
3244; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
3245; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3246; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
3247; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3248; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
3249; AVX2-NEXT: vzeroupper
3250; AVX2-NEXT: retq
3251;
3252; AVX512-LABEL: trunc_and_v8i64_v8i16:
3253; AVX512: # BB#0:
3254; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
3255; AVX512-NEXT: vpmovqw %zmm0, %xmm0
3256; AVX512-NEXT: retq
3257 %1 = and <8 x i64> %a0, %a1
3258 %2 = trunc <8 x i64> %1 to <8 x i16>
3259 ret <8 x i16> %2
3260}
3261
3262define <8 x i16> @trunc_and_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
3263; SSE-LABEL: trunc_and_v8i32_v8i16:
3264; SSE: # BB#0:
3265; SSE-NEXT: pand %xmm2, %xmm0
3266; SSE-NEXT: pand %xmm3, %xmm1
3267; SSE-NEXT: pslld $16, %xmm1
3268; SSE-NEXT: psrad $16, %xmm1
3269; SSE-NEXT: pslld $16, %xmm0
3270; SSE-NEXT: psrad $16, %xmm0
3271; SSE-NEXT: packssdw %xmm1, %xmm0
3272; SSE-NEXT: retq
3273;
3274; AVX1-LABEL: trunc_and_v8i32_v8i16:
3275; AVX1: # BB#0:
3276; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
3277; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3278; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
3279; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
3280; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
3281; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3282; AVX1-NEXT: vzeroupper
3283; AVX1-NEXT: retq
3284;
3285; AVX2-LABEL: trunc_and_v8i32_v8i16:
3286; AVX2: # BB#0:
3287; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
3288; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
3289; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3290; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
3291; AVX2-NEXT: vzeroupper
3292; AVX2-NEXT: retq
3293;
3294; AVX512-LABEL: trunc_and_v8i32_v8i16:
3295; AVX512: # BB#0:
3296; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
3297; AVX512-NEXT: vpmovdw %zmm0, %ymm0
3298; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
3299; AVX512-NEXT: retq
3300 %1 = and <8 x i32> %a0, %a1
3301 %2 = trunc <8 x i32> %1 to <8 x i16>
3302 ret <8 x i16> %2
3303}
3304
3305define <16 x i8> @trunc_and_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwind {
3306; SSE-LABEL: trunc_and_v16i64_v16i8:
3307; SSE: # BB#0:
3308; SSE-NEXT: pand {{[0-9]+}}(%rsp), %xmm0
3309; SSE-NEXT: pand {{[0-9]+}}(%rsp), %xmm1
3310; SSE-NEXT: pand {{[0-9]+}}(%rsp), %xmm2
3311; SSE-NEXT: pand {{[0-9]+}}(%rsp), %xmm3
3312; SSE-NEXT: pand {{[0-9]+}}(%rsp), %xmm4
3313; SSE-NEXT: pand {{[0-9]+}}(%rsp), %xmm5
3314; SSE-NEXT: pand {{[0-9]+}}(%rsp), %xmm6
3315; SSE-NEXT: pand {{[0-9]+}}(%rsp), %xmm7
3316; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
3317; SSE-NEXT: pand %xmm8, %xmm7
3318; SSE-NEXT: pand %xmm8, %xmm6
3319; SSE-NEXT: packuswb %xmm7, %xmm6
3320; SSE-NEXT: pand %xmm8, %xmm5
3321; SSE-NEXT: pand %xmm8, %xmm4
3322; SSE-NEXT: packuswb %xmm5, %xmm4
3323; SSE-NEXT: packuswb %xmm6, %xmm4
3324; SSE-NEXT: pand %xmm8, %xmm3
3325; SSE-NEXT: pand %xmm8, %xmm2
3326; SSE-NEXT: packuswb %xmm3, %xmm2
3327; SSE-NEXT: pand %xmm8, %xmm1
3328; SSE-NEXT: pand %xmm8, %xmm0
3329; SSE-NEXT: packuswb %xmm1, %xmm0
3330; SSE-NEXT: packuswb %xmm2, %xmm0
3331; SSE-NEXT: packuswb %xmm4, %xmm0
3332; SSE-NEXT: retq
3333;
3334; AVX1-LABEL: trunc_and_v16i64_v16i8:
3335; AVX1: # BB#0:
3336; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0
3337; AVX1-NEXT: vandps %ymm5, %ymm1, %ymm1
3338; AVX1-NEXT: vandps %ymm6, %ymm2, %ymm2
3339; AVX1-NEXT: vandps %ymm7, %ymm3, %ymm3
3340; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
3341; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
3342; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
3343; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
3344; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3
3345; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
3346; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
3347; AVX1-NEXT: vandps %xmm5, %xmm2, %xmm2
3348; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
3349; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
3350; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3351; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
3352; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1
3353; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
3354; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3355; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
3356; AVX1-NEXT: vandps %xmm5, %xmm0, %xmm0
3357; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
3358; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
3359; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
3360; AVX1-NEXT: vzeroupper
3361; AVX1-NEXT: retq
3362;
3363; AVX2-LABEL: trunc_and_v16i64_v16i8:
3364; AVX2: # BB#0:
3365; AVX2-NEXT: vpand %ymm5, %ymm1, %ymm1
3366; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
3367; AVX2-NEXT: vpand %ymm7, %ymm3, %ymm3
3368; AVX2-NEXT: vpand %ymm6, %ymm2, %ymm2
3369; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
3370; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
3371; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
3372; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
3373; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
3374; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
3375; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
3376; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
3377; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
3378; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
3379; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
3380; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3381; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
3382; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
3383; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3384; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
3385; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3386; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
3387; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
3388; AVX2-NEXT: vzeroupper
3389; AVX2-NEXT: retq
3390;
3391; AVX512F-LABEL: trunc_and_v16i64_v16i8:
3392; AVX512F: # BB#0:
3393; AVX512F-NEXT: vpandq %zmm3, %zmm1, %zmm1
3394; AVX512F-NEXT: vpandq %zmm2, %zmm0, %zmm0
3395; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
3396; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
3397; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
3398; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
3399; AVX512F-NEXT: retq
3400;
3401; AVX512BW-LABEL: trunc_and_v16i64_v16i8:
3402; AVX512BW: # BB#0:
3403; AVX512BW-NEXT: vpandq %zmm3, %zmm1, %zmm1
3404; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
3405; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
3406; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
3407; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
3408; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
3409; AVX512BW-NEXT: retq
3410;
3411; AVX512DQ-LABEL: trunc_and_v16i64_v16i8:
3412; AVX512DQ: # BB#0:
3413; AVX512DQ-NEXT: vpandq %zmm3, %zmm1, %zmm1
3414; AVX512DQ-NEXT: vpandq %zmm2, %zmm0, %zmm0
3415; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
3416; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
3417; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
3418; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
3419; AVX512DQ-NEXT: retq
3420 %1 = and <16 x i64> %a0, %a1
3421 %2 = trunc <16 x i64> %1 to <16 x i8>
3422 ret <16 x i8> %2
3423}
3424
3425define <16 x i8> @trunc_and_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwind {
3426; SSE-LABEL: trunc_and_v16i32_v16i8:
3427; SSE: # BB#0:
3428; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
3429; SSE-NEXT: pand %xmm8, %xmm7
3430; SSE-NEXT: pand %xmm3, %xmm7
3431; SSE-NEXT: pand %xmm8, %xmm6
3432; SSE-NEXT: pand %xmm2, %xmm6
3433; SSE-NEXT: packuswb %xmm7, %xmm6
3434; SSE-NEXT: pand %xmm8, %xmm5
3435; SSE-NEXT: pand %xmm1, %xmm5
3436; SSE-NEXT: pand %xmm8, %xmm4
3437; SSE-NEXT: pand %xmm4, %xmm0
3438; SSE-NEXT: packuswb %xmm5, %xmm0
3439; SSE-NEXT: packuswb %xmm6, %xmm0
3440; SSE-NEXT: retq
3441;
3442; AVX1-LABEL: trunc_and_v16i32_v16i8:
3443; AVX1: # BB#0:
3444; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
3445; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
3446; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3447; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
3448; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
3449; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
3450; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
3451; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3452; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
3453; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
3454; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
3455; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
3456; AVX1-NEXT: vzeroupper
3457; AVX1-NEXT: retq
3458;
3459; AVX2-LABEL: trunc_and_v16i32_v16i8:
3460; AVX2: # BB#0:
3461; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
3462; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
3463; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
3464; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
3465; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
3466; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
3467; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
3468; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
3469; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3470; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
3471; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3472; AVX2-NEXT: vzeroupper
3473; AVX2-NEXT: retq
3474;
3475; AVX512-LABEL: trunc_and_v16i32_v16i8:
3476; AVX512: # BB#0:
3477; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
3478; AVX512-NEXT: vpmovdb %zmm0, %xmm0
3479; AVX512-NEXT: retq
3480 %1 = and <16 x i32> %a0, %a1
3481 %2 = trunc <16 x i32> %1 to <16 x i8>
3482 ret <16 x i8> %2
3483}
3484
3485define <16 x i8> @trunc_and_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind {
3486; SSE-LABEL: trunc_and_v16i16_v16i8:
3487; SSE: # BB#0:
3488; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
3489; SSE-NEXT: pand %xmm4, %xmm3
3490; SSE-NEXT: pand %xmm1, %xmm3
3491; SSE-NEXT: pand %xmm4, %xmm2
3492; SSE-NEXT: pand %xmm2, %xmm0
3493; SSE-NEXT: packuswb %xmm3, %xmm0
3494; SSE-NEXT: retq
3495;
3496; AVX1-LABEL: trunc_and_v16i16_v16i8:
3497; AVX1: # BB#0:
3498; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
3499; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3500; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
3501; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
3502; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
3503; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3504; AVX1-NEXT: vzeroupper
3505; AVX1-NEXT: retq
3506;
3507; AVX2-LABEL: trunc_and_v16i16_v16i8:
3508; AVX2: # BB#0:
3509; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
3510; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3511; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
3512; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
3513; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
3514; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3515; AVX2-NEXT: vzeroupper
3516; AVX2-NEXT: retq
3517;
3518; AVX512F-LABEL: trunc_and_v16i16_v16i8:
3519; AVX512F: # BB#0:
3520; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
3521; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
3522; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
3523; AVX512F-NEXT: retq
3524;
3525; AVX512BW-LABEL: trunc_and_v16i16_v16i8:
3526; AVX512BW: # BB#0:
3527; AVX512BW-NEXT: vpand %ymm1, %ymm0, %ymm0
3528; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
3529; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
3530; AVX512BW-NEXT: retq
3531;
3532; AVX512DQ-LABEL: trunc_and_v16i16_v16i8:
3533; AVX512DQ: # BB#0:
3534; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
3535; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
3536; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
3537; AVX512DQ-NEXT: retq
3538 %1 = and <16 x i16> %a0, %a1
3539 %2 = trunc <16 x i16> %1 to <16 x i8>
3540 ret <16 x i8> %2
3541}
3542
3543;
3544; and to constant
3545;
3546
3547define <4 x i32> @trunc_and_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
3548; SSE-LABEL: trunc_and_const_v4i64_v4i32:
3549; SSE: # BB#0:
3550; SSE-NEXT: movl $1, %eax
3551; SSE-NEXT: movd %rax, %xmm2
3552; SSE-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
3553; SSE-NEXT: pand %xmm2, %xmm0
3554; SSE-NEXT: andps {{.*}}(%rip), %xmm1
3555; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
3556; SSE-NEXT: retq
3557;
3558; AVX1-LABEL: trunc_and_const_v4i64_v4i32:
3559; AVX1: # BB#0:
3560; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
3561; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3562; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
3563; AVX1-NEXT: vzeroupper
3564; AVX1-NEXT: retq
3565;
3566; AVX2-LABEL: trunc_and_const_v4i64_v4i32:
3567; AVX2: # BB#0:
3568; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
3569; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
3570; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3571; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
3572; AVX2-NEXT: vzeroupper
3573; AVX2-NEXT: retq
3574;
3575; AVX512-LABEL: trunc_and_const_v4i64_v4i32:
3576; AVX512: # BB#0:
3577; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
3578; AVX512-NEXT: vpmovqd %zmm0, %ymm0
3579; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
3580; AVX512-NEXT: retq
3581 %1 = and <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
3582 %2 = trunc <4 x i64> %1 to <4 x i32>
3583 ret <4 x i32> %2
3584}
3585
3586define <8 x i16> @trunc_and_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
3587; SSE-LABEL: trunc_and_const_v8i64_v8i16:
3588; SSE: # BB#0:
3589; SSE-NEXT: movl $1, %eax
3590; SSE-NEXT: movd %rax, %xmm4
3591; SSE-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
3592; SSE-NEXT: pand %xmm0, %xmm4
3593; SSE-NEXT: pand {{.*}}(%rip), %xmm1
3594; SSE-NEXT: pand {{.*}}(%rip), %xmm2
3595; SSE-NEXT: pand {{.*}}(%rip), %xmm3
3596; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
3597; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm0[0,1,0,2,4,5,6,7]
3598; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
3599; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
3600; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
3601; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[0,2,2,3]
3602; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
3603; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
3604; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
3605; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
3606; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
3607; SSE-NEXT: retq
3608;
3609; AVX1-LABEL: trunc_and_const_v8i64_v8i16:
3610; AVX1: # BB#0:
3611; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
3612; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
3613; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3614; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
3615; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
3616; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
3617; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
3618; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3619; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
3620; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
3621; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
3622; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
3623; AVX1-NEXT: vzeroupper
3624; AVX1-NEXT: retq
3625;
3626; AVX2-LABEL: trunc_and_const_v8i64_v8i16:
3627; AVX2: # BB#0:
3628; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
3629; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
3630; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
3631; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3632; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
3633; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
3634; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3635; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
3636; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3637; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
3638; AVX2-NEXT: vzeroupper
3639; AVX2-NEXT: retq
3640;
3641; AVX512-LABEL: trunc_and_const_v8i64_v8i16:
3642; AVX512: # BB#0:
3643; AVX512-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
3644; AVX512-NEXT: vpmovqw %zmm0, %xmm0
3645; AVX512-NEXT: retq
3646 %1 = and <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
3647 %2 = trunc <8 x i64> %1 to <8 x i16>
3648 ret <8 x i16> %2
3649}
3650
3651define <8 x i16> @trunc_and_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
3652; SSE-LABEL: trunc_and_const_v8i32_v8i16:
3653; SSE: # BB#0:
3654; SSE-NEXT: pand {{.*}}(%rip), %xmm0
3655; SSE-NEXT: pand {{.*}}(%rip), %xmm1
3656; SSE-NEXT: pslld $16, %xmm1
3657; SSE-NEXT: psrad $16, %xmm1
3658; SSE-NEXT: pslld $16, %xmm0
3659; SSE-NEXT: psrad $16, %xmm0
3660; SSE-NEXT: packssdw %xmm1, %xmm0
3661; SSE-NEXT: retq
3662;
3663; AVX1-LABEL: trunc_and_const_v8i32_v8i16:
3664; AVX1: # BB#0:
3665; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
3666; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3667; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
3668; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
3669; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
3670; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3671; AVX1-NEXT: vzeroupper
3672; AVX1-NEXT: retq
3673;
3674; AVX2-LABEL: trunc_and_const_v8i32_v8i16:
3675; AVX2: # BB#0:
3676; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
3677; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
3678; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3679; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
3680; AVX2-NEXT: vzeroupper
3681; AVX2-NEXT: retq
3682;
3683; AVX512-LABEL: trunc_and_const_v8i32_v8i16:
3684; AVX512: # BB#0:
3685; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
3686; AVX512-NEXT: vpmovdw %zmm0, %ymm0
3687; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
3688; AVX512-NEXT: retq
3689 %1 = and <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3690 %2 = trunc <8 x i32> %1 to <8 x i16>
3691 ret <8 x i16> %2
3692}
3693
3694define <16 x i8> @trunc_and_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
3695; SSE-LABEL: trunc_and_const_v16i64_v16i8:
3696; SSE: # BB#0:
3697; SSE-NEXT: movl $1, %eax
3698; SSE-NEXT: movd %rax, %xmm8
3699; SSE-NEXT: pslldq {{.*#+}} xmm8 = zero,zero,zero,zero,zero,zero,zero,zero,xmm8[0,1,2,3,4,5,6,7]
3700; SSE-NEXT: pand {{.*}}(%rip), %xmm1
3701; SSE-NEXT: pand {{.*}}(%rip), %xmm2
3702; SSE-NEXT: pand {{.*}}(%rip), %xmm3
3703; SSE-NEXT: pand {{.*}}(%rip), %xmm4
3704; SSE-NEXT: pand {{.*}}(%rip), %xmm5
3705; SSE-NEXT: pand {{.*}}(%rip), %xmm6
3706; SSE-NEXT: pand {{.*}}(%rip), %xmm7
3707; SSE-NEXT: movdqa {{.*#+}} xmm9 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
3708; SSE-NEXT: pand %xmm9, %xmm7
3709; SSE-NEXT: pand %xmm9, %xmm6
3710; SSE-NEXT: packuswb %xmm7, %xmm6
3711; SSE-NEXT: pand %xmm9, %xmm5
3712; SSE-NEXT: pand %xmm9, %xmm4
3713; SSE-NEXT: packuswb %xmm5, %xmm4
3714; SSE-NEXT: packuswb %xmm6, %xmm4
3715; SSE-NEXT: pand %xmm9, %xmm3
3716; SSE-NEXT: pand %xmm9, %xmm2
3717; SSE-NEXT: packuswb %xmm3, %xmm2
3718; SSE-NEXT: pand %xmm9, %xmm1
3719; SSE-NEXT: pand %xmm9, %xmm8
3720; SSE-NEXT: pand %xmm8, %xmm0
3721; SSE-NEXT: packuswb %xmm1, %xmm0
3722; SSE-NEXT: packuswb %xmm2, %xmm0
3723; SSE-NEXT: packuswb %xmm4, %xmm0
3724; SSE-NEXT: retq
3725;
3726; AVX1-LABEL: trunc_and_const_v16i64_v16i8:
3727; AVX1: # BB#0:
3728; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
3729; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
3730; AVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2
3731; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
3732; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
3733; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
3734; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
3735; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
3736; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3
3737; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
3738; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
3739; AVX1-NEXT: vandps %xmm5, %xmm2, %xmm2
3740; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
3741; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
3742; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3743; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
3744; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1
3745; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
3746; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3747; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
3748; AVX1-NEXT: vandps %xmm5, %xmm0, %xmm0
3749; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
3750; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
3751; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
3752; AVX1-NEXT: vzeroupper
3753; AVX1-NEXT: retq
3754;
3755; AVX2-LABEL: trunc_and_const_v16i64_v16i8:
3756; AVX2: # BB#0:
3757; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
3758; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
3759; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
3760; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
3761; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
3762; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
3763; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
3764; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
3765; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
3766; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
3767; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
3768; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
3769; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
3770; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
3771; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
3772; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3773; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
3774; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
3775; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3776; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
3777; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3778; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
3779; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
3780; AVX2-NEXT: vzeroupper
3781; AVX2-NEXT: retq
3782;
3783; AVX512F-LABEL: trunc_and_const_v16i64_v16i8:
3784; AVX512F: # BB#0:
3785; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
3786; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
3787; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
3788; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
3789; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
3790; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
3791; AVX512F-NEXT: retq
3792;
3793; AVX512BW-LABEL: trunc_and_const_v16i64_v16i8:
3794; AVX512BW: # BB#0:
3795; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
3796; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
3797; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
3798; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
3799; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
3800; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
3801; AVX512BW-NEXT: retq
3802;
3803; AVX512DQ-LABEL: trunc_and_const_v16i64_v16i8:
3804; AVX512DQ: # BB#0:
3805; AVX512DQ-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
3806; AVX512DQ-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
3807; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
3808; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
3809; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
3810; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
3811; AVX512DQ-NEXT: retq
3812 %1 = and <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
3813 %2 = trunc <16 x i64> %1 to <16 x i8>
3814 ret <16 x i8> %2
3815}
3816
3817define <16 x i8> @trunc_and_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
3818; SSE-LABEL: trunc_and_const_v16i32_v16i8:
3819; SSE: # BB#0:
3820; SSE-NEXT: pand {{.*}}(%rip), %xmm0
3821; SSE-NEXT: pand {{.*}}(%rip), %xmm1
3822; SSE-NEXT: pand {{.*}}(%rip), %xmm2
3823; SSE-NEXT: pand {{.*}}(%rip), %xmm3
3824; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
3825; SSE-NEXT: pand %xmm4, %xmm3
3826; SSE-NEXT: pand %xmm4, %xmm2
3827; SSE-NEXT: packuswb %xmm3, %xmm2
3828; SSE-NEXT: pand %xmm4, %xmm1
3829; SSE-NEXT: pand %xmm4, %xmm0
3830; SSE-NEXT: packuswb %xmm1, %xmm0
3831; SSE-NEXT: packuswb %xmm2, %xmm0
3832; SSE-NEXT: retq
3833;
3834; AVX1-LABEL: trunc_and_const_v16i32_v16i8:
3835; AVX1: # BB#0:
3836; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
3837; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
3838; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3839; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
3840; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
3841; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
3842; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
3843; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3844; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
3845; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
3846; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
3847; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
3848; AVX1-NEXT: vzeroupper
3849; AVX1-NEXT: retq
3850;
3851; AVX2-LABEL: trunc_and_const_v16i32_v16i8:
3852; AVX2: # BB#0:
3853; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
3854; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
3855; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
3856; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
3857; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
3858; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
3859; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
3860; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
3861; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3862; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
3863; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3864; AVX2-NEXT: vzeroupper
3865; AVX2-NEXT: retq
3866;
3867; AVX512-LABEL: trunc_and_const_v16i32_v16i8:
3868; AVX512: # BB#0:
3869; AVX512-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
3870; AVX512-NEXT: vpmovdb %zmm0, %xmm0
3871; AVX512-NEXT: retq
3872 %1 = and <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3873 %2 = trunc <16 x i32> %1 to <16 x i8>
3874 ret <16 x i8> %2
3875}
3876
3877define <16 x i8> @trunc_and_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
3878; SSE-LABEL: trunc_and_const_v16i16_v16i8:
3879; SSE: # BB#0:
3880; SSE-NEXT: pand {{.*}}(%rip), %xmm0
3881; SSE-NEXT: pand {{.*}}(%rip), %xmm1
3882; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
3883; SSE-NEXT: pand %xmm2, %xmm1
3884; SSE-NEXT: pand %xmm2, %xmm0
3885; SSE-NEXT: packuswb %xmm1, %xmm0
3886; SSE-NEXT: retq
3887;
3888; AVX1-LABEL: trunc_and_const_v16i16_v16i8:
3889; AVX1: # BB#0:
3890; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
3891; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3892; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
3893; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
3894; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
3895; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3896; AVX1-NEXT: vzeroupper
3897; AVX1-NEXT: retq
3898;
3899; AVX2-LABEL: trunc_and_const_v16i16_v16i8:
3900; AVX2: # BB#0:
3901; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
3902; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3903; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
3904; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
3905; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
3906; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3907; AVX2-NEXT: vzeroupper
3908; AVX2-NEXT: retq
3909;
3910; AVX512F-LABEL: trunc_and_const_v16i16_v16i8:
3911; AVX512F: # BB#0:
3912; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
3913; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
3914; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
3915; AVX512F-NEXT: retq
3916;
3917; AVX512BW-LABEL: trunc_and_const_v16i16_v16i8:
3918; AVX512BW: # BB#0:
3919; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
3920; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
3921; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
3922; AVX512BW-NEXT: retq
3923;
3924; AVX512DQ-LABEL: trunc_and_const_v16i16_v16i8:
3925; AVX512DQ: # BB#0:
3926; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
3927; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
3928; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
3929; AVX512DQ-NEXT: retq
3930 %1 = and <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
3931 %2 = trunc <16 x i16> %1 to <16 x i8>
3932 ret <16 x i8> %2
3933}
3934
3935;
3936; xor
3937;
3938
3939define <4 x i32> @trunc_xor_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
3940; SSE-LABEL: trunc_xor_v4i64_v4i32:
3941; SSE: # BB#0:
3942; SSE-NEXT: xorps %xmm3, %xmm1
3943; SSE-NEXT: xorps %xmm2, %xmm0
3944; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
3945; SSE-NEXT: retq
3946;
3947; AVX1-LABEL: trunc_xor_v4i64_v4i32:
3948; AVX1: # BB#0:
3949; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
3950; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3951; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
3952; AVX1-NEXT: vzeroupper
3953; AVX1-NEXT: retq
3954;
3955; AVX2-LABEL: trunc_xor_v4i64_v4i32:
3956; AVX2: # BB#0:
3957; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
3958; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
3959; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3960; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
3961; AVX2-NEXT: vzeroupper
3962; AVX2-NEXT: retq
3963;
3964; AVX512-LABEL: trunc_xor_v4i64_v4i32:
3965; AVX512: # BB#0:
3966; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
3967; AVX512-NEXT: vpmovqd %zmm0, %ymm0
3968; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
3969; AVX512-NEXT: retq
3970 %1 = xor <4 x i64> %a0, %a1
3971 %2 = trunc <4 x i64> %1 to <4 x i32>
3972 ret <4 x i32> %2
3973}
3974
3975define <8 x i16> @trunc_xor_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
3976; SSE-LABEL: trunc_xor_v8i64_v8i16:
3977; SSE: # BB#0:
3978; SSE-NEXT: pxor %xmm4, %xmm0
3979; SSE-NEXT: pxor %xmm5, %xmm1
3980; SSE-NEXT: pxor %xmm6, %xmm2
3981; SSE-NEXT: pxor %xmm7, %xmm3
3982; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
3983; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
3984; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
3985; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
3986; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
3987; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
3988; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
3989; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
3990; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
3991; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3992; SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
3993; SSE-NEXT: movapd %xmm2, %xmm0
3994; SSE-NEXT: retq
3995;
3996; AVX1-LABEL: trunc_xor_v8i64_v8i16:
3997; AVX1: # BB#0:
3998; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
3999; AVX1-NEXT: vxorps %ymm3, %ymm1, %ymm1
4000; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4001; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
4002; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
4003; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
4004; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
4005; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4006; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
4007; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
4008; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
4009; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
4010; AVX1-NEXT: vzeroupper
4011; AVX1-NEXT: retq
4012;
4013; AVX2-LABEL: trunc_xor_v8i64_v8i16:
4014; AVX2: # BB#0:
4015; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1
4016; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
4017; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
4018; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4019; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
4020; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
4021; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
4022; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
4023; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4024; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
4025; AVX2-NEXT: vzeroupper
4026; AVX2-NEXT: retq
4027;
4028; AVX512-LABEL: trunc_xor_v8i64_v8i16:
4029; AVX512: # BB#0:
4030; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
4031; AVX512-NEXT: vpmovqw %zmm0, %xmm0
4032; AVX512-NEXT: retq
4033 %1 = xor <8 x i64> %a0, %a1
4034 %2 = trunc <8 x i64> %1 to <8 x i16>
4035 ret <8 x i16> %2
4036}
4037
4038define <8 x i16> @trunc_xor_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
4039; SSE-LABEL: trunc_xor_v8i32_v8i16:
4040; SSE: # BB#0:
4041; SSE-NEXT: pxor %xmm2, %xmm0
4042; SSE-NEXT: pxor %xmm3, %xmm1
4043; SSE-NEXT: pslld $16, %xmm1
4044; SSE-NEXT: psrad $16, %xmm1
4045; SSE-NEXT: pslld $16, %xmm0
4046; SSE-NEXT: psrad $16, %xmm0
4047; SSE-NEXT: packssdw %xmm1, %xmm0
4048; SSE-NEXT: retq
4049;
4050; AVX1-LABEL: trunc_xor_v8i32_v8i16:
4051; AVX1: # BB#0:
4052; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
4053; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4054; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
4055; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
4056; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4057; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4058; AVX1-NEXT: vzeroupper
4059; AVX1-NEXT: retq
4060;
4061; AVX2-LABEL: trunc_xor_v8i32_v8i16:
4062; AVX2: # BB#0:
4063; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
4064; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
4065; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4066; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
4067; AVX2-NEXT: vzeroupper
4068; AVX2-NEXT: retq
4069;
4070; AVX512-LABEL: trunc_xor_v8i32_v8i16:
4071; AVX512: # BB#0:
4072; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
4073; AVX512-NEXT: vpmovdw %zmm0, %ymm0
4074; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
4075; AVX512-NEXT: retq
4076 %1 = xor <8 x i32> %a0, %a1
4077 %2 = trunc <8 x i32> %1 to <8 x i16>
4078 ret <8 x i16> %2
4079}
4080
4081define <16 x i8> @trunc_xor_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwind {
4082; SSE-LABEL: trunc_xor_v16i64_v16i8:
4083; SSE: # BB#0:
4084; SSE-NEXT: pxor {{[0-9]+}}(%rsp), %xmm0
4085; SSE-NEXT: pxor {{[0-9]+}}(%rsp), %xmm1
4086; SSE-NEXT: pxor {{[0-9]+}}(%rsp), %xmm2
4087; SSE-NEXT: pxor {{[0-9]+}}(%rsp), %xmm3
4088; SSE-NEXT: pxor {{[0-9]+}}(%rsp), %xmm4
4089; SSE-NEXT: pxor {{[0-9]+}}(%rsp), %xmm5
4090; SSE-NEXT: pxor {{[0-9]+}}(%rsp), %xmm6
4091; SSE-NEXT: pxor {{[0-9]+}}(%rsp), %xmm7
4092; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
4093; SSE-NEXT: pand %xmm8, %xmm7
4094; SSE-NEXT: pand %xmm8, %xmm6
4095; SSE-NEXT: packuswb %xmm7, %xmm6
4096; SSE-NEXT: pand %xmm8, %xmm5
4097; SSE-NEXT: pand %xmm8, %xmm4
4098; SSE-NEXT: packuswb %xmm5, %xmm4
4099; SSE-NEXT: packuswb %xmm6, %xmm4
4100; SSE-NEXT: pand %xmm8, %xmm3
4101; SSE-NEXT: pand %xmm8, %xmm2
4102; SSE-NEXT: packuswb %xmm3, %xmm2
4103; SSE-NEXT: pand %xmm8, %xmm1
4104; SSE-NEXT: pand %xmm8, %xmm0
4105; SSE-NEXT: packuswb %xmm1, %xmm0
4106; SSE-NEXT: packuswb %xmm2, %xmm0
4107; SSE-NEXT: packuswb %xmm4, %xmm0
4108; SSE-NEXT: retq
4109;
4110; AVX1-LABEL: trunc_xor_v16i64_v16i8:
4111; AVX1: # BB#0:
4112; AVX1-NEXT: vxorps %ymm4, %ymm0, %ymm0
4113; AVX1-NEXT: vxorps %ymm5, %ymm1, %ymm1
4114; AVX1-NEXT: vxorps %ymm6, %ymm2, %ymm2
4115; AVX1-NEXT: vxorps %ymm7, %ymm3, %ymm3
4116; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
4117; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
4118; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
4119; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
4120; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3
4121; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
4122; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
4123; AVX1-NEXT: vandps %xmm5, %xmm2, %xmm2
4124; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
4125; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
4126; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
4127; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
4128; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1
4129; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
4130; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
4131; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
4132; AVX1-NEXT: vandps %xmm5, %xmm0, %xmm0
4133; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
4134; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
4135; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
4136; AVX1-NEXT: vzeroupper
4137; AVX1-NEXT: retq
4138;
4139; AVX2-LABEL: trunc_xor_v16i64_v16i8:
4140; AVX2: # BB#0:
4141; AVX2-NEXT: vpxor %ymm5, %ymm1, %ymm1
4142; AVX2-NEXT: vpxor %ymm4, %ymm0, %ymm0
4143; AVX2-NEXT: vpxor %ymm7, %ymm3, %ymm3
4144; AVX2-NEXT: vpxor %ymm6, %ymm2, %ymm2
4145; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
4146; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
4147; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
4148; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
4149; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
4150; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
4151; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
4152; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
4153; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
4154; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
4155; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
4156; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4157; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
4158; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
4159; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
4160; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
4161; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4162; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
4163; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
4164; AVX2-NEXT: vzeroupper
4165; AVX2-NEXT: retq
4166;
4167; AVX512F-LABEL: trunc_xor_v16i64_v16i8:
4168; AVX512F: # BB#0:
4169; AVX512F-NEXT: vpxorq %zmm3, %zmm1, %zmm1
4170; AVX512F-NEXT: vpxorq %zmm2, %zmm0, %zmm0
4171; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
4172; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
4173; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
4174; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
4175; AVX512F-NEXT: retq
4176;
4177; AVX512BW-LABEL: trunc_xor_v16i64_v16i8:
4178; AVX512BW: # BB#0:
4179; AVX512BW-NEXT: vpxorq %zmm3, %zmm1, %zmm1
4180; AVX512BW-NEXT: vpxorq %zmm2, %zmm0, %zmm0
4181; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
4182; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
4183; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
4184; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
4185; AVX512BW-NEXT: retq
4186;
4187; AVX512DQ-LABEL: trunc_xor_v16i64_v16i8:
4188; AVX512DQ: # BB#0:
4189; AVX512DQ-NEXT: vpxorq %zmm3, %zmm1, %zmm1
4190; AVX512DQ-NEXT: vpxorq %zmm2, %zmm0, %zmm0
4191; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
4192; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
4193; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
4194; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
4195; AVX512DQ-NEXT: retq
4196 %1 = xor <16 x i64> %a0, %a1
4197 %2 = trunc <16 x i64> %1 to <16 x i8>
4198 ret <16 x i8> %2
4199}
4200
4201define <16 x i8> @trunc_xor_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwind {
4202; SSE-LABEL: trunc_xor_v16i32_v16i8:
4203; SSE: # BB#0:
4204; SSE-NEXT: pxor %xmm4, %xmm0
4205; SSE-NEXT: pxor %xmm5, %xmm1
4206; SSE-NEXT: pxor %xmm6, %xmm2
4207; SSE-NEXT: pxor %xmm7, %xmm3
4208; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
4209; SSE-NEXT: pand %xmm4, %xmm3
4210; SSE-NEXT: pand %xmm4, %xmm2
4211; SSE-NEXT: packuswb %xmm3, %xmm2
4212; SSE-NEXT: pand %xmm4, %xmm1
4213; SSE-NEXT: pand %xmm4, %xmm0
4214; SSE-NEXT: packuswb %xmm1, %xmm0
4215; SSE-NEXT: packuswb %xmm2, %xmm0
4216; SSE-NEXT: retq
4217;
4218; AVX1-LABEL: trunc_xor_v16i32_v16i8:
4219; AVX1: # BB#0:
4220; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
4221; AVX1-NEXT: vxorps %ymm3, %ymm1, %ymm1
4222; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4223; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
4224; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
4225; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
4226; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
4227; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4228; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
4229; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
4230; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
4231; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
4232; AVX1-NEXT: vzeroupper
4233; AVX1-NEXT: retq
4234;
4235; AVX2-LABEL: trunc_xor_v16i32_v16i8:
4236; AVX2: # BB#0:
4237; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
4238; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1
4239; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
4240; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
4241; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
4242; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
4243; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
4244; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
4245; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4246; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
4247; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4248; AVX2-NEXT: vzeroupper
4249; AVX2-NEXT: retq
4250;
4251; AVX512-LABEL: trunc_xor_v16i32_v16i8:
4252; AVX512: # BB#0:
4253; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
4254; AVX512-NEXT: vpmovdb %zmm0, %xmm0
4255; AVX512-NEXT: retq
4256 %1 = xor <16 x i32> %a0, %a1
4257 %2 = trunc <16 x i32> %1 to <16 x i8>
4258 ret <16 x i8> %2
4259}
4260
4261define <16 x i8> @trunc_xor_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind {
4262; SSE-LABEL: trunc_xor_v16i16_v16i8:
4263; SSE: # BB#0:
4264; SSE-NEXT: pxor %xmm2, %xmm0
4265; SSE-NEXT: pxor %xmm3, %xmm1
4266; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
4267; SSE-NEXT: pand %xmm2, %xmm1
4268; SSE-NEXT: pand %xmm2, %xmm0
4269; SSE-NEXT: packuswb %xmm1, %xmm0
4270; SSE-NEXT: retq
4271;
4272; AVX1-LABEL: trunc_xor_v16i16_v16i8:
4273; AVX1: # BB#0:
4274; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
4275; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4276; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
4277; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
4278; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4279; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4280; AVX1-NEXT: vzeroupper
4281; AVX1-NEXT: retq
4282;
4283; AVX2-LABEL: trunc_xor_v16i16_v16i8:
4284; AVX2: # BB#0:
4285; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
4286; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
4287; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
4288; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
4289; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4290; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4291; AVX2-NEXT: vzeroupper
4292; AVX2-NEXT: retq
4293;
4294; AVX512F-LABEL: trunc_xor_v16i16_v16i8:
4295; AVX512F: # BB#0:
4296; AVX512F-NEXT: vpxor %ymm1, %ymm0, %ymm0
4297; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
4298; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
4299; AVX512F-NEXT: retq
4300;
4301; AVX512BW-LABEL: trunc_xor_v16i16_v16i8:
4302; AVX512BW: # BB#0:
4303; AVX512BW-NEXT: vpxor %ymm1, %ymm0, %ymm0
4304; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
4305; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
4306; AVX512BW-NEXT: retq
4307;
4308; AVX512DQ-LABEL: trunc_xor_v16i16_v16i8:
4309; AVX512DQ: # BB#0:
4310; AVX512DQ-NEXT: vpxor %ymm1, %ymm0, %ymm0
4311; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
4312; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
4313; AVX512DQ-NEXT: retq
4314 %1 = xor <16 x i16> %a0, %a1
4315 %2 = trunc <16 x i16> %1 to <16 x i8>
4316 ret <16 x i8> %2
4317}
4318
4319;
4320; xor to constant
4321;
4322
4323define <4 x i32> @trunc_xor_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
4324; SSE-LABEL: trunc_xor_const_v4i64_v4i32:
4325; SSE: # BB#0:
4326; SSE-NEXT: movl $1, %eax
4327; SSE-NEXT: movd %rax, %xmm2
4328; SSE-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
4329; SSE-NEXT: pxor %xmm2, %xmm0
4330; SSE-NEXT: xorps {{.*}}(%rip), %xmm1
4331; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
4332; SSE-NEXT: retq
4333;
4334; AVX1-LABEL: trunc_xor_const_v4i64_v4i32:
4335; AVX1: # BB#0:
4336; AVX1-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
4337; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4338; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
4339; AVX1-NEXT: vzeroupper
4340; AVX1-NEXT: retq
4341;
4342; AVX2-LABEL: trunc_xor_const_v4i64_v4i32:
4343; AVX2: # BB#0:
4344; AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
4345; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
4346; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4347; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
4348; AVX2-NEXT: vzeroupper
4349; AVX2-NEXT: retq
4350;
4351; AVX512-LABEL: trunc_xor_const_v4i64_v4i32:
4352; AVX512: # BB#0:
4353; AVX512-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
4354; AVX512-NEXT: vpmovqd %zmm0, %ymm0
4355; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
4356; AVX512-NEXT: retq
4357 %1 = xor <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
4358 %2 = trunc <4 x i64> %1 to <4 x i32>
4359 ret <4 x i32> %2
4360}
4361
4362define <8 x i16> @trunc_xor_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
4363; SSE-LABEL: trunc_xor_const_v8i64_v8i16:
4364; SSE: # BB#0:
4365; SSE-NEXT: movl $1, %eax
4366; SSE-NEXT: movd %rax, %xmm4
4367; SSE-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
4368; SSE-NEXT: pxor %xmm0, %xmm4
4369; SSE-NEXT: pxor {{.*}}(%rip), %xmm1
4370; SSE-NEXT: pxor {{.*}}(%rip), %xmm2
4371; SSE-NEXT: pxor {{.*}}(%rip), %xmm3
4372; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
4373; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm0[0,1,0,2,4,5,6,7]
4374; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
4375; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
4376; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
4377; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[0,2,2,3]
4378; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
4379; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
4380; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
4381; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
4382; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
4383; SSE-NEXT: retq
4384;
4385; AVX1-LABEL: trunc_xor_const_v8i64_v8i16:
4386; AVX1: # BB#0:
4387; AVX1-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
4388; AVX1-NEXT: vxorps {{.*}}(%rip), %ymm1, %ymm1
4389; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4390; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
4391; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
4392; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
4393; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
4394; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4395; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
4396; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
4397; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
4398; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
4399; AVX1-NEXT: vzeroupper
4400; AVX1-NEXT: retq
4401;
4402; AVX2-LABEL: trunc_xor_const_v8i64_v8i16:
4403; AVX2: # BB#0:
4404; AVX2-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1
4405; AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
4406; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
4407; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4408; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
4409; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
4410; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
4411; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
4412; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4413; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
4414; AVX2-NEXT: vzeroupper
4415; AVX2-NEXT: retq
4416;
4417; AVX512-LABEL: trunc_xor_const_v8i64_v8i16:
4418; AVX512: # BB#0:
4419; AVX512-NEXT: vpxorq {{.*}}(%rip), %zmm0, %zmm0
4420; AVX512-NEXT: vpmovqw %zmm0, %xmm0
4421; AVX512-NEXT: retq
4422 %1 = xor <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
4423 %2 = trunc <8 x i64> %1 to <8 x i16>
4424 ret <8 x i16> %2
4425}
4426
4427define <8 x i16> @trunc_xor_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
4428; SSE-LABEL: trunc_xor_const_v8i32_v8i16:
4429; SSE: # BB#0:
4430; SSE-NEXT: pxor {{.*}}(%rip), %xmm0
4431; SSE-NEXT: pxor {{.*}}(%rip), %xmm1
4432; SSE-NEXT: pslld $16, %xmm1
4433; SSE-NEXT: psrad $16, %xmm1
4434; SSE-NEXT: pslld $16, %xmm0
4435; SSE-NEXT: psrad $16, %xmm0
4436; SSE-NEXT: packssdw %xmm1, %xmm0
4437; SSE-NEXT: retq
4438;
4439; AVX1-LABEL: trunc_xor_const_v8i32_v8i16:
4440; AVX1: # BB#0:
4441; AVX1-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
4442; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4443; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
4444; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
4445; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4446; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4447; AVX1-NEXT: vzeroupper
4448; AVX1-NEXT: retq
4449;
4450; AVX2-LABEL: trunc_xor_const_v8i32_v8i16:
4451; AVX2: # BB#0:
4452; AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
4453; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
4454; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4455; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
4456; AVX2-NEXT: vzeroupper
4457; AVX2-NEXT: retq
4458;
4459; AVX512-LABEL: trunc_xor_const_v8i32_v8i16:
4460; AVX512: # BB#0:
4461; AVX512-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
4462; AVX512-NEXT: vpmovdw %zmm0, %ymm0
4463; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
4464; AVX512-NEXT: retq
4465 %1 = xor <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4466 %2 = trunc <8 x i32> %1 to <8 x i16>
4467 ret <8 x i16> %2
4468}
4469
4470define <16 x i8> @trunc_xor_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
4471; SSE-LABEL: trunc_xor_const_v16i64_v16i8:
4472; SSE: # BB#0:
4473; SSE-NEXT: movl $1, %eax
4474; SSE-NEXT: movd %rax, %xmm8
4475; SSE-NEXT: pslldq {{.*#+}} xmm8 = zero,zero,zero,zero,zero,zero,zero,zero,xmm8[0,1,2,3,4,5,6,7]
4476; SSE-NEXT: pxor %xmm8, %xmm0
4477; SSE-NEXT: pxor {{.*}}(%rip), %xmm1
4478; SSE-NEXT: pxor {{.*}}(%rip), %xmm2
4479; SSE-NEXT: pxor {{.*}}(%rip), %xmm3
4480; SSE-NEXT: pxor {{.*}}(%rip), %xmm4
4481; SSE-NEXT: pxor {{.*}}(%rip), %xmm5
4482; SSE-NEXT: pxor {{.*}}(%rip), %xmm6
4483; SSE-NEXT: pxor {{.*}}(%rip), %xmm7
4484; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
4485; SSE-NEXT: pand %xmm8, %xmm7
4486; SSE-NEXT: pand %xmm8, %xmm6
4487; SSE-NEXT: packuswb %xmm7, %xmm6
4488; SSE-NEXT: pand %xmm8, %xmm5
4489; SSE-NEXT: pand %xmm8, %xmm4
4490; SSE-NEXT: packuswb %xmm5, %xmm4
4491; SSE-NEXT: packuswb %xmm6, %xmm4
4492; SSE-NEXT: pand %xmm8, %xmm3
4493; SSE-NEXT: pand %xmm8, %xmm2
4494; SSE-NEXT: packuswb %xmm3, %xmm2
4495; SSE-NEXT: pand %xmm8, %xmm1
4496; SSE-NEXT: pand %xmm8, %xmm0
4497; SSE-NEXT: packuswb %xmm1, %xmm0
4498; SSE-NEXT: packuswb %xmm2, %xmm0
4499; SSE-NEXT: packuswb %xmm4, %xmm0
4500; SSE-NEXT: retq
4501;
4502; AVX1-LABEL: trunc_xor_const_v16i64_v16i8:
4503; AVX1: # BB#0:
4504; AVX1-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
4505; AVX1-NEXT: vxorps {{.*}}(%rip), %ymm1, %ymm1
4506; AVX1-NEXT: vxorps {{.*}}(%rip), %ymm2, %ymm2
4507; AVX1-NEXT: vxorps {{.*}}(%rip), %ymm3, %ymm3
4508; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
4509; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
4510; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
4511; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
4512; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3
4513; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
4514; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
4515; AVX1-NEXT: vandps %xmm5, %xmm2, %xmm2
4516; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
4517; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
4518; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
4519; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
4520; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1
4521; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
4522; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
4523; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
4524; AVX1-NEXT: vandps %xmm5, %xmm0, %xmm0
4525; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
4526; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
4527; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
4528; AVX1-NEXT: vzeroupper
4529; AVX1-NEXT: retq
4530;
4531; AVX2-LABEL: trunc_xor_const_v16i64_v16i8:
4532; AVX2: # BB#0:
4533; AVX2-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1
4534; AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
4535; AVX2-NEXT: vpxor {{.*}}(%rip), %ymm3, %ymm3
4536; AVX2-NEXT: vpxor {{.*}}(%rip), %ymm2, %ymm2
4537; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
4538; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
4539; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
4540; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
4541; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
4542; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
4543; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
4544; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
4545; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
4546; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
4547; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
4548; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4549; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
4550; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
4551; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
4552; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
4553; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4554; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
4555; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
4556; AVX2-NEXT: vzeroupper
4557; AVX2-NEXT: retq
4558;
4559; AVX512F-LABEL: trunc_xor_const_v16i64_v16i8:
4560; AVX512F: # BB#0:
4561; AVX512F-NEXT: vpxorq {{.*}}(%rip), %zmm1, %zmm1
4562; AVX512F-NEXT: vpxorq {{.*}}(%rip), %zmm0, %zmm0
4563; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
4564; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
4565; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
4566; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
4567; AVX512F-NEXT: retq
4568;
4569; AVX512BW-LABEL: trunc_xor_const_v16i64_v16i8:
4570; AVX512BW: # BB#0:
4571; AVX512BW-NEXT: vpxorq {{.*}}(%rip), %zmm1, %zmm1
4572; AVX512BW-NEXT: vpxorq {{.*}}(%rip), %zmm0, %zmm0
4573; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
4574; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
4575; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
4576; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
4577; AVX512BW-NEXT: retq
4578;
4579; AVX512DQ-LABEL: trunc_xor_const_v16i64_v16i8:
4580; AVX512DQ: # BB#0:
4581; AVX512DQ-NEXT: vpxorq {{.*}}(%rip), %zmm1, %zmm1
4582; AVX512DQ-NEXT: vpxorq {{.*}}(%rip), %zmm0, %zmm0
4583; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
4584; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
4585; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
4586; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
4587; AVX512DQ-NEXT: retq
4588 %1 = xor <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
4589 %2 = trunc <16 x i64> %1 to <16 x i8>
4590 ret <16 x i8> %2
4591}
4592
4593define <16 x i8> @trunc_xor_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
4594; SSE-LABEL: trunc_xor_const_v16i32_v16i8:
4595; SSE: # BB#0:
4596; SSE-NEXT: pxor {{.*}}(%rip), %xmm0
4597; SSE-NEXT: pxor {{.*}}(%rip), %xmm1
4598; SSE-NEXT: pxor {{.*}}(%rip), %xmm2
4599; SSE-NEXT: pxor {{.*}}(%rip), %xmm3
4600; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
4601; SSE-NEXT: pand %xmm4, %xmm3
4602; SSE-NEXT: pand %xmm4, %xmm2
4603; SSE-NEXT: packuswb %xmm3, %xmm2
4604; SSE-NEXT: pand %xmm4, %xmm1
4605; SSE-NEXT: pand %xmm4, %xmm0
4606; SSE-NEXT: packuswb %xmm1, %xmm0
4607; SSE-NEXT: packuswb %xmm2, %xmm0
4608; SSE-NEXT: retq
4609;
4610; AVX1-LABEL: trunc_xor_const_v16i32_v16i8:
4611; AVX1: # BB#0:
4612; AVX1-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
4613; AVX1-NEXT: vxorps {{.*}}(%rip), %ymm1, %ymm1
4614; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4615; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
4616; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
4617; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
4618; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
4619; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4620; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
4621; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
4622; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
4623; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
4624; AVX1-NEXT: vzeroupper
4625; AVX1-NEXT: retq
4626;
4627; AVX2-LABEL: trunc_xor_const_v16i32_v16i8:
4628; AVX2: # BB#0:
4629; AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
4630; AVX2-NEXT: vpxor {{.*}}(%rip), %ymm1, %ymm1
4631; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
4632; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
4633; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
4634; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
4635; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
4636; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
4637; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4638; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
4639; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4640; AVX2-NEXT: vzeroupper
4641; AVX2-NEXT: retq
4642;
4643; AVX512-LABEL: trunc_xor_const_v16i32_v16i8:
4644; AVX512: # BB#0:
4645; AVX512-NEXT: vpxord {{.*}}(%rip), %zmm0, %zmm0
4646; AVX512-NEXT: vpmovdb %zmm0, %xmm0
4647; AVX512-NEXT: retq
4648 %1 = xor <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4649 %2 = trunc <16 x i32> %1 to <16 x i8>
4650 ret <16 x i8> %2
4651}
4652
4653define <16 x i8> @trunc_xor_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
4654; SSE-LABEL: trunc_xor_const_v16i16_v16i8:
4655; SSE: # BB#0:
4656; SSE-NEXT: pxor {{.*}}(%rip), %xmm0
4657; SSE-NEXT: pxor {{.*}}(%rip), %xmm1
4658; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
4659; SSE-NEXT: pand %xmm2, %xmm1
4660; SSE-NEXT: pand %xmm2, %xmm0
4661; SSE-NEXT: packuswb %xmm1, %xmm0
4662; SSE-NEXT: retq
4663;
4664; AVX1-LABEL: trunc_xor_const_v16i16_v16i8:
4665; AVX1: # BB#0:
4666; AVX1-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
4667; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4668; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
4669; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
4670; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4671; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4672; AVX1-NEXT: vzeroupper
4673; AVX1-NEXT: retq
4674;
4675; AVX2-LABEL: trunc_xor_const_v16i16_v16i8:
4676; AVX2: # BB#0:
4677; AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
4678; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
4679; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
4680; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
4681; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4682; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4683; AVX2-NEXT: vzeroupper
4684; AVX2-NEXT: retq
4685;
4686; AVX512F-LABEL: trunc_xor_const_v16i16_v16i8:
4687; AVX512F: # BB#0:
Craig Topper52e2e832016-07-22 05:46:44 +00004688; AVX512F-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00004689; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
4690; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
4691; AVX512F-NEXT: retq
4692;
4693; AVX512BW-LABEL: trunc_xor_const_v16i16_v16i8:
4694; AVX512BW: # BB#0:
Craig Topper52e2e832016-07-22 05:46:44 +00004695; AVX512BW-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00004696; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
Matthias Braun152e7c82016-07-09 00:19:07 +00004697; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00004698; AVX512BW-NEXT: retq
Simon Pilgrimc5fde8d2016-12-30 22:43:41 +00004699;
4700; AVX512DQ-LABEL: trunc_xor_const_v16i16_v16i8:
4701; AVX512DQ: # BB#0:
4702; AVX512DQ-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
4703; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
4704; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
4705; AVX512DQ-NEXT: retq
Simon Pilgrim5f1326f2016-03-13 19:08:01 +00004706 %1 = xor <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
4707 %2 = trunc <16 x i16> %1 to <16 x i8>
4708 ret <16 x i8> %2
4709}
4710
4711;
4712; or
4713;
4714
define <4 x i32> @trunc_or_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; SSE-LABEL: trunc_or_v4i64_v4i32:
; SSE: # BB#0:
; SSE-NEXT: orps %xmm3, %xmm1
; SSE-NEXT: orps %xmm2, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_v4i64_v4i32:
; AVX1: # BB#0:
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_v4i64_v4i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_or_v4i64_v4i32:
; AVX512: # BB#0:
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
  %1 = or <4 x i64> %a0, %a1
  %2 = trunc <4 x i64> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @trunc_or_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; SSE-LABEL: trunc_or_v8i64_v8i16:
; SSE: # BB#0:
; SSE-NEXT: por %xmm4, %xmm0
; SSE-NEXT: por %xmm5, %xmm1
; SSE-NEXT: por %xmm6, %xmm2
; SSE-NEXT: por %xmm7, %xmm3
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_v8i64_v8i16:
; AVX1: # BB#0:
; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_v8i64_v8i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpor %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_or_v8i64_v8i16:
; AVX512: # BB#0:
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: retq
  %1 = or <8 x i64> %a0, %a1
  %2 = trunc <8 x i64> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <8 x i16> @trunc_or_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; SSE-LABEL: trunc_or_v8i32_v8i16:
; SSE: # BB#0:
; SSE-NEXT: por %xmm2, %xmm0
; SSE-NEXT: por %xmm3, %xmm1
; SSE-NEXT: pslld $16, %xmm1
; SSE-NEXT: psrad $16, %xmm1
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_v8i32_v8i16:
; AVX1: # BB#0:
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_v8i32_v8i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_or_v8i32_v8i16:
; AVX512: # BB#0:
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
  %1 = or <8 x i32> %a0, %a1
  %2 = trunc <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <16 x i8> @trunc_or_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwind {
; SSE-LABEL: trunc_or_v16i64_v16i8:
; SSE: # BB#0:
; SSE-NEXT: por {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: por {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: por {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: por {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: por {{[0-9]+}}(%rsp), %xmm4
; SSE-NEXT: por {{[0-9]+}}(%rsp), %xmm5
; SSE-NEXT: por {{[0-9]+}}(%rsp), %xmm6
; SSE-NEXT: por {{[0-9]+}}(%rsp), %xmm7
; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT: pand %xmm8, %xmm7
; SSE-NEXT: pand %xmm8, %xmm6
; SSE-NEXT: packuswb %xmm7, %xmm6
; SSE-NEXT: pand %xmm8, %xmm5
; SSE-NEXT: pand %xmm8, %xmm4
; SSE-NEXT: packuswb %xmm5, %xmm4
; SSE-NEXT: packuswb %xmm6, %xmm4
; SSE-NEXT: pand %xmm8, %xmm3
; SSE-NEXT: pand %xmm8, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm8, %xmm1
; SSE-NEXT: pand %xmm8, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm4, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_v16i64_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vorps %ymm4, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm5, %ymm1, %ymm1
; AVX1-NEXT: vorps %ymm6, %ymm2, %ymm2
; AVX1-NEXT: vorps %ymm7, %ymm3, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_v16i64_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpor %ymm5, %ymm1, %ymm1
; AVX2-NEXT: vpor %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm7, %ymm3, %ymm3
; AVX2-NEXT: vpor %ymm6, %ymm2, %ymm2
; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_or_v16i64_v16i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vporq %zmm3, %zmm1, %zmm1
; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_or_v16i64_v16i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vporq %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: vporq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_or_v16i64_v16i8:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vporq %zmm3, %zmm1, %zmm1
; AVX512DQ-NEXT: vporq %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: retq
  %1 = or <16 x i64> %a0, %a1
  %2 = trunc <16 x i64> %1 to <16 x i8>
  ret <16 x i8> %2
}

define <16 x i8> @trunc_or_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwind {
; SSE-LABEL: trunc_or_v16i32_v16i8:
; SSE: # BB#0:
; SSE-NEXT: por %xmm4, %xmm0
; SSE-NEXT: por %xmm5, %xmm1
; SSE-NEXT: por %xmm6, %xmm2
; SSE-NEXT: por %xmm7, %xmm3
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT: pand %xmm4, %xmm3
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_v16i32_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_v16i32_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_or_v16i32_v16i8:
; AVX512: # BB#0:
; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: retq
  %1 = or <16 x i32> %a0, %a1
  %2 = trunc <16 x i32> %1 to <16 x i8>
  ret <16 x i8> %2
}

define <16 x i8> @trunc_or_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind {
; SSE-LABEL: trunc_or_v16i16_v16i8:
; SSE: # BB#0:
; SSE-NEXT: por %xmm2, %xmm0
; SSE-NEXT: por %xmm3, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_v16i16_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_v16i16_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_or_v16i16_v16i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_or_v16i16_v16i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_or_v16i16_v16i8:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: retq
  %1 = or <16 x i16> %a0, %a1
  %2 = trunc <16 x i16> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; or to constant
;

define <4 x i32> @trunc_or_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; SSE-LABEL: trunc_or_const_v4i64_v4i32:
; SSE: # BB#0:
; SSE-NEXT: movl $1, %eax
; SSE-NEXT: movd %rax, %xmm2
; SSE-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
; SSE-NEXT: por %xmm2, %xmm0
; SSE-NEXT: orps {{.*}}(%rip), %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_const_v4i64_v4i32:
; AVX1: # BB#0:
; AVX1-NEXT: vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_const_v4i64_v4i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpor {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_or_const_v4i64_v4i32:
; AVX512: # BB#0:
; AVX512-NEXT: vpor {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
  %1 = or <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @trunc_or_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
; SSE-LABEL: trunc_or_const_v8i64_v8i16:
; SSE: # BB#0:
; SSE-NEXT: movl $1, %eax
; SSE-NEXT: movd %rax, %xmm4
; SSE-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
; SSE-NEXT: por %xmm0, %xmm4
; SSE-NEXT: por {{.*}}(%rip), %xmm1
; SSE-NEXT: por {{.*}}(%rip), %xmm2
; SSE-NEXT: por {{.*}}(%rip), %xmm3
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm0[0,1,0,2,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_const_v8i64_v8i16:
; AVX1: # BB#0:
; AVX1-NEXT: vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vorps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_const_v8i64_v8i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpor {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vpor {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_or_const_v8i64_v8i16:
; AVX512: # BB#0:
; AVX512-NEXT: vporq {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: retq
  %1 = or <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
  %2 = trunc <8 x i64> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <8 x i16> @trunc_or_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
; SSE-LABEL: trunc_or_const_v8i32_v8i16:
; SSE: # BB#0:
; SSE-NEXT: por {{.*}}(%rip), %xmm0
; SSE-NEXT: por {{.*}}(%rip), %xmm1
; SSE-NEXT: pslld $16, %xmm1
; SSE-NEXT: psrad $16, %xmm1
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_const_v8i32_v8i16:
; AVX1: # BB#0:
; AVX1-NEXT: vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_const_v8i32_v8i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpor {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_or_const_v8i32_v8i16:
; AVX512: # BB#0:
; AVX512-NEXT: vpor {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
  %1 = or <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = trunc <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <16 x i8> @trunc_or_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; SSE-LABEL: trunc_or_const_v16i64_v16i8:
; SSE: # BB#0:
; SSE-NEXT: movl $1, %eax
; SSE-NEXT: movd %rax, %xmm8
; SSE-NEXT: pslldq {{.*#+}} xmm8 = zero,zero,zero,zero,zero,zero,zero,zero,xmm8[0,1,2,3,4,5,6,7]
; SSE-NEXT: por %xmm8, %xmm0
; SSE-NEXT: por {{.*}}(%rip), %xmm1
; SSE-NEXT: por {{.*}}(%rip), %xmm2
; SSE-NEXT: por {{.*}}(%rip), %xmm3
; SSE-NEXT: por {{.*}}(%rip), %xmm4
; SSE-NEXT: por {{.*}}(%rip), %xmm5
; SSE-NEXT: por {{.*}}(%rip), %xmm6
; SSE-NEXT: por {{.*}}(%rip), %xmm7
; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT: pand %xmm8, %xmm7
; SSE-NEXT: pand %xmm8, %xmm6
; SSE-NEXT: packuswb %xmm7, %xmm6
; SSE-NEXT: pand %xmm8, %xmm5
; SSE-NEXT: pand %xmm8, %xmm4
; SSE-NEXT: packuswb %xmm5, %xmm4
; SSE-NEXT: packuswb %xmm6, %xmm4
; SSE-NEXT: pand %xmm8, %xmm3
; SSE-NEXT: pand %xmm8, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm8, %xmm1
; SSE-NEXT: pand %xmm8, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm4, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_const_v16i64_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vorps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT: vorps {{.*}}(%rip), %ymm2, %ymm2
; AVX1-NEXT: vorps {{.*}}(%rip), %ymm3, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_const_v16i64_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpor {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vpor {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpor {{.*}}(%rip), %ymm3, %ymm3
; AVX2-NEXT: vpor {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_or_const_v16i64_v16i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vporq {{.*}}(%rip), %zmm1, %zmm1
; AVX512F-NEXT: vporq {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_or_const_v16i64_v16i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vporq {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vporq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_or_const_v16i64_v16i8:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vporq {{.*}}(%rip), %zmm1, %zmm1
; AVX512DQ-NEXT: vporq {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: retq
  %1 = or <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
  %2 = trunc <16 x i64> %1 to <16 x i8>
  ret <16 x i8> %2
}

define <16 x i8> @trunc_or_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
; SSE-LABEL: trunc_or_const_v16i32_v16i8:
; SSE: # BB#0:
; SSE-NEXT: por {{.*}}(%rip), %xmm0
; SSE-NEXT: por {{.*}}(%rip), %xmm1
; SSE-NEXT: por {{.*}}(%rip), %xmm2
; SSE-NEXT: por {{.*}}(%rip), %xmm3
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT: pand %xmm4, %xmm3
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_const_v16i32_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vorps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_const_v16i32_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpor {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpor {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_or_const_v16i32_v16i8:
; AVX512: # BB#0:
; AVX512-NEXT: vpord {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: retq
  %1 = or <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %2 = trunc <16 x i32> %1 to <16 x i8>
  ret <16 x i8> %2
}

define <16 x i8> @trunc_or_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; SSE-LABEL: trunc_or_const_v16i16_v16i8:
; SSE: # BB#0:
; SSE-NEXT: por {{.*}}(%rip), %xmm0
; SSE-NEXT: por {{.*}}(%rip), %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_const_v16i16_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_const_v16i16_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpor {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_or_const_v16i16_v16i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpor {{.*}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_or_const_v16i16_v16i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpor {{.*}}(%rip), %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_or_const_v16i16_v16i8:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpor {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: retq
  %1 = or <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  %2 = trunc <16 x i16> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; complex patterns - often created by vectorizer
;

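; A minimal sketch (an illustrative assumption, not part of the generated checks):
; a scalar loop such as the C code below is the kind of source the loop
; vectorizer widens into the sext+mul+add+trunc sequence exercised by
; mul_add_v4i64_v4i32 below. The function and parameter names are hypothetical;
; the multiply is performed in 64 bits, a per-lane constant 2*i - 3 is added
; (matching <i64 -3, i64 -1, i64 1, i64 3>), and the result is truncated back
; to 32 bits.
;
;   void mul_add(const int *a, const int *b, int *r) {
;     for (int i = 0; i < 4; ++i)
;       r[i] = (int)((long long)a[i] * (long long)b[i] + (2 * i - 3));
;   }
;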
define <4 x i32> @mul_add_v4i64_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; SSE-LABEL: mul_add_v4i64_v4i32:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrad $31, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT: movdqa %xmm3, %xmm2
; SSE-NEXT: psrad $31, %xmm2
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE-NEXT: movdqa %xmm1, %xmm4
; SSE-NEXT: psrad $31, %xmm4
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
; SSE-NEXT: movdqa %xmm2, %xmm4
; SSE-NEXT: psrad $31, %xmm4
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE-NEXT: movdqa %xmm3, %xmm4
; SSE-NEXT: psrlq $32, %xmm4
; SSE-NEXT: pmuludq %xmm2, %xmm4
; SSE-NEXT: movdqa %xmm2, %xmm5
; SSE-NEXT: psrlq $32, %xmm5
; SSE-NEXT: pmuludq %xmm3, %xmm5
; SSE-NEXT: paddq %xmm4, %xmm5
; SSE-NEXT: psllq $32, %xmm5
; SSE-NEXT: pmuludq %xmm3, %xmm2
; SSE-NEXT: paddq %xmm5, %xmm2
; SSE-NEXT: movdqa %xmm0, %xmm3
; SSE-NEXT: psrlq $32, %xmm3
; SSE-NEXT: pmuludq %xmm1, %xmm3
; SSE-NEXT: movdqa %xmm1, %xmm4
; SSE-NEXT: psrlq $32, %xmm4
; SSE-NEXT: pmuludq %xmm0, %xmm4
; SSE-NEXT: paddq %xmm3, %xmm4
; SSE-NEXT: psllq $32, %xmm4
; SSE-NEXT: pmuludq %xmm1, %xmm0
; SSE-NEXT: paddq %xmm4, %xmm0
; SSE-NEXT: paddq {{.*}}(%rip), %xmm0
; SSE-NEXT: paddq {{.*}}(%rip), %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; SSE-NEXT: retq
;
; AVX1-LABEL: mul_add_v4i64_v4i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmuldq %xmm3, %xmm2, %xmm1
; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[0,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: mul_add_v4i64_v4i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT: vpmuldq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: mul_add_v4i64_v4i32:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX512F-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX512F-NEXT: vpmuldq %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: mul_add_v4i64_v4i32:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX512BW-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX512BW-NEXT: vpmuldq %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: mul_add_v4i64_v4i32:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX512DQ-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512DQ-NEXT: retq
  %1 = sext <4 x i32> %a0 to <4 x i64>
  %2 = sext <4 x i32> %a1 to <4 x i64>
  %3 = mul <4 x i64> %1, %2
  %4 = add <4 x i64> %3, <i64 -3, i64 -1, i64 1, i64 3>
  %5 = trunc <4 x i64> %4 to <4 x i32>
  ret <4 x i32> %5
}