; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512,AVX512VBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLBW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLVBMI2

declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
declare <64 x i8> @llvm.fshl.v64i8(<64 x i8>, <64 x i8>, <64 x i8>)

;
; Variable Shifts
;

; Per-element variable funnel-shift-left of i64 lanes: fshl(x, y, amt).
define <8 x i64> @var_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt) nounwind {
; AVX512F-LABEL: var_funnnel_v8i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
; AVX512F-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512F-NEXT: vpsllvq %zmm4, %zmm0, %zmm5
; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm6 = [64,64,64,64,64,64,64,64]
; AVX512F-NEXT: vpsubq %zmm4, %zmm6, %zmm4
; AVX512F-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
; AVX512F-NEXT: vporq %zmm1, %zmm5, %zmm1
; AVX512F-NEXT: vptestnmq %zmm3, %zmm2, %k1
; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: var_funnnel_v8i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
; AVX512VL-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512VL-NEXT: vpsllvq %zmm4, %zmm0, %zmm5
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm6 = [64,64,64,64,64,64,64,64]
; AVX512VL-NEXT: vpsubq %zmm4, %zmm6, %zmm4
; AVX512VL-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
; AVX512VL-NEXT: vporq %zmm1, %zmm5, %zmm1
; AVX512VL-NEXT: vptestnmq %zmm3, %zmm2, %k1
; AVX512VL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; AVX512VL-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: var_funnnel_v8i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512BW-NEXT: vpsllvq %zmm4, %zmm0, %zmm5
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm6 = [64,64,64,64,64,64,64,64]
; AVX512BW-NEXT: vpsubq %zmm4, %zmm6, %zmm4
; AVX512BW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vporq %zmm1, %zmm5, %zmm1
; AVX512BW-NEXT: vptestnmq %zmm3, %zmm2, %k1
; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: var_funnnel_v8i64:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v8i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512VLBW-NEXT: vpsllvq %zmm4, %zmm0, %zmm5
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} zmm6 = [64,64,64,64,64,64,64,64]
; AVX512VLBW-NEXT: vpsubq %zmm4, %zmm6, %zmm4
; AVX512VLBW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
; AVX512VLBW-NEXT: vporq %zmm1, %zmm5, %zmm1
; AVX512VLBW-NEXT: vptestnmq %zmm3, %zmm2, %k1
; AVX512VLBW-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v8i64:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
  %res = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt)
  ret <8 x i64> %res
}
; Per-element variable funnel-shift-left of i32 lanes: fshl(x, y, amt).
define <16 x i32> @var_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> %amt) nounwind {
; AVX512F-LABEL: var_funnnel_v16i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512F-NEXT: vpandd %zmm3, %zmm2, %zmm4
; AVX512F-NEXT: vpsllvd %zmm4, %zmm0, %zmm5
; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; AVX512F-NEXT: vpsubd %zmm4, %zmm6, %zmm4
; AVX512F-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1
; AVX512F-NEXT: vpord %zmm1, %zmm5, %zmm1
; AVX512F-NEXT: vptestnmd %zmm3, %zmm2, %k1
; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: var_funnnel_v16i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512VL-NEXT: vpandd %zmm3, %zmm2, %zmm4
; AVX512VL-NEXT: vpsllvd %zmm4, %zmm0, %zmm5
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} zmm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; AVX512VL-NEXT: vpsubd %zmm4, %zmm6, %zmm4
; AVX512VL-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1
; AVX512VL-NEXT: vpord %zmm1, %zmm5, %zmm1
; AVX512VL-NEXT: vptestnmd %zmm3, %zmm2, %k1
; AVX512VL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; AVX512VL-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: var_funnnel_v16i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512BW-NEXT: vpandd %zmm3, %zmm2, %zmm4
; AVX512BW-NEXT: vpsllvd %zmm4, %zmm0, %zmm5
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} zmm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; AVX512BW-NEXT: vpsubd %zmm4, %zmm6, %zmm4
; AVX512BW-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vpord %zmm1, %zmm5, %zmm1
; AVX512BW-NEXT: vptestnmd %zmm3, %zmm2, %k1
; AVX512BW-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: var_funnnel_v16i32:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v16i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512VLBW-NEXT: vpandd %zmm3, %zmm2, %zmm4
; AVX512VLBW-NEXT: vpsllvd %zmm4, %zmm0, %zmm5
; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} zmm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; AVX512VLBW-NEXT: vpsubd %zmm4, %zmm6, %zmm4
; AVX512VLBW-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpord %zmm1, %zmm5, %zmm1
; AVX512VLBW-NEXT: vptestnmd %zmm3, %zmm2, %k1
; AVX512VLBW-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v16i32:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
  %res = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> %amt)
  ret <16 x i32> %res
}
; Per-element variable funnel-shift-left of i16 lanes: fshl(x, y, amt).
define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: var_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm7 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm8 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpsllvd %zmm7, %zmm8, %zmm7
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm8 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %ymm4, %ymm8, %ymm9
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm9 = ymm9[0],zero,ymm9[1],zero,ymm9[2],zero,ymm9[3],zero,ymm9[4],zero,ymm9[5],zero,ymm9[6],zero,ymm9[7],zero,ymm9[8],zero,ymm9[9],zero,ymm9[10],zero,ymm9[11],zero,ymm9[12],zero,ymm9[13],zero,ymm9[14],zero,ymm9[15],zero
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT: vpsrlvd %zmm9, %zmm2, %zmm2
; AVX512F-NEXT: vpord %zmm2, %zmm7, %zmm2
; AVX512F-NEXT: vpmovdw %zmm2, %ymm2
; AVX512F-NEXT: vpxor %xmm7, %xmm7, %xmm7
; AVX512F-NEXT: vpcmpeqw %ymm7, %ymm4, %ymm4
; AVX512F-NEXT: vpblendvb %ymm4, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vpand %ymm6, %ymm5, %ymm2
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT: vpsllvd %zmm4, %zmm5, %zmm4
; AVX512F-NEXT: vpsubw %ymm2, %ymm8, %ymm5
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm5[0],zero,ymm5[1],zero,ymm5[2],zero,ymm5[3],zero,ymm5[4],zero,ymm5[5],zero,ymm5[6],zero,ymm5[7],zero,ymm5[8],zero,ymm5[9],zero,ymm5[10],zero,ymm5[11],zero,ymm5[12],zero,ymm5[13],zero,ymm5[14],zero,ymm5[15],zero
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
; AVX512F-NEXT: vpsrlvd %zmm5, %zmm3, %zmm3
; AVX512F-NEXT: vpord %zmm3, %zmm4, %zmm3
; AVX512F-NEXT: vpmovdw %zmm3, %ymm3
; AVX512F-NEXT: vpcmpeqw %ymm7, %ymm2, %ymm2
; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm3, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: var_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm7 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm8 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpsllvd %zmm7, %zmm8, %zmm7
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %ymm4, %ymm8, %ymm9
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm9 = ymm9[0],zero,ymm9[1],zero,ymm9[2],zero,ymm9[3],zero,ymm9[4],zero,ymm9[5],zero,ymm9[6],zero,ymm9[7],zero,ymm9[8],zero,ymm9[9],zero,ymm9[10],zero,ymm9[11],zero,ymm9[12],zero,ymm9[13],zero,ymm9[14],zero,ymm9[15],zero
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512VL-NEXT: vpsrlvd %zmm9, %zmm2, %zmm2
; AVX512VL-NEXT: vpord %zmm2, %zmm7, %zmm2
; AVX512VL-NEXT: vpmovdw %zmm2, %ymm2
; AVX512VL-NEXT: vpxor %xmm7, %xmm7, %xmm7
; AVX512VL-NEXT: vpcmpeqw %ymm7, %ymm4, %ymm4
; AVX512VL-NEXT: vpblendvb %ymm4, %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT: vpand %ymm6, %ymm5, %ymm2
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT: vpsllvd %zmm4, %zmm5, %zmm4
; AVX512VL-NEXT: vpsubw %ymm2, %ymm8, %ymm5
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm5[0],zero,ymm5[1],zero,ymm5[2],zero,ymm5[3],zero,ymm5[4],zero,ymm5[5],zero,ymm5[6],zero,ymm5[7],zero,ymm5[8],zero,ymm5[9],zero,ymm5[10],zero,ymm5[11],zero,ymm5[12],zero,ymm5[13],zero,ymm5[14],zero,ymm5[15],zero
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
; AVX512VL-NEXT: vpsrlvd %zmm5, %zmm3, %zmm3
; AVX512VL-NEXT: vpord %zmm3, %zmm4, %zmm3
; AVX512VL-NEXT: vpmovdw %zmm3, %ymm3
; AVX512VL-NEXT: vpcmpeqw %ymm7, %ymm2, %ymm2
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: var_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512BW-NEXT: vpsllvw %zmm4, %zmm0, %zmm5
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm6 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %zmm4, %zmm6, %zmm4
; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vporq %zmm1, %zmm5, %zmm1
; AVX512BW-NEXT: vptestnmw %zmm3, %zmm2, %k1
; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: var_funnnel_v32i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512VLBW-NEXT: vpsllvw %zmm4, %zmm0, %zmm5
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm6 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %zmm4, %zmm6, %zmm4
; AVX512VLBW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
; AVX512VLBW-NEXT: vporq %zmm1, %zmm5, %zmm1
; AVX512VLBW-NEXT: vptestnmw %zmm3, %zmm2, %k1
; AVX512VLBW-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v32i16:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
  %res = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt)
  ret <32 x i16> %res
}
; Per-element variable funnel-shift-left of i8 lanes: fshl(x, y, amt).
define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) nounwind {
; AVX512F-LABEL: var_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm7
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm6, %ymm7, %ymm8
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512F-NEXT: vpand %ymm7, %ymm4, %ymm9
; AVX512F-NEXT: vpsllw $5, %ymm9, %ymm10
; AVX512F-NEXT: vpblendvb %ymm10, %ymm8, %ymm0, %ymm8
; AVX512F-NEXT: vpsllw $2, %ymm8, %ymm11
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512F-NEXT: vpand %ymm4, %ymm11, %ymm11
; AVX512F-NEXT: vpaddb %ymm10, %ymm10, %ymm10
; AVX512F-NEXT: vpblendvb %ymm10, %ymm11, %ymm8, %ymm8
; AVX512F-NEXT: vpaddb %ymm8, %ymm8, %ymm11
; AVX512F-NEXT: vpaddb %ymm10, %ymm10, %ymm10
; AVX512F-NEXT: vpblendvb %ymm10, %ymm11, %ymm8, %ymm10
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm11
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm8 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm8, %ymm11, %ymm11
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm12 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512F-NEXT: vpsubb %ymm9, %ymm12, %ymm13
; AVX512F-NEXT: vpsllw $5, %ymm13, %ymm13
; AVX512F-NEXT: vpblendvb %ymm13, %ymm11, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $2, %ymm2, %ymm11
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm14 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512F-NEXT: vpand %ymm14, %ymm11, %ymm11
; AVX512F-NEXT: vpaddb %ymm13, %ymm13, %ymm13
; AVX512F-NEXT: vpblendvb %ymm13, %ymm11, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $1, %ymm2, %ymm11
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm15 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT: vpand %ymm15, %ymm11, %ymm11
; AVX512F-NEXT: vpaddb %ymm13, %ymm13, %ymm13
; AVX512F-NEXT: vpblendvb %ymm13, %ymm11, %ymm2, %ymm2
; AVX512F-NEXT: vpor %ymm2, %ymm10, %ymm2
; AVX512F-NEXT: vpxor %xmm10, %xmm10, %xmm10
; AVX512F-NEXT: vpcmpeqb %ymm10, %ymm9, %ymm9
; AVX512F-NEXT: vpblendvb %ymm9, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm2
; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2
; AVX512F-NEXT: vpand %ymm7, %ymm5, %ymm5
; AVX512F-NEXT: vpsllw $5, %ymm5, %ymm6
; AVX512F-NEXT: vpblendvb %ymm6, %ymm2, %ymm1, %ymm2
; AVX512F-NEXT: vpsllw $2, %ymm2, %ymm7
; AVX512F-NEXT: vpand %ymm4, %ymm7, %ymm4
; AVX512F-NEXT: vpaddb %ymm6, %ymm6, %ymm6
; AVX512F-NEXT: vpblendvb %ymm6, %ymm4, %ymm2, %ymm2
; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm4
; AVX512F-NEXT: vpaddb %ymm6, %ymm6, %ymm6
; AVX512F-NEXT: vpblendvb %ymm6, %ymm4, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm3, %ymm4
; AVX512F-NEXT: vpand %ymm8, %ymm4, %ymm4
; AVX512F-NEXT: vpsubb %ymm5, %ymm12, %ymm6
; AVX512F-NEXT: vpsllw $5, %ymm6, %ymm6
; AVX512F-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $2, %ymm3, %ymm4
; AVX512F-NEXT: vpand %ymm14, %ymm4, %ymm4
; AVX512F-NEXT: vpaddb %ymm6, %ymm6, %ymm6
; AVX512F-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $1, %ymm3, %ymm4
; AVX512F-NEXT: vpand %ymm15, %ymm4, %ymm4
; AVX512F-NEXT: vpaddb %ymm6, %ymm6, %ymm6
; AVX512F-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpor %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpcmpeqb %ymm10, %ymm5, %ymm3
; AVX512F-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: var_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm6
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm7, %ymm6, %ymm6
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VL-NEXT: vpand %ymm8, %ymm4, %ymm4
; AVX512VL-NEXT: vpsllw $5, %ymm4, %ymm9
; AVX512VL-NEXT: vpblendvb %ymm9, %ymm6, %ymm0, %ymm6
; AVX512VL-NEXT: vpsllw $2, %ymm6, %ymm10
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm11 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512VL-NEXT: vpand %ymm11, %ymm10, %ymm10
; AVX512VL-NEXT: vpaddb %ymm9, %ymm9, %ymm9
; AVX512VL-NEXT: vpblendvb %ymm9, %ymm10, %ymm6, %ymm6
; AVX512VL-NEXT: vpaddb %ymm6, %ymm6, %ymm10
; AVX512VL-NEXT: vpaddb %ymm9, %ymm9, %ymm9
; AVX512VL-NEXT: vpblendvb %ymm9, %ymm10, %ymm6, %ymm6
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm9
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm10 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm10, %ymm9, %ymm9
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm12 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512VL-NEXT: vpsubb %ymm4, %ymm12, %ymm13
; AVX512VL-NEXT: vpsllw $5, %ymm13, %ymm13
; AVX512VL-NEXT: vpblendvb %ymm13, %ymm9, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw $2, %ymm2, %ymm9
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm14 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512VL-NEXT: vpand %ymm14, %ymm9, %ymm9
; AVX512VL-NEXT: vpaddb %ymm13, %ymm13, %ymm13
; AVX512VL-NEXT: vpblendvb %ymm13, %ymm9, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw $1, %ymm2, %ymm9
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm15 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512VL-NEXT: vpand %ymm15, %ymm9, %ymm9
; AVX512VL-NEXT: vpaddb %ymm13, %ymm13, %ymm13
; AVX512VL-NEXT: vpblendvb %ymm13, %ymm9, %ymm2, %ymm2
; AVX512VL-NEXT: vpor %ymm2, %ymm6, %ymm2
; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX512VL-NEXT: vpcmpeqb %ymm6, %ymm4, %ymm4
; AVX512VL-NEXT: vpblendvb %ymm4, %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm2
; AVX512VL-NEXT: vpand %ymm7, %ymm2, %ymm2
; AVX512VL-NEXT: vpand %ymm8, %ymm5, %ymm4
; AVX512VL-NEXT: vpsllw $5, %ymm4, %ymm5
; AVX512VL-NEXT: vpblendvb %ymm5, %ymm2, %ymm1, %ymm2
; AVX512VL-NEXT: vpsllw $2, %ymm2, %ymm7
; AVX512VL-NEXT: vpand %ymm11, %ymm7, %ymm7
; AVX512VL-NEXT: vpaddb %ymm5, %ymm5, %ymm5
; AVX512VL-NEXT: vpblendvb %ymm5, %ymm7, %ymm2, %ymm2
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm7
; AVX512VL-NEXT: vpaddb %ymm5, %ymm5, %ymm5
; AVX512VL-NEXT: vpblendvb %ymm5, %ymm7, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm3, %ymm5
; AVX512VL-NEXT: vpand %ymm10, %ymm5, %ymm5
; AVX512VL-NEXT: vpsubb %ymm4, %ymm12, %ymm7
; AVX512VL-NEXT: vpsllw $5, %ymm7, %ymm7
; AVX512VL-NEXT: vpblendvb %ymm7, %ymm5, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw $2, %ymm3, %ymm5
; AVX512VL-NEXT: vpand %ymm14, %ymm5, %ymm5
; AVX512VL-NEXT: vpaddb %ymm7, %ymm7, %ymm7
; AVX512VL-NEXT: vpblendvb %ymm7, %ymm5, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw $1, %ymm3, %ymm5
; AVX512VL-NEXT: vpand %ymm15, %ymm5, %ymm5
; AVX512VL-NEXT: vpaddb %ymm7, %ymm7, %ymm7
; AVX512VL-NEXT: vpblendvb %ymm7, %ymm5, %ymm3, %ymm3
; AVX512VL-NEXT: vpor %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpcmpeqb %ymm6, %ymm4, %ymm3
; AVX512VL-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: var_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512BW-NEXT: vpsubb %zmm4, %zmm5, %zmm5
; AVX512BW-NEXT: vpsllw $5, %zmm5, %zmm5
; AVX512BW-NEXT: vpaddb %zmm5, %zmm5, %zmm6
; AVX512BW-NEXT: vpmovb2m %zmm6, %k1
; AVX512BW-NEXT: vpmovb2m %zmm5, %k2
; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm5
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512BW-NEXT: vmovdqu8 %zmm5, %zmm1 {%k2}
; AVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm5
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512BW-NEXT: vmovdqu8 %zmm5, %zmm1 {%k1}
; AVX512BW-NEXT: vpsrlw $1, %zmm1, %zmm5
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512BW-NEXT: vpaddb %zmm6, %zmm6, %zmm6
; AVX512BW-NEXT: vpmovb2m %zmm6, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm5, %zmm1 {%k1}
; AVX512BW-NEXT: vpsllw $5, %zmm4, %zmm4
; AVX512BW-NEXT: vpaddb %zmm4, %zmm4, %zmm5
; AVX512BW-NEXT: vpmovb2m %zmm5, %k1
; AVX512BW-NEXT: vpmovb2m %zmm4, %k2
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm4
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
; AVX512BW-NEXT: vpblendmb %zmm4, %zmm0, %zmm4 {%k2}
; AVX512BW-NEXT: vpsllw $2, %zmm4, %zmm6
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm6, %zmm6
; AVX512BW-NEXT: vmovdqu8 %zmm6, %zmm4 {%k1}
; AVX512BW-NEXT: vpaddb %zmm5, %zmm5, %zmm5
; AVX512BW-NEXT: vpmovb2m %zmm5, %k1
; AVX512BW-NEXT: vpaddb %zmm4, %zmm4, %zmm4 {%k1}
; AVX512BW-NEXT: vporq %zmm1, %zmm4, %zmm1
; AVX512BW-NEXT: vptestnmb %zmm3, %zmm2, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: var_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VBMI2-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512VBMI2-NEXT: vpsubb %zmm4, %zmm5, %zmm5
; AVX512VBMI2-NEXT: vpsllw $5, %zmm5, %zmm5
; AVX512VBMI2-NEXT: vpaddb %zmm5, %zmm5, %zmm6
; AVX512VBMI2-NEXT: vpmovb2m %zmm6, %k1
; AVX512VBMI2-NEXT: vpmovb2m %zmm5, %k2
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm5
; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512VBMI2-NEXT: vmovdqu8 %zmm5, %zmm1 {%k2}
; AVX512VBMI2-NEXT: vpsrlw $2, %zmm1, %zmm5
; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512VBMI2-NEXT: vmovdqu8 %zmm5, %zmm1 {%k1}
; AVX512VBMI2-NEXT: vpsrlw $1, %zmm1, %zmm5
; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512VBMI2-NEXT: vpaddb %zmm6, %zmm6, %zmm6
; AVX512VBMI2-NEXT: vpmovb2m %zmm6, %k1
; AVX512VBMI2-NEXT: vmovdqu8 %zmm5, %zmm1 {%k1}
; AVX512VBMI2-NEXT: vpsllw $5, %zmm4, %zmm4
; AVX512VBMI2-NEXT: vpaddb %zmm4, %zmm4, %zmm5
; AVX512VBMI2-NEXT: vpmovb2m %zmm5, %k1
; AVX512VBMI2-NEXT: vpmovb2m %zmm4, %k2
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm4
; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
; AVX512VBMI2-NEXT: vpblendmb %zmm4, %zmm0, %zmm4 {%k2}
; AVX512VBMI2-NEXT: vpsllw $2, %zmm4, %zmm6
; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm6, %zmm6
; AVX512VBMI2-NEXT: vmovdqu8 %zmm6, %zmm4 {%k1}
; AVX512VBMI2-NEXT: vpaddb %zmm5, %zmm5, %zmm5
; AVX512VBMI2-NEXT: vpmovb2m %zmm5, %k1
; AVX512VBMI2-NEXT: vpaddb %zmm4, %zmm4, %zmm4 {%k1}
; AVX512VBMI2-NEXT: vporq %zmm1, %zmm4, %zmm1
; AVX512VBMI2-NEXT: vptestnmb %zmm3, %zmm2, %k1
; AVX512VBMI2-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512VLBW-NEXT: vpsubb %zmm4, %zmm5, %zmm5
; AVX512VLBW-NEXT: vpsllw $5, %zmm5, %zmm5
; AVX512VLBW-NEXT: vpaddb %zmm5, %zmm5, %zmm6
; AVX512VLBW-NEXT: vpmovb2m %zmm6, %k1
; AVX512VLBW-NEXT: vpmovb2m %zmm5, %k2
; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm5
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512VLBW-NEXT: vmovdqu8 %zmm5, %zmm1 {%k2}
; AVX512VLBW-NEXT: vpsrlw $2, %zmm1, %zmm5
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512VLBW-NEXT: vmovdqu8 %zmm5, %zmm1 {%k1}
; AVX512VLBW-NEXT: vpsrlw $1, %zmm1, %zmm5
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512VLBW-NEXT: vpaddb %zmm6, %zmm6, %zmm6
; AVX512VLBW-NEXT: vpmovb2m %zmm6, %k1
; AVX512VLBW-NEXT: vmovdqu8 %zmm5, %zmm1 {%k1}
; AVX512VLBW-NEXT: vpsllw $5, %zmm4, %zmm4
; AVX512VLBW-NEXT: vpaddb %zmm4, %zmm4, %zmm5
; AVX512VLBW-NEXT: vpmovb2m %zmm5, %k1
; AVX512VLBW-NEXT: vpmovb2m %zmm4, %k2
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm4
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
; AVX512VLBW-NEXT: vpblendmb %zmm4, %zmm0, %zmm4 {%k2}
; AVX512VLBW-NEXT: vpsllw $2, %zmm4, %zmm6
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm6, %zmm6
; AVX512VLBW-NEXT: vmovdqu8 %zmm6, %zmm4 {%k1}
; AVX512VLBW-NEXT: vpaddb %zmm5, %zmm5, %zmm5
; AVX512VLBW-NEXT: vpmovb2m %zmm5, %k1
; AVX512VLBW-NEXT: vpaddb %zmm4, %zmm4, %zmm4 {%k1}
; AVX512VLBW-NEXT: vporq %zmm1, %zmm4, %zmm1
; AVX512VLBW-NEXT: vptestnmb %zmm3, %zmm2, %k1
; AVX512VLBW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VLVBMI2-NEXT: vpandq %zmm3, %zmm2, %zmm4
; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512VLVBMI2-NEXT: vpsubb %zmm4, %zmm5, %zmm5
; AVX512VLVBMI2-NEXT: vpsllw $5, %zmm5, %zmm5
; AVX512VLVBMI2-NEXT: vpaddb %zmm5, %zmm5, %zmm6
; AVX512VLVBMI2-NEXT: vpmovb2m %zmm6, %k1
; AVX512VLVBMI2-NEXT: vpmovb2m %zmm5, %k2
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm5
; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm5, %zmm1 {%k2}
; AVX512VLVBMI2-NEXT: vpsrlw $2, %zmm1, %zmm5
; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm5, %zmm1 {%k1}
; AVX512VLVBMI2-NEXT: vpsrlw $1, %zmm1, %zmm5
; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512VLVBMI2-NEXT: vpaddb %zmm6, %zmm6, %zmm6
; AVX512VLVBMI2-NEXT: vpmovb2m %zmm6, %k1
; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm5, %zmm1 {%k1}
; AVX512VLVBMI2-NEXT: vpsllw $5, %zmm4, %zmm4
; AVX512VLVBMI2-NEXT: vpaddb %zmm4, %zmm4, %zmm5
; AVX512VLVBMI2-NEXT: vpmovb2m %zmm5, %k1
; AVX512VLVBMI2-NEXT: vpmovb2m %zmm4, %k2
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm4
; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
; AVX512VLVBMI2-NEXT: vpblendmb %zmm4, %zmm0, %zmm4 {%k2}
; AVX512VLVBMI2-NEXT: vpsllw $2, %zmm4, %zmm6
; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm6, %zmm6
; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm6, %zmm4 {%k1}
; AVX512VLVBMI2-NEXT: vpaddb %zmm5, %zmm5, %zmm5
; AVX512VLVBMI2-NEXT: vpmovb2m %zmm5, %k1
; AVX512VLVBMI2-NEXT: vpaddb %zmm4, %zmm4, %zmm4 {%k1}
; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm4, %zmm1
; AVX512VLVBMI2-NEXT: vptestnmb %zmm3, %zmm2, %k1
; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
  %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
  ret <64 x i8> %res
}
561
562;
563; Uniform Variable Shifts
564;
565
566define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt) nounwind {
Craig Topper6ffeeb72019-01-06 18:10:18 +0000567; AVX512F-LABEL: splatvar_funnnel_v8i64:
568; AVX512F: # %bb.0:
569; AVX512F-NEXT: vpbroadcastq %xmm2, %zmm2
570; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
571; AVX512F-NEXT: vpandq %zmm3, %zmm2, %zmm4
572; AVX512F-NEXT: vpsllq %xmm4, %zmm0, %zmm5
573; AVX512F-NEXT: vmovdqa {{.*#+}} xmm6 = [64,64]
574; AVX512F-NEXT: vpsubq %xmm4, %xmm6, %xmm4
575; AVX512F-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
576; AVX512F-NEXT: vporq %zmm1, %zmm5, %zmm1
577; AVX512F-NEXT: vptestnmq %zmm3, %zmm2, %k1
578; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
579; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
580; AVX512F-NEXT: retq
581;
582; AVX512VL-LABEL: splatvar_funnnel_v8i64:
583; AVX512VL: # %bb.0:
584; AVX512VL-NEXT: vpbroadcastq %xmm2, %zmm2
585; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
586; AVX512VL-NEXT: vpandq %zmm3, %zmm2, %zmm4
587; AVX512VL-NEXT: vpsllq %xmm4, %zmm0, %zmm5
588; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm6 = [64,64]
589; AVX512VL-NEXT: vpsubq %xmm4, %xmm6, %xmm4
590; AVX512VL-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
591; AVX512VL-NEXT: vporq %zmm1, %zmm5, %zmm1
592; AVX512VL-NEXT: vptestnmq %zmm3, %zmm2, %k1
593; AVX512VL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
594; AVX512VL-NEXT: vmovdqa64 %zmm1, %zmm0
595; AVX512VL-NEXT: retq
596;
597; AVX512BW-LABEL: splatvar_funnnel_v8i64:
598; AVX512BW: # %bb.0:
599; AVX512BW-NEXT: vpbroadcastq %xmm2, %zmm2
600; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
601; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
602; AVX512BW-NEXT: vpsllq %xmm4, %zmm0, %zmm5
603; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm6 = [64,64]
604; AVX512BW-NEXT: vpsubq %xmm4, %xmm6, %xmm4
605; AVX512BW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
606; AVX512BW-NEXT: vporq %zmm1, %zmm5, %zmm1
607; AVX512BW-NEXT: vptestnmq %zmm3, %zmm2, %k1
608; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
609; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
610; AVX512BW-NEXT: retq
611;
612; AVX512VBMI2-LABEL: splatvar_funnnel_v8i64:
613; AVX512VBMI2: # %bb.0:
614; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %zmm2
615; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0
616; AVX512VBMI2-NEXT: retq
617;
618; AVX512VLBW-LABEL: splatvar_funnnel_v8i64:
619; AVX512VLBW: # %bb.0:
620; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %zmm2
621; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
622; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
623; AVX512VLBW-NEXT: vpsllq %xmm4, %zmm0, %zmm5
624; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm6 = [64,64]
625; AVX512VLBW-NEXT: vpsubq %xmm4, %xmm6, %xmm4
626; AVX512VLBW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
627; AVX512VLBW-NEXT: vporq %zmm1, %zmm5, %zmm1
628; AVX512VLBW-NEXT: vptestnmq %zmm3, %zmm2, %k1
629; AVX512VLBW-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
630; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0
631; AVX512VLBW-NEXT: retq
632;
633; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i64:
634; AVX512VLVBMI2: # %bb.0:
635; AVX512VLVBMI2-NEXT: vpbroadcastq %xmm2, %zmm2
636; AVX512VLVBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0
637; AVX512VLVBMI2-NEXT: retq
Simon Pilgrimba8e84b2018-12-18 10:32:54 +0000638 %splat = shufflevector <8 x i64> %amt, <8 x i64> undef, <8 x i32> zeroinitializer
639 %res = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %splat)
640 ret <8 x i64> %res
641}
642
643define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> %amt) nounwind {
Craig Topper6ffeeb72019-01-06 18:10:18 +0000644; AVX512F-LABEL: splatvar_funnnel_v16i32:
645; AVX512F: # %bb.0:
646; AVX512F-NEXT: vpbroadcastd %xmm2, %zmm2
647; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
648; AVX512F-NEXT: vpandd %zmm3, %zmm2, %zmm4
649; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm5 = xmm4[0],zero,xmm4[1],zero
650; AVX512F-NEXT: vpslld %xmm5, %zmm0, %zmm5
651; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm6 = [32,32,32,32]
652; AVX512F-NEXT: vpsubd %xmm4, %xmm6, %xmm4
653; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
654; AVX512F-NEXT: vpsrld %xmm4, %zmm1, %zmm1
655; AVX512F-NEXT: vpord %zmm1, %zmm5, %zmm1
656; AVX512F-NEXT: vptestnmd %zmm3, %zmm2, %k1
657; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
658; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
659; AVX512F-NEXT: retq
660;
661; AVX512VL-LABEL: splatvar_funnnel_v16i32:
662; AVX512VL: # %bb.0:
663; AVX512VL-NEXT: vpbroadcastd %xmm2, %zmm2
664; AVX512VL-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
665; AVX512VL-NEXT: vpandd %zmm3, %zmm2, %zmm4
666; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm5 = xmm4[0],zero,xmm4[1],zero
667; AVX512VL-NEXT: vpslld %xmm5, %zmm0, %zmm5
668; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm6 = [32,32,32,32]
669; AVX512VL-NEXT: vpsubd %xmm4, %xmm6, %xmm4
670; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
671; AVX512VL-NEXT: vpsrld %xmm4, %zmm1, %zmm1
672; AVX512VL-NEXT: vpord %zmm1, %zmm5, %zmm1
673; AVX512VL-NEXT: vptestnmd %zmm3, %zmm2, %k1
674; AVX512VL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
675; AVX512VL-NEXT: vmovdqa64 %zmm1, %zmm0
676; AVX512VL-NEXT: retq
677;
678; AVX512BW-LABEL: splatvar_funnnel_v16i32:
679; AVX512BW: # %bb.0:
680; AVX512BW-NEXT: vpbroadcastd %xmm2, %zmm2
681; AVX512BW-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
682; AVX512BW-NEXT: vpandd %zmm3, %zmm2, %zmm4
683; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm5 = xmm4[0],zero,xmm4[1],zero
684; AVX512BW-NEXT: vpslld %xmm5, %zmm0, %zmm5
685; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm6 = [32,32,32,32]
686; AVX512BW-NEXT: vpsubd %xmm4, %xmm6, %xmm4
687; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
688; AVX512BW-NEXT: vpsrld %xmm4, %zmm1, %zmm1
689; AVX512BW-NEXT: vpord %zmm1, %zmm5, %zmm1
690; AVX512BW-NEXT: vptestnmd %zmm3, %zmm2, %k1
691; AVX512BW-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
692; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
693; AVX512BW-NEXT: retq
694;
695; AVX512VBMI2-LABEL: splatvar_funnnel_v16i32:
696; AVX512VBMI2: # %bb.0:
697; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %zmm2
698; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0
699; AVX512VBMI2-NEXT: retq
700;
701; AVX512VLBW-LABEL: splatvar_funnnel_v16i32:
702; AVX512VLBW: # %bb.0:
703; AVX512VLBW-NEXT: vpbroadcastd %xmm2, %zmm2
704; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
705; AVX512VLBW-NEXT: vpandd %zmm3, %zmm2, %zmm4
706; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm5 = xmm4[0],zero,xmm4[1],zero
707; AVX512VLBW-NEXT: vpslld %xmm5, %zmm0, %zmm5
708; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} xmm6 = [32,32,32,32]
709; AVX512VLBW-NEXT: vpsubd %xmm4, %xmm6, %xmm4
710; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
711; AVX512VLBW-NEXT: vpsrld %xmm4, %zmm1, %zmm1
712; AVX512VLBW-NEXT: vpord %zmm1, %zmm5, %zmm1
713; AVX512VLBW-NEXT: vptestnmd %zmm3, %zmm2, %k1
714; AVX512VLBW-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
715; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0
716; AVX512VLBW-NEXT: retq
717;
718; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i32:
719; AVX512VLVBMI2: # %bb.0:
720; AVX512VLVBMI2-NEXT: vpbroadcastd %xmm2, %zmm2
721; AVX512VLVBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0
722; AVX512VLVBMI2-NEXT: retq
Simon Pilgrimba8e84b2018-12-18 10:32:54 +0000723 %splat = shufflevector <16 x i32> %amt, <16 x i32> undef, <16 x i32> zeroinitializer
724 %res = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> %splat)
725 ret <16 x i32> %res
726}
727
728define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind {
729; AVX512F-LABEL: splatvar_funnnel_v32i16:
730; AVX512F: # %bb.0:
731; AVX512F-NEXT: vpbroadcastw %xmm4, %ymm4
732; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
733; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
734; AVX512F-NEXT: vpsllw %xmm5, %ymm0, %ymm6
735; AVX512F-NEXT: vmovdqa {{.*#+}} xmm7 = [16,16,16,16,16,16,16,16]
736; AVX512F-NEXT: vpsubw %xmm4, %xmm7, %xmm7
737; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero
738; AVX512F-NEXT: vpsrlw %xmm7, %ymm2, %ymm2
739; AVX512F-NEXT: vpor %ymm2, %ymm6, %ymm2
740; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
741; AVX512F-NEXT: vpcmpeqw %ymm6, %ymm4, %ymm4
742; AVX512F-NEXT: vpblendvb %ymm4, %ymm0, %ymm2, %ymm0
743; AVX512F-NEXT: vpsllw %xmm5, %ymm1, %ymm2
744; AVX512F-NEXT: vpsrlw %xmm7, %ymm3, %ymm3
745; AVX512F-NEXT: vpor %ymm3, %ymm2, %ymm2
746; AVX512F-NEXT: vpblendvb %ymm4, %ymm1, %ymm2, %ymm1
747; AVX512F-NEXT: retq
748;
749; AVX512VL-LABEL: splatvar_funnnel_v32i16:
750; AVX512VL: # %bb.0:
751; AVX512VL-NEXT: vpbroadcastw %xmm4, %ymm4
752; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
753; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
754; AVX512VL-NEXT: vpsllw %xmm5, %ymm0, %ymm6
755; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm7 = [16,16,16,16,16,16,16,16]
756; AVX512VL-NEXT: vpsubw %xmm4, %xmm7, %xmm7
757; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero
758; AVX512VL-NEXT: vpsrlw %xmm7, %ymm2, %ymm2
759; AVX512VL-NEXT: vpor %ymm2, %ymm6, %ymm2
760; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6
761; AVX512VL-NEXT: vpcmpeqw %ymm6, %ymm4, %ymm4
762; AVX512VL-NEXT: vpblendvb %ymm4, %ymm0, %ymm2, %ymm0
763; AVX512VL-NEXT: vpsllw %xmm5, %ymm1, %ymm2
764; AVX512VL-NEXT: vpsrlw %xmm7, %ymm3, %ymm3
765; AVX512VL-NEXT: vpor %ymm3, %ymm2, %ymm2
766; AVX512VL-NEXT: vpblendvb %ymm4, %ymm1, %ymm2, %ymm1
767; AVX512VL-NEXT: retq
768;
769; AVX512BW-LABEL: splatvar_funnnel_v32i16:
770; AVX512BW: # %bb.0:
771; AVX512BW-NEXT: vpbroadcastw %xmm2, %zmm2
772; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
773; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
774; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
775; AVX512BW-NEXT: vpsllw %xmm5, %zmm0, %zmm5
776; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm6 = [16,16,16,16,16,16,16,16]
777; AVX512BW-NEXT: vpsubw %xmm4, %xmm6, %xmm4
778; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
779; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
780; AVX512BW-NEXT: vporq %zmm1, %zmm5, %zmm1
781; AVX512BW-NEXT: vptestnmw %zmm3, %zmm2, %k1
782; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
783; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
784; AVX512BW-NEXT: retq
785;
Craig Topper6ffeeb72019-01-06 18:10:18 +0000786; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16:
787; AVX512VBMI2: # %bb.0:
788; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %zmm2
789; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0
790; AVX512VBMI2-NEXT: retq
791;
Simon Pilgrimba8e84b2018-12-18 10:32:54 +0000792; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
793; AVX512VLBW: # %bb.0:
794; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %zmm2
795; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
796; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
797; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
798; AVX512VLBW-NEXT: vpsllw %xmm5, %zmm0, %zmm5
799; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm6 = [16,16,16,16,16,16,16,16]
800; AVX512VLBW-NEXT: vpsubw %xmm4, %xmm6, %xmm4
801; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
802; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
803; AVX512VLBW-NEXT: vporq %zmm1, %zmm5, %zmm1
804; AVX512VLBW-NEXT: vptestnmw %zmm3, %zmm2, %k1
805; AVX512VLBW-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
806; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0
807; AVX512VLBW-NEXT: retq
Craig Topper6ffeeb72019-01-06 18:10:18 +0000808;
809; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i16:
810; AVX512VLVBMI2: # %bb.0:
811; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm2, %zmm2
812; AVX512VLVBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0
813; AVX512VLVBMI2-NEXT: retq
Simon Pilgrimba8e84b2018-12-18 10:32:54 +0000814 %splat = shufflevector <32 x i16> %amt, <32 x i16> undef, <32 x i32> zeroinitializer
815 %res = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %splat)
816 ret <32 x i16> %res
817}
818
819define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) nounwind {
820; AVX512F-LABEL: splatvar_funnnel_v64i8:
821; AVX512F: # %bb.0:
822; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
823; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
824; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
825; AVX512F-NEXT: vpsllw %xmm5, %ymm0, %ymm6
826; AVX512F-NEXT: vpcmpeqd %ymm9, %ymm9, %ymm9
827; AVX512F-NEXT: vpsllw %xmm5, %ymm9, %ymm8
828; AVX512F-NEXT: vpbroadcastb %xmm8, %ymm8
829; AVX512F-NEXT: vpand %ymm8, %ymm6, %ymm6
830; AVX512F-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
831; AVX512F-NEXT: vpsubb %xmm4, %xmm7, %xmm7
832; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,zero,zero,zero,zero,xmm7[1],zero,zero,zero,zero,zero,zero,zero
833; AVX512F-NEXT: vpsrlw %xmm7, %ymm2, %ymm2
834; AVX512F-NEXT: vpsrlw %xmm7, %ymm9, %ymm9
835; AVX512F-NEXT: vpsrlw $8, %ymm9, %ymm9
836; AVX512F-NEXT: vpbroadcastb %xmm9, %ymm9
837; AVX512F-NEXT: vpand %ymm9, %ymm2, %ymm2
838; AVX512F-NEXT: vpor %ymm2, %ymm6, %ymm2
839; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
840; AVX512F-NEXT: vpcmpeqb %ymm6, %ymm4, %ymm4
841; AVX512F-NEXT: vpblendvb %ymm4, %ymm0, %ymm2, %ymm0
842; AVX512F-NEXT: vpsllw %xmm5, %ymm1, %ymm2
843; AVX512F-NEXT: vpand %ymm8, %ymm2, %ymm2
844; AVX512F-NEXT: vpsrlw %xmm7, %ymm3, %ymm3
845; AVX512F-NEXT: vpand %ymm9, %ymm3, %ymm3
846; AVX512F-NEXT: vpor %ymm3, %ymm2, %ymm2
847; AVX512F-NEXT: vpblendvb %ymm4, %ymm1, %ymm2, %ymm1
848; AVX512F-NEXT: retq
849;
850; AVX512VL-LABEL: splatvar_funnnel_v64i8:
851; AVX512VL: # %bb.0:
852; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
853; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
854; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
855; AVX512VL-NEXT: vpsllw %xmm5, %ymm0, %ymm6
856; AVX512VL-NEXT: vpcmpeqd %ymm9, %ymm9, %ymm9
857; AVX512VL-NEXT: vpsllw %xmm5, %ymm9, %ymm8
858; AVX512VL-NEXT: vpbroadcastb %xmm8, %ymm8
859; AVX512VL-NEXT: vpand %ymm8, %ymm6, %ymm6
860; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
861; AVX512VL-NEXT: vpsubb %xmm4, %xmm7, %xmm7
862; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,zero,zero,zero,zero,xmm7[1],zero,zero,zero,zero,zero,zero,zero
863; AVX512VL-NEXT: vpsrlw %xmm7, %ymm2, %ymm2
864; AVX512VL-NEXT: vpsrlw %xmm7, %ymm9, %ymm9
865; AVX512VL-NEXT: vpsrlw $8, %ymm9, %ymm9
866; AVX512VL-NEXT: vpbroadcastb %xmm9, %ymm9
867; AVX512VL-NEXT: vpand %ymm9, %ymm2, %ymm2
868; AVX512VL-NEXT: vpor %ymm2, %ymm6, %ymm2
869; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6
870; AVX512VL-NEXT: vpcmpeqb %ymm6, %ymm4, %ymm4
871; AVX512VL-NEXT: vpblendvb %ymm4, %ymm0, %ymm2, %ymm0
872; AVX512VL-NEXT: vpsllw %xmm5, %ymm1, %ymm2
873; AVX512VL-NEXT: vpand %ymm8, %ymm2, %ymm2
874; AVX512VL-NEXT: vpsrlw %xmm7, %ymm3, %ymm3
875; AVX512VL-NEXT: vpand %ymm9, %ymm3, %ymm3
876; AVX512VL-NEXT: vpor %ymm3, %ymm2, %ymm2
877; AVX512VL-NEXT: vpblendvb %ymm4, %ymm1, %ymm2, %ymm1
878; AVX512VL-NEXT: retq
879;
880; AVX512BW-LABEL: splatvar_funnnel_v64i8:
881; AVX512BW: # %bb.0:
882; AVX512BW-NEXT: vpbroadcastb %xmm2, %zmm2
883; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
884; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
885; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
886; AVX512BW-NEXT: vpsllw %xmm5, %zmm0, %zmm6
887; AVX512BW-NEXT: vpternlogd $255, %zmm7, %zmm7, %zmm7
888; AVX512BW-NEXT: vpsllw %xmm5, %zmm7, %zmm5
889; AVX512BW-NEXT: vpbroadcastb %xmm5, %zmm5
890; AVX512BW-NEXT: vpandq %zmm5, %zmm6, %zmm5
891; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
892; AVX512BW-NEXT: vpsubb %xmm4, %xmm6, %xmm4
893; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
894; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
895; AVX512BW-NEXT: vpsrlw %xmm4, %zmm7, %zmm4
896; AVX512BW-NEXT: vpsrlw $8, %zmm4, %zmm4
897; AVX512BW-NEXT: vpbroadcastb %xmm4, %zmm4
898; AVX512BW-NEXT: vpandq %zmm4, %zmm1, %zmm1
899; AVX512BW-NEXT: vporq %zmm1, %zmm5, %zmm1
900; AVX512BW-NEXT: vptestnmb %zmm3, %zmm2, %k1
901; AVX512BW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
902; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
903; AVX512BW-NEXT: retq
904;
Craig Topper6ffeeb72019-01-06 18:10:18 +0000905; AVX512VBMI2-LABEL: splatvar_funnnel_v64i8:
906; AVX512VBMI2: # %bb.0:
907; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %zmm2
908; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
909; AVX512VBMI2-NEXT: vpandq %zmm3, %zmm2, %zmm4
910; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
911; AVX512VBMI2-NEXT: vpsllw %xmm5, %zmm0, %zmm6
912; AVX512VBMI2-NEXT: vpternlogd $255, %zmm7, %zmm7, %zmm7
913; AVX512VBMI2-NEXT: vpsllw %xmm5, %zmm7, %zmm5
914; AVX512VBMI2-NEXT: vpbroadcastb %xmm5, %zmm5
915; AVX512VBMI2-NEXT: vpandq %zmm5, %zmm6, %zmm5
916; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
917; AVX512VBMI2-NEXT: vpsubb %xmm4, %xmm6, %xmm4
918; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
919; AVX512VBMI2-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
920; AVX512VBMI2-NEXT: vpsrlw %xmm4, %zmm7, %zmm4
921; AVX512VBMI2-NEXT: vpsrlw $8, %zmm4, %zmm4
922; AVX512VBMI2-NEXT: vpbroadcastb %xmm4, %zmm4
923; AVX512VBMI2-NEXT: vpandq %zmm4, %zmm1, %zmm1
924; AVX512VBMI2-NEXT: vporq %zmm1, %zmm5, %zmm1
925; AVX512VBMI2-NEXT: vptestnmb %zmm3, %zmm2, %k1
926; AVX512VBMI2-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
927; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
928; AVX512VBMI2-NEXT: retq
929;
Simon Pilgrimba8e84b2018-12-18 10:32:54 +0000930; AVX512VLBW-LABEL: splatvar_funnnel_v64i8:
931; AVX512VLBW: # %bb.0:
932; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %zmm2
933; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
934; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
935; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
936; AVX512VLBW-NEXT: vpsllw %xmm5, %zmm0, %zmm6
937; AVX512VLBW-NEXT: vpternlogd $255, %zmm7, %zmm7, %zmm7
938; AVX512VLBW-NEXT: vpsllw %xmm5, %zmm7, %zmm5
939; AVX512VLBW-NEXT: vpbroadcastb %xmm5, %zmm5
940; AVX512VLBW-NEXT: vpandq %zmm5, %zmm6, %zmm5
941; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
942; AVX512VLBW-NEXT: vpsubb %xmm4, %xmm6, %xmm4
943; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
944; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
945; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm7, %zmm4
946; AVX512VLBW-NEXT: vpsrlw $8, %zmm4, %zmm4
947; AVX512VLBW-NEXT: vpbroadcastb %xmm4, %zmm4
948; AVX512VLBW-NEXT: vpandq %zmm4, %zmm1, %zmm1
949; AVX512VLBW-NEXT: vporq %zmm1, %zmm5, %zmm1
950; AVX512VLBW-NEXT: vptestnmb %zmm3, %zmm2, %k1
951; AVX512VLBW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
952; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0
953; AVX512VLBW-NEXT: retq
Craig Topper6ffeeb72019-01-06 18:10:18 +0000954;
955; AVX512VLVBMI2-LABEL: splatvar_funnnel_v64i8:
956; AVX512VLVBMI2: # %bb.0:
957; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %zmm2
958; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
959; AVX512VLVBMI2-NEXT: vpandq %zmm3, %zmm2, %zmm4
960; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
961; AVX512VLVBMI2-NEXT: vpsllw %xmm5, %zmm0, %zmm6
962; AVX512VLVBMI2-NEXT: vpternlogd $255, %zmm7, %zmm7, %zmm7
963; AVX512VLVBMI2-NEXT: vpsllw %xmm5, %zmm7, %zmm5
964; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm5, %zmm5
965; AVX512VLVBMI2-NEXT: vpandq %zmm5, %zmm6, %zmm5
966; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
967; AVX512VLVBMI2-NEXT: vpsubb %xmm4, %xmm6, %xmm4
968; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
969; AVX512VLVBMI2-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
970; AVX512VLVBMI2-NEXT: vpsrlw %xmm4, %zmm7, %zmm4
971; AVX512VLVBMI2-NEXT: vpsrlw $8, %zmm4, %zmm4
972; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm4, %zmm4
973; AVX512VLVBMI2-NEXT: vpandq %zmm4, %zmm1, %zmm1
974; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm5, %zmm1
975; AVX512VLVBMI2-NEXT: vptestnmb %zmm3, %zmm2, %k1
976; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
977; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
978; AVX512VLVBMI2-NEXT: retq
Simon Pilgrimba8e84b2018-12-18 10:32:54 +0000979 %splat = shufflevector <64 x i8> %amt, <64 x i8> undef, <64 x i32> zeroinitializer
980 %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %splat)
981 ret <64 x i8> %res
982}
983
984;
985; Constant Shifts
986;
987
988define <8 x i64> @constant_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
Craig Topper6ffeeb72019-01-06 18:10:18 +0000989; AVX512F-LABEL: constant_funnnel_v8i64:
990; AVX512F: # %bb.0:
991; AVX512F-NEXT: vpsrlvq {{.*}}(%rip), %zmm1, %zmm1
992; AVX512F-NEXT: vpsllvq {{.*}}(%rip), %zmm0, %zmm0
993; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
994; AVX512F-NEXT: retq
995;
996; AVX512VL-LABEL: constant_funnnel_v8i64:
997; AVX512VL: # %bb.0:
998; AVX512VL-NEXT: vpsrlvq {{.*}}(%rip), %zmm1, %zmm1
999; AVX512VL-NEXT: vpsllvq {{.*}}(%rip), %zmm0, %zmm0
1000; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
1001; AVX512VL-NEXT: retq
1002;
1003; AVX512BW-LABEL: constant_funnnel_v8i64:
1004; AVX512BW: # %bb.0:
1005; AVX512BW-NEXT: vpsrlvq {{.*}}(%rip), %zmm1, %zmm1
1006; AVX512BW-NEXT: vpsllvq {{.*}}(%rip), %zmm0, %zmm0
1007; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
1008; AVX512BW-NEXT: retq
1009;
1010; AVX512VBMI2-LABEL: constant_funnnel_v8i64:
1011; AVX512VBMI2: # %bb.0:
1012; AVX512VBMI2-NEXT: vpshldvq {{.*}}(%rip), %zmm1, %zmm0
1013; AVX512VBMI2-NEXT: retq
1014;
1015; AVX512VLBW-LABEL: constant_funnnel_v8i64:
1016; AVX512VLBW: # %bb.0:
1017; AVX512VLBW-NEXT: vpsrlvq {{.*}}(%rip), %zmm1, %zmm1
1018; AVX512VLBW-NEXT: vpsllvq {{.*}}(%rip), %zmm0, %zmm0
1019; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
1020; AVX512VLBW-NEXT: retq
1021;
1022; AVX512VLVBMI2-LABEL: constant_funnnel_v8i64:
1023; AVX512VLVBMI2: # %bb.0:
1024; AVX512VLVBMI2-NEXT: vpshldvq {{.*}}(%rip), %zmm1, %zmm0
1025; AVX512VLVBMI2-NEXT: retq
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001026 %res = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> <i64 4, i64 14, i64 50, i64 60, i64 4, i64 14, i64 50, i64 60>)
1027 ret <8 x i64> %res
1028}
1029
1030define <16 x i32> @constant_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
Craig Topper6ffeeb72019-01-06 18:10:18 +00001031; AVX512F-LABEL: constant_funnnel_v16i32:
1032; AVX512F: # %bb.0:
1033; AVX512F-NEXT: vpsrlvd {{.*}}(%rip), %zmm1, %zmm1
1034; AVX512F-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
1035; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
1036; AVX512F-NEXT: retq
1037;
1038; AVX512VL-LABEL: constant_funnnel_v16i32:
1039; AVX512VL: # %bb.0:
1040; AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %zmm1, %zmm1
1041; AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
1042; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0
1043; AVX512VL-NEXT: retq
1044;
1045; AVX512BW-LABEL: constant_funnnel_v16i32:
1046; AVX512BW: # %bb.0:
1047; AVX512BW-NEXT: vpsrlvd {{.*}}(%rip), %zmm1, %zmm1
1048; AVX512BW-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
1049; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
1050; AVX512BW-NEXT: retq
1051;
1052; AVX512VBMI2-LABEL: constant_funnnel_v16i32:
1053; AVX512VBMI2: # %bb.0:
1054; AVX512VBMI2-NEXT: vpshldvd {{.*}}(%rip), %zmm1, %zmm0
1055; AVX512VBMI2-NEXT: retq
1056;
1057; AVX512VLBW-LABEL: constant_funnnel_v16i32:
1058; AVX512VLBW: # %bb.0:
1059; AVX512VLBW-NEXT: vpsrlvd {{.*}}(%rip), %zmm1, %zmm1
1060; AVX512VLBW-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
1061; AVX512VLBW-NEXT: vpord %zmm1, %zmm0, %zmm0
1062; AVX512VLBW-NEXT: retq
1063;
1064; AVX512VLVBMI2-LABEL: constant_funnnel_v16i32:
1065; AVX512VLVBMI2: # %bb.0:
1066; AVX512VLVBMI2-NEXT: vpshldvd {{.*}}(%rip), %zmm1, %zmm0
1067; AVX512VLVBMI2-NEXT: retq
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001068 %res = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>)
1069 ret <16 x i32> %res
1070}
1071
1072define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
1073; AVX512F-LABEL: constant_funnnel_v32i16:
1074; AVX512F: # %bb.0:
1075; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
1076; AVX512F-NEXT: vpmulhuw %ymm4, %ymm2, %ymm2
1077; AVX512F-NEXT: vpmullw %ymm4, %ymm0, %ymm5
1078; AVX512F-NEXT: vpor %ymm2, %ymm5, %ymm2
1079; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7],ymm0[8],ymm2[9,10,11,12,13,14,15]
1080; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
1081; AVX512F-NEXT: vpmulhuw %ymm4, %ymm3, %ymm2
1082; AVX512F-NEXT: vpmullw %ymm4, %ymm1, %ymm3
1083; AVX512F-NEXT: vpor %ymm2, %ymm3, %ymm2
1084; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7],ymm1[8],ymm2[9,10,11,12,13,14,15]
1085; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
1086; AVX512F-NEXT: retq
1087;
1088; AVX512VL-LABEL: constant_funnnel_v32i16:
1089; AVX512VL: # %bb.0:
1090; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
1091; AVX512VL-NEXT: vpmulhuw %ymm4, %ymm2, %ymm2
1092; AVX512VL-NEXT: vpmullw %ymm4, %ymm0, %ymm5
1093; AVX512VL-NEXT: vpor %ymm2, %ymm5, %ymm2
1094; AVX512VL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7],ymm0[8],ymm2[9,10,11,12,13,14,15]
1095; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
1096; AVX512VL-NEXT: vpmulhuw %ymm4, %ymm3, %ymm2
1097; AVX512VL-NEXT: vpmullw %ymm4, %ymm1, %ymm3
1098; AVX512VL-NEXT: vpor %ymm2, %ymm3, %ymm2
1099; AVX512VL-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7],ymm1[8],ymm2[9,10,11,12,13,14,15]
1100; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
1101; AVX512VL-NEXT: retq
1102;
1103; AVX512BW-LABEL: constant_funnnel_v32i16:
1104; AVX512BW: # %bb.0:
1105; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm1, %zmm1
1106; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm2
1107; AVX512BW-NEXT: vporq %zmm1, %zmm2, %zmm1
1108; AVX512BW-NEXT: movl $65537, %eax # imm = 0x10001
1109; AVX512BW-NEXT: kmovd %eax, %k1
1110; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
1111; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
1112; AVX512BW-NEXT: retq
1113;
Craig Topper6ffeeb72019-01-06 18:10:18 +00001114; AVX512VBMI2-LABEL: constant_funnnel_v32i16:
1115; AVX512VBMI2: # %bb.0:
1116; AVX512VBMI2-NEXT: vpshldvw {{.*}}(%rip), %zmm1, %zmm0
1117; AVX512VBMI2-NEXT: retq
1118;
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001119; AVX512VLBW-LABEL: constant_funnnel_v32i16:
1120; AVX512VLBW: # %bb.0:
1121; AVX512VLBW-NEXT: vpsrlvw {{.*}}(%rip), %zmm1, %zmm1
1122; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm2
1123; AVX512VLBW-NEXT: vporq %zmm1, %zmm2, %zmm1
1124; AVX512VLBW-NEXT: movl $65537, %eax # imm = 0x10001
1125; AVX512VLBW-NEXT: kmovd %eax, %k1
1126; AVX512VLBW-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
1127; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0
1128; AVX512VLBW-NEXT: retq
Craig Topper6ffeeb72019-01-06 18:10:18 +00001129;
1130; AVX512VLVBMI2-LABEL: constant_funnnel_v32i16:
1131; AVX512VLVBMI2: # %bb.0:
1132; AVX512VLVBMI2-NEXT: vpshldvw {{.*}}(%rip), %zmm1, %zmm0
1133; AVX512VLVBMI2-NEXT: retq
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001134 %res = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
1135 ret <32 x i16> %res
1136}
1137
1138define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
1139; AVX512F-LABEL: constant_funnnel_v64i8:
1140; AVX512F: # %bb.0:
1141; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4
1142; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1143; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4
1144; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
1145; AVX512F-NEXT: vpblendvb %ymm6, %ymm4, %ymm0, %ymm4
1146; AVX512F-NEXT: vpsllw $2, %ymm4, %ymm7
1147; AVX512F-NEXT: vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
1148; AVX512F-NEXT: vpand %ymm8, %ymm7, %ymm7
1149; AVX512F-NEXT: vpaddb %ymm6, %ymm6, %ymm9
1150; AVX512F-NEXT: vpblendvb %ymm9, %ymm7, %ymm4, %ymm4
1151; AVX512F-NEXT: vpaddb %ymm4, %ymm4, %ymm7
1152; AVX512F-NEXT: vpaddb %ymm9, %ymm9, %ymm10
1153; AVX512F-NEXT: vpblendvb %ymm10, %ymm7, %ymm4, %ymm4
1154; AVX512F-NEXT: vpxor %xmm7, %xmm7, %xmm7
1155; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm11 = ymm2[8],ymm7[8],ymm2[9],ymm7[9],ymm2[10],ymm7[10],ymm2[11],ymm7[11],ymm2[12],ymm7[12],ymm2[13],ymm7[13],ymm2[14],ymm7[14],ymm2[15],ymm7[15],ymm2[24],ymm7[24],ymm2[25],ymm7[25],ymm2[26],ymm7[26],ymm2[27],ymm7[27],ymm2[28],ymm7[28],ymm2[29],ymm7[29],ymm2[30],ymm7[30],ymm2[31],ymm7[31]
1156; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm12 = [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2]
1157; AVX512F-NEXT: # ymm12 = mem[0,1,0,1]
1158; AVX512F-NEXT: vpmullw %ymm12, %ymm11, %ymm11
1159; AVX512F-NEXT: vpsrlw $8, %ymm11, %ymm11
1160; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm7[0],ymm2[1],ymm7[1],ymm2[2],ymm7[2],ymm2[3],ymm7[3],ymm2[4],ymm7[4],ymm2[5],ymm7[5],ymm2[6],ymm7[6],ymm2[7],ymm7[7],ymm2[16],ymm7[16],ymm2[17],ymm7[17],ymm2[18],ymm7[18],ymm2[19],ymm7[19],ymm2[20],ymm7[20],ymm2[21],ymm7[21],ymm2[22],ymm7[22],ymm2[23],ymm7[23]
1161; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm13 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
1162; AVX512F-NEXT: # ymm13 = mem[0,1,0,1]
1163; AVX512F-NEXT: vpmullw %ymm13, %ymm2, %ymm2
1164; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
1165; AVX512F-NEXT: vpackuswb %ymm11, %ymm2, %ymm2
1166; AVX512F-NEXT: vpor %ymm2, %ymm4, %ymm2
1167; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm4 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
1168; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
1169; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm2
1170; AVX512F-NEXT: vpand %ymm5, %ymm2, %ymm2
1171; AVX512F-NEXT: vpblendvb %ymm6, %ymm2, %ymm1, %ymm2
1172; AVX512F-NEXT: vpsllw $2, %ymm2, %ymm5
1173; AVX512F-NEXT: vpand %ymm8, %ymm5, %ymm5
1174; AVX512F-NEXT: vpblendvb %ymm9, %ymm5, %ymm2, %ymm2
1175; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm5
1176; AVX512F-NEXT: vpblendvb %ymm10, %ymm5, %ymm2, %ymm2
1177; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm3[8],ymm7[8],ymm3[9],ymm7[9],ymm3[10],ymm7[10],ymm3[11],ymm7[11],ymm3[12],ymm7[12],ymm3[13],ymm7[13],ymm3[14],ymm7[14],ymm3[15],ymm7[15],ymm3[24],ymm7[24],ymm3[25],ymm7[25],ymm3[26],ymm7[26],ymm3[27],ymm7[27],ymm3[28],ymm7[28],ymm3[29],ymm7[29],ymm3[30],ymm7[30],ymm3[31],ymm7[31]
1178; AVX512F-NEXT: vpmullw %ymm12, %ymm5, %ymm5
1179; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
1180; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm7[0],ymm3[1],ymm7[1],ymm3[2],ymm7[2],ymm3[3],ymm7[3],ymm3[4],ymm7[4],ymm3[5],ymm7[5],ymm3[6],ymm7[6],ymm3[7],ymm7[7],ymm3[16],ymm7[16],ymm3[17],ymm7[17],ymm3[18],ymm7[18],ymm3[19],ymm7[19],ymm3[20],ymm7[20],ymm3[21],ymm7[21],ymm3[22],ymm7[22],ymm3[23],ymm7[23]
1181; AVX512F-NEXT: vpmullw %ymm13, %ymm3, %ymm3
1182; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
1183; AVX512F-NEXT: vpackuswb %ymm5, %ymm3, %ymm3
1184; AVX512F-NEXT: vpor %ymm3, %ymm2, %ymm2
1185; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
1186; AVX512F-NEXT: retq
1187;
1188; AVX512VL-LABEL: constant_funnnel_v64i8:
1189; AVX512VL: # %bb.0:
1190; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
1191; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1192; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
1193; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
1194; AVX512VL-NEXT: vpblendvb %ymm6, %ymm4, %ymm0, %ymm4
1195; AVX512VL-NEXT: vpsllw $2, %ymm4, %ymm7
1196; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
1197; AVX512VL-NEXT: vpand %ymm8, %ymm7, %ymm7
1198; AVX512VL-NEXT: vpaddb %ymm6, %ymm6, %ymm9
1199; AVX512VL-NEXT: vpblendvb %ymm9, %ymm7, %ymm4, %ymm4
1200; AVX512VL-NEXT: vpaddb %ymm4, %ymm4, %ymm7
1201; AVX512VL-NEXT: vpaddb %ymm9, %ymm9, %ymm10
1202; AVX512VL-NEXT: vpblendvb %ymm10, %ymm7, %ymm4, %ymm4
1203; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm7 = ymm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1204; AVX512VL-NEXT: vpsrlw $8, %ymm7, %ymm7
1205; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm11 = [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2]
1206; AVX512VL-NEXT: # ymm11 = mem[0,1,0,1]
1207; AVX512VL-NEXT: vpmullw %ymm11, %ymm7, %ymm7
1208; AVX512VL-NEXT: vpsrlw $8, %ymm7, %ymm7
1209; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1210; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
1211; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm12 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
1212; AVX512VL-NEXT: # ymm12 = mem[0,1,0,1]
1213; AVX512VL-NEXT: vpmullw %ymm12, %ymm2, %ymm2
1214; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
1215; AVX512VL-NEXT: vpackuswb %ymm7, %ymm2, %ymm2
1216; AVX512VL-NEXT: vpor %ymm2, %ymm4, %ymm2
1217; AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm4 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
1218; AVX512VL-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
1219; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm2
1220; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
1221; AVX512VL-NEXT: vpblendvb %ymm6, %ymm2, %ymm1, %ymm2
1222; AVX512VL-NEXT: vpsllw $2, %ymm2, %ymm5
1223; AVX512VL-NEXT: vpand %ymm8, %ymm5, %ymm5
1224; AVX512VL-NEXT: vpblendvb %ymm9, %ymm5, %ymm2, %ymm2
1225; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm5
1226; AVX512VL-NEXT: vpblendvb %ymm10, %ymm5, %ymm2, %ymm2
1227; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
1228; AVX512VL-NEXT: vpsrlw $8, %ymm5, %ymm5
1229; AVX512VL-NEXT: vpmullw %ymm11, %ymm5, %ymm5
1230; AVX512VL-NEXT: vpsrlw $8, %ymm5, %ymm5
1231; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
1232; AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3
1233; AVX512VL-NEXT: vpmullw %ymm12, %ymm3, %ymm3
1234; AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3
1235; AVX512VL-NEXT: vpackuswb %ymm5, %ymm3, %ymm3
1236; AVX512VL-NEXT: vpor %ymm3, %ymm2, %ymm2
1237; AVX512VL-NEXT: vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
1238; AVX512VL-NEXT: retq
1239;
1240; AVX512BW-LABEL: constant_funnnel_v64i8:
1241; AVX512BW: # %bb.0:
1242; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
1243; AVX512BW-NEXT: vpmovb2m %zmm2, %k1
1244; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm3
1245; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
1246; AVX512BW-NEXT: vpblendmb %zmm3, %zmm0, %zmm3 {%k1}
1247; AVX512BW-NEXT: vpsllw $2, %zmm3, %zmm4
1248; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
1249; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm2
1250; AVX512BW-NEXT: vpmovb2m %zmm2, %k1
1251; AVX512BW-NEXT: vmovdqu8 %zmm4, %zmm3 {%k1}
1252; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm2
1253; AVX512BW-NEXT: vpmovb2m %zmm2, %k1
1254; AVX512BW-NEXT: vpaddb %zmm3, %zmm3, %zmm3 {%k1}
1255; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
1256; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
1257; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm2, %zmm2
1258; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
1259; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
1260; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
1261; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm1, %zmm1
1262; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
1263; AVX512BW-NEXT: vpackuswb %zmm2, %zmm1, %zmm1
1264; AVX512BW-NEXT: vporq %zmm1, %zmm3, %zmm1
1265; AVX512BW-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
1266; AVX512BW-NEXT: kmovq %rax, %k1
1267; AVX512BW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
1268; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
1269; AVX512BW-NEXT: retq
1270;
Craig Topper6ffeeb72019-01-06 18:10:18 +00001271; AVX512VBMI2-LABEL: constant_funnnel_v64i8:
1272; AVX512VBMI2: # %bb.0:
1273; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
1274; AVX512VBMI2-NEXT: vpmovb2m %zmm2, %k1
1275; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm3
1276; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
1277; AVX512VBMI2-NEXT: vpblendmb %zmm3, %zmm0, %zmm3 {%k1}
1278; AVX512VBMI2-NEXT: vpsllw $2, %zmm3, %zmm4
1279; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
1280; AVX512VBMI2-NEXT: vpaddb %zmm2, %zmm2, %zmm2
1281; AVX512VBMI2-NEXT: vpmovb2m %zmm2, %k1
1282; AVX512VBMI2-NEXT: vmovdqu8 %zmm4, %zmm3 {%k1}
1283; AVX512VBMI2-NEXT: vpaddb %zmm2, %zmm2, %zmm2
1284; AVX512VBMI2-NEXT: vpmovb2m %zmm2, %k1
1285; AVX512VBMI2-NEXT: vpaddb %zmm3, %zmm3, %zmm3 {%k1}
1286; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
1287; AVX512VBMI2-NEXT: vpsrlw $8, %zmm2, %zmm2
1288; AVX512VBMI2-NEXT: vpsllvw {{.*}}(%rip), %zmm2, %zmm2
1289; AVX512VBMI2-NEXT: vpsrlw $8, %zmm2, %zmm2
1290; AVX512VBMI2-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
1291; AVX512VBMI2-NEXT: vpsrlw $8, %zmm1, %zmm1
1292; AVX512VBMI2-NEXT: vpsllvw {{.*}}(%rip), %zmm1, %zmm1
1293; AVX512VBMI2-NEXT: vpsrlw $8, %zmm1, %zmm1
1294; AVX512VBMI2-NEXT: vpackuswb %zmm2, %zmm1, %zmm1
1295; AVX512VBMI2-NEXT: vporq %zmm1, %zmm3, %zmm1
1296; AVX512VBMI2-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
1297; AVX512VBMI2-NEXT: kmovq %rax, %k1
1298; AVX512VBMI2-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
1299; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
1300; AVX512VBMI2-NEXT: retq
1301;
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001302; AVX512VLBW-LABEL: constant_funnnel_v64i8:
1303; AVX512VLBW: # %bb.0:
1304; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
1305; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k1
1306; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm3
1307; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
1308; AVX512VLBW-NEXT: vpblendmb %zmm3, %zmm0, %zmm3 {%k1}
1309; AVX512VLBW-NEXT: vpsllw $2, %zmm3, %zmm4
1310; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
1311; AVX512VLBW-NEXT: vpaddb %zmm2, %zmm2, %zmm2
1312; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k1
1313; AVX512VLBW-NEXT: vmovdqu8 %zmm4, %zmm3 {%k1}
1314; AVX512VLBW-NEXT: vpaddb %zmm2, %zmm2, %zmm2
1315; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k1
1316; AVX512VLBW-NEXT: vpaddb %zmm3, %zmm3, %zmm3 {%k1}
1317; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
1318; AVX512VLBW-NEXT: vpsrlw $8, %zmm2, %zmm2
1319; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm2, %zmm2
1320; AVX512VLBW-NEXT: vpsrlw $8, %zmm2, %zmm2
1321; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
1322; AVX512VLBW-NEXT: vpsrlw $8, %zmm1, %zmm1
1323; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm1, %zmm1
1324; AVX512VLBW-NEXT: vpsrlw $8, %zmm1, %zmm1
1325; AVX512VLBW-NEXT: vpackuswb %zmm2, %zmm1, %zmm1
1326; AVX512VLBW-NEXT: vporq %zmm1, %zmm3, %zmm1
1327; AVX512VLBW-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
1328; AVX512VLBW-NEXT: kmovq %rax, %k1
1329; AVX512VLBW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
1330; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0
1331; AVX512VLBW-NEXT: retq
Craig Topper6ffeeb72019-01-06 18:10:18 +00001332;
1333; AVX512VLVBMI2-LABEL: constant_funnnel_v64i8:
1334; AVX512VLVBMI2: # %bb.0:
1335; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
1336; AVX512VLVBMI2-NEXT: vpmovb2m %zmm2, %k1
1337; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm3
1338; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
1339; AVX512VLVBMI2-NEXT: vpblendmb %zmm3, %zmm0, %zmm3 {%k1}
1340; AVX512VLVBMI2-NEXT: vpsllw $2, %zmm3, %zmm4
1341; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
1342; AVX512VLVBMI2-NEXT: vpaddb %zmm2, %zmm2, %zmm2
1343; AVX512VLVBMI2-NEXT: vpmovb2m %zmm2, %k1
1344; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm4, %zmm3 {%k1}
1345; AVX512VLVBMI2-NEXT: vpaddb %zmm2, %zmm2, %zmm2
1346; AVX512VLVBMI2-NEXT: vpmovb2m %zmm2, %k1
1347; AVX512VLVBMI2-NEXT: vpaddb %zmm3, %zmm3, %zmm3 {%k1}
1348; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
1349; AVX512VLVBMI2-NEXT: vpsrlw $8, %zmm2, %zmm2
1350; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %zmm2, %zmm2
1351; AVX512VLVBMI2-NEXT: vpsrlw $8, %zmm2, %zmm2
1352; AVX512VLVBMI2-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
1353; AVX512VLVBMI2-NEXT: vpsrlw $8, %zmm1, %zmm1
1354; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %zmm1, %zmm1
1355; AVX512VLVBMI2-NEXT: vpsrlw $8, %zmm1, %zmm1
1356; AVX512VLVBMI2-NEXT: vpackuswb %zmm2, %zmm1, %zmm1
1357; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm3, %zmm1
1358; AVX512VLVBMI2-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
1359; AVX512VLVBMI2-NEXT: kmovq %rax, %k1
1360; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
1361; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0
1362; AVX512VLVBMI2-NEXT: retq
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001363 %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>)
1364 ret <64 x i8> %res
1365}
1366
1367;
1368; Uniform Constant Shifts
1369;
1370
1371define <8 x i64> @splatconstant_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
Craig Topper6ffeeb72019-01-06 18:10:18 +00001372; AVX512F-LABEL: splatconstant_funnnel_v8i64:
1373; AVX512F: # %bb.0:
1374; AVX512F-NEXT: vpsrlq $50, %zmm1, %zmm1
1375; AVX512F-NEXT: vpsllq $14, %zmm0, %zmm0
1376; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
1377; AVX512F-NEXT: retq
1378;
1379; AVX512VL-LABEL: splatconstant_funnnel_v8i64:
1380; AVX512VL: # %bb.0:
1381; AVX512VL-NEXT: vpsrlq $50, %zmm1, %zmm1
1382; AVX512VL-NEXT: vpsllq $14, %zmm0, %zmm0
1383; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
1384; AVX512VL-NEXT: retq
1385;
1386; AVX512BW-LABEL: splatconstant_funnnel_v8i64:
1387; AVX512BW: # %bb.0:
1388; AVX512BW-NEXT: vpsrlq $50, %zmm1, %zmm1
1389; AVX512BW-NEXT: vpsllq $14, %zmm0, %zmm0
1390; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
1391; AVX512BW-NEXT: retq
1392;
1393; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i64:
1394; AVX512VBMI2: # %bb.0:
1395; AVX512VBMI2-NEXT: vpshldq $14, %zmm1, %zmm0, %zmm0
1396; AVX512VBMI2-NEXT: retq
1397;
1398; AVX512VLBW-LABEL: splatconstant_funnnel_v8i64:
1399; AVX512VLBW: # %bb.0:
1400; AVX512VLBW-NEXT: vpsrlq $50, %zmm1, %zmm1
1401; AVX512VLBW-NEXT: vpsllq $14, %zmm0, %zmm0
1402; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
1403; AVX512VLBW-NEXT: retq
1404;
1405; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i64:
1406; AVX512VLVBMI2: # %bb.0:
1407; AVX512VLVBMI2-NEXT: vpshldq $14, %zmm1, %zmm0, %zmm0
1408; AVX512VLVBMI2-NEXT: retq
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001409 %res = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> <i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14>)
1410 ret <8 x i64> %res
1411}
1412
1413define <16 x i32> @splatconstant_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
Craig Topper6ffeeb72019-01-06 18:10:18 +00001414; AVX512F-LABEL: splatconstant_funnnel_v16i32:
1415; AVX512F: # %bb.0:
1416; AVX512F-NEXT: vpsrld $28, %zmm1, %zmm1
1417; AVX512F-NEXT: vpslld $4, %zmm0, %zmm0
1418; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
1419; AVX512F-NEXT: retq
1420;
1421; AVX512VL-LABEL: splatconstant_funnnel_v16i32:
1422; AVX512VL: # %bb.0:
1423; AVX512VL-NEXT: vpsrld $28, %zmm1, %zmm1
1424; AVX512VL-NEXT: vpslld $4, %zmm0, %zmm0
1425; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0
1426; AVX512VL-NEXT: retq
1427;
1428; AVX512BW-LABEL: splatconstant_funnnel_v16i32:
1429; AVX512BW: # %bb.0:
1430; AVX512BW-NEXT: vpsrld $28, %zmm1, %zmm1
1431; AVX512BW-NEXT: vpslld $4, %zmm0, %zmm0
1432; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
1433; AVX512BW-NEXT: retq
1434;
1435; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i32:
1436; AVX512VBMI2: # %bb.0:
1437; AVX512VBMI2-NEXT: vpshldd $4, %zmm1, %zmm0, %zmm0
1438; AVX512VBMI2-NEXT: retq
1439;
1440; AVX512VLBW-LABEL: splatconstant_funnnel_v16i32:
1441; AVX512VLBW: # %bb.0:
1442; AVX512VLBW-NEXT: vpsrld $28, %zmm1, %zmm1
1443; AVX512VLBW-NEXT: vpslld $4, %zmm0, %zmm0
1444; AVX512VLBW-NEXT: vpord %zmm1, %zmm0, %zmm0
1445; AVX512VLBW-NEXT: retq
1446;
1447; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i32:
1448; AVX512VLVBMI2: # %bb.0:
1449; AVX512VLVBMI2-NEXT: vpshldd $4, %zmm1, %zmm0, %zmm0
1450; AVX512VLVBMI2-NEXT: retq
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001451 %res = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>)
1452 ret <16 x i32> %res
1453}
1454
1455define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
1456; AVX512F-LABEL: splatconstant_funnnel_v32i16:
1457; AVX512F: # %bb.0:
1458; AVX512F-NEXT: vpsrlw $9, %ymm2, %ymm2
1459; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
1460; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
1461; AVX512F-NEXT: vpsrlw $9, %ymm3, %ymm2
1462; AVX512F-NEXT: vpsllw $7, %ymm1, %ymm1
1463; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
1464; AVX512F-NEXT: retq
1465;
1466; AVX512VL-LABEL: splatconstant_funnnel_v32i16:
1467; AVX512VL: # %bb.0:
1468; AVX512VL-NEXT: vpsrlw $9, %ymm2, %ymm2
1469; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0
1470; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
1471; AVX512VL-NEXT: vpsrlw $9, %ymm3, %ymm2
1472; AVX512VL-NEXT: vpsllw $7, %ymm1, %ymm1
1473; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
1474; AVX512VL-NEXT: retq
1475;
1476; AVX512BW-LABEL: splatconstant_funnnel_v32i16:
1477; AVX512BW: # %bb.0:
1478; AVX512BW-NEXT: vpsrlw $9, %zmm1, %zmm1
1479; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0
1480; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
1481; AVX512BW-NEXT: retq
1482;
Craig Topper6ffeeb72019-01-06 18:10:18 +00001483; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i16:
1484; AVX512VBMI2: # %bb.0:
1485; AVX512VBMI2-NEXT: vpshldw $7, %zmm1, %zmm0, %zmm0
1486; AVX512VBMI2-NEXT: retq
1487;
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001488; AVX512VLBW-LABEL: splatconstant_funnnel_v32i16:
1489; AVX512VLBW: # %bb.0:
1490; AVX512VLBW-NEXT: vpsrlw $9, %zmm1, %zmm1
1491; AVX512VLBW-NEXT: vpsllw $7, %zmm0, %zmm0
1492; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
1493; AVX512VLBW-NEXT: retq
Craig Topper6ffeeb72019-01-06 18:10:18 +00001494;
1495; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i16:
1496; AVX512VLVBMI2: # %bb.0:
1497; AVX512VLVBMI2-NEXT: vpshldw $7, %zmm1, %zmm0, %zmm0
1498; AVX512VLVBMI2-NEXT: retq
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001499 %res = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
1500 ret <32 x i16> %res
1501}
1502
1503define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
1504; AVX512F-LABEL: splatconstant_funnnel_v64i8:
1505; AVX512F: # %bb.0:
1506; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
Simon Pilgrim933673d2019-01-22 13:44:49 +00001507; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1508; AVX512F-NEXT: vpandn %ymm2, %ymm4, %ymm2
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001509; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
Simon Pilgrim933673d2019-01-22 13:44:49 +00001510; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001511; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
1512; AVX512F-NEXT: vpsrlw $4, %ymm3, %ymm2
Simon Pilgrim933673d2019-01-22 13:44:49 +00001513; AVX512F-NEXT: vpandn %ymm2, %ymm4, %ymm2
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001514; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm1
Simon Pilgrim933673d2019-01-22 13:44:49 +00001515; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001516; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
1517; AVX512F-NEXT: retq
1518;
1519; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
1520; AVX512VL: # %bb.0:
1521; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
Simon Pilgrim933673d2019-01-22 13:44:49 +00001522; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
1523; AVX512VL-NEXT: vpandn %ymm2, %ymm4, %ymm2
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001524; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
Simon Pilgrim933673d2019-01-22 13:44:49 +00001525; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001526; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
1527; AVX512VL-NEXT: vpsrlw $4, %ymm3, %ymm2
Simon Pilgrim933673d2019-01-22 13:44:49 +00001528; AVX512VL-NEXT: vpandn %ymm2, %ymm4, %ymm2
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001529; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
Simon Pilgrim933673d2019-01-22 13:44:49 +00001530; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001531; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
1532; AVX512VL-NEXT: retq
1533;
1534; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
1535; AVX512BW: # %bb.0:
1536; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm1
1537; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
1538; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
1539; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
1540; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
1541; AVX512BW-NEXT: retq
1542;
Craig Topper6ffeeb72019-01-06 18:10:18 +00001543; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
1544; AVX512VBMI2: # %bb.0:
1545; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1
1546; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
1547; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
1548; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
1549; AVX512VBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
1550; AVX512VBMI2-NEXT: retq
1551;
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001552; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
1553; AVX512VLBW: # %bb.0:
1554; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm1
1555; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
1556; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
1557; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
1558; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
1559; AVX512VLBW-NEXT: retq
Craig Topper6ffeeb72019-01-06 18:10:18 +00001560;
1561; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
1562; AVX512VLVBMI2: # %bb.0:
1563; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1
1564; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
1565; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
1566; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
1567; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
1568; AVX512VLVBMI2-NEXT: retq
Simon Pilgrimba8e84b2018-12-18 10:32:54 +00001569 %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
1570 ret <64 x i8> %res
1571}