; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLBW

; Declarations of the funnel-shift-left intrinsic at each 512-bit element width
; exercised by this test.
declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
declare <64 x i8> @llvm.fshl.v64i8(<64 x i8>, <64 x i8>, <64 x i8>)

;
; Variable Shifts
;

; Funnel shift left of v8i64 by a fully-variable per-element amount.
define <8 x i64> @var_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt) nounwind {
; AVX512-LABEL: var_funnnel_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
; AVX512-NEXT:    vpandq %zmm3, %zmm2, %zmm4
; AVX512-NEXT:    vpsllvq %zmm4, %zmm0, %zmm5
; AVX512-NEXT:    vpbroadcastq {{.*#+}} zmm6 = [64,64,64,64,64,64,64,64]
; AVX512-NEXT:    vpsubq %zmm4, %zmm6, %zmm4
; AVX512-NEXT:    vpsrlvq %zmm4, %zmm1, %zmm1
; AVX512-NEXT:    vporq %zmm1, %zmm5, %zmm1
; AVX512-NEXT:    vptestnmq %zmm3, %zmm2, %k1
; AVX512-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
; AVX512-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512-NEXT:    retq
  %res = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt)
  ret <8 x i64> %res
}

; Funnel shift left of v16i32 by a fully-variable per-element amount.
define <16 x i32> @var_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> %amt) nounwind {
; AVX512-LABEL: var_funnnel_v16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512-NEXT:    vpandd %zmm3, %zmm2, %zmm4
; AVX512-NEXT:    vpsllvd %zmm4, %zmm0, %zmm5
; AVX512-NEXT:    vpbroadcastd {{.*#+}} zmm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; AVX512-NEXT:    vpsubd %zmm4, %zmm6, %zmm4
; AVX512-NEXT:    vpsrlvd %zmm4, %zmm1, %zmm1
; AVX512-NEXT:    vpord %zmm1, %zmm5, %zmm1
; AVX512-NEXT:    vptestnmd %zmm3, %zmm2, %k1
; AVX512-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; AVX512-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512-NEXT:    retq
  %res = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> %amt)
  ret <16 x i32> %res
}

; Funnel shift left of v32i16 by a fully-variable per-element amount.
; Without AVX512BW there is no 16-bit variable shift, so the non-BW prefixes
; widen each 256-bit half to 32-bit elements.
define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: var_funnnel_v32i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm6 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT:    vpand %ymm6, %ymm4, %ymm4
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm7 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm8 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT:    vpsllvd %zmm7, %zmm8, %zmm7
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm8 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512F-NEXT:    vpsubw %ymm4, %ymm8, %ymm9
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm9 = ymm9[0],zero,ymm9[1],zero,ymm9[2],zero,ymm9[3],zero,ymm9[4],zero,ymm9[5],zero,ymm9[6],zero,ymm9[7],zero,ymm9[8],zero,ymm9[9],zero,ymm9[10],zero,ymm9[11],zero,ymm9[12],zero,ymm9[13],zero,ymm9[14],zero,ymm9[15],zero
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT:    vpsrlvd %zmm9, %zmm2, %zmm2
; AVX512F-NEXT:    vpord %zmm2, %zmm7, %zmm2
; AVX512F-NEXT:    vpmovdw %zmm2, %ymm2
; AVX512F-NEXT:    vpxor %xmm7, %xmm7, %xmm7
; AVX512F-NEXT:    vpcmpeqw %ymm7, %ymm4, %ymm4
; AVX512F-NEXT:    vpblendvb %ymm4, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    vpand %ymm6, %ymm5, %ymm2
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm5 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT:    vpsllvd %zmm4, %zmm5, %zmm4
; AVX512F-NEXT:    vpsubw %ymm2, %ymm8, %ymm5
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm5 = ymm5[0],zero,ymm5[1],zero,ymm5[2],zero,ymm5[3],zero,ymm5[4],zero,ymm5[5],zero,ymm5[6],zero,ymm5[7],zero,ymm5[8],zero,ymm5[9],zero,ymm5[10],zero,ymm5[11],zero,ymm5[12],zero,ymm5[13],zero,ymm5[14],zero,ymm5[15],zero
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
; AVX512F-NEXT:    vpsrlvd %zmm5, %zmm3, %zmm3
; AVX512F-NEXT:    vpord %zmm3, %zmm4, %zmm3
; AVX512F-NEXT:    vpmovdw %zmm3, %ymm3
; AVX512F-NEXT:    vpcmpeqw %ymm7, %ymm2, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm1, %ymm3, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: var_funnnel_v32i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm6 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT:    vpand %ymm6, %ymm4, %ymm4
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm7 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm8 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT:    vpsllvd %zmm7, %zmm8, %zmm7
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm8 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-NEXT:    vpsubw %ymm4, %ymm8, %ymm9
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm9 = ymm9[0],zero,ymm9[1],zero,ymm9[2],zero,ymm9[3],zero,ymm9[4],zero,ymm9[5],zero,ymm9[6],zero,ymm9[7],zero,ymm9[8],zero,ymm9[9],zero,ymm9[10],zero,ymm9[11],zero,ymm9[12],zero,ymm9[13],zero,ymm9[14],zero,ymm9[15],zero
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512VL-NEXT:    vpsrlvd %zmm9, %zmm2, %zmm2
; AVX512VL-NEXT:    vpord %zmm2, %zmm7, %zmm2
; AVX512VL-NEXT:    vpmovdw %zmm2, %ymm2
; AVX512VL-NEXT:    vpxor %xmm7, %xmm7, %xmm7
; AVX512VL-NEXT:    vpcmpeqw %ymm7, %ymm4, %ymm4
; AVX512VL-NEXT:    vpblendvb %ymm4, %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT:    vpand %ymm6, %ymm5, %ymm2
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm5 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT:    vpsllvd %zmm4, %zmm5, %zmm4
; AVX512VL-NEXT:    vpsubw %ymm2, %ymm8, %ymm5
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm5 = ymm5[0],zero,ymm5[1],zero,ymm5[2],zero,ymm5[3],zero,ymm5[4],zero,ymm5[5],zero,ymm5[6],zero,ymm5[7],zero,ymm5[8],zero,ymm5[9],zero,ymm5[10],zero,ymm5[11],zero,ymm5[12],zero,ymm5[13],zero,ymm5[14],zero,ymm5[15],zero
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
; AVX512VL-NEXT:    vpsrlvd %zmm5, %zmm3, %zmm3
; AVX512VL-NEXT:    vpord %zmm3, %zmm4, %zmm3
; AVX512VL-NEXT:    vpmovdw %zmm3, %ymm3
; AVX512VL-NEXT:    vpcmpeqw %ymm7, %ymm2, %ymm2
; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm1, %ymm3, %ymm1
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: var_funnnel_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT:    vpandq %zmm3, %zmm2, %zmm4
; AVX512BW-NEXT:    vpsllvw %zmm4, %zmm0, %zmm5
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm6 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512BW-NEXT:    vpsubw %zmm4, %zmm6, %zmm4
; AVX512BW-NEXT:    vpsrlvw %zmm4, %zmm1, %zmm1
; AVX512BW-NEXT:    vporq %zmm1, %zmm5, %zmm1
; AVX512BW-NEXT:    vptestnmw %zmm3, %zmm2, %k1
; AVX512BW-NEXT:    vmovdqu16 %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: var_funnnel_v32i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT:    vpandq %zmm3, %zmm2, %zmm4
; AVX512VLBW-NEXT:    vpsllvw %zmm4, %zmm0, %zmm5
; AVX512VLBW-NEXT:    vmovdqa64 {{.*#+}} zmm6 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT:    vpsubw %zmm4, %zmm6, %zmm4
; AVX512VLBW-NEXT:    vpsrlvw %zmm4, %zmm1, %zmm1
; AVX512VLBW-NEXT:    vporq %zmm1, %zmm5, %zmm1
; AVX512VLBW-NEXT:    vptestnmw %zmm3, %zmm2, %k1
; AVX512VLBW-NEXT:    vmovdqu16 %zmm0, %zmm1 {%k1}
; AVX512VLBW-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512VLBW-NEXT:    retq
  %res = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt)
  ret <32 x i16> %res
}

; Funnel shift left of v64i8 by a fully-variable per-element amount.
; There is no byte-granularity variable shift, so the non-BW prefixes lower
; each half via shift-and-blend ladders; the BW prefixes use mask registers.
define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) nounwind {
; AVX512F-LABEL: var_funnnel_v64i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsllw $4, %ymm0, %ymm7
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm6 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT:    vpand %ymm6, %ymm7, %ymm8
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm7 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512F-NEXT:    vpand %ymm7, %ymm4, %ymm9
; AVX512F-NEXT:    vpsllw $5, %ymm9, %ymm10
; AVX512F-NEXT:    vpblendvb %ymm10, %ymm8, %ymm0, %ymm8
; AVX512F-NEXT:    vpsllw $2, %ymm8, %ymm11
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512F-NEXT:    vpand %ymm4, %ymm11, %ymm11
; AVX512F-NEXT:    vpaddb %ymm10, %ymm10, %ymm10
; AVX512F-NEXT:    vpblendvb %ymm10, %ymm11, %ymm8, %ymm8
; AVX512F-NEXT:    vpaddb %ymm8, %ymm8, %ymm11
; AVX512F-NEXT:    vpaddb %ymm10, %ymm10, %ymm10
; AVX512F-NEXT:    vpblendvb %ymm10, %ymm11, %ymm8, %ymm10
; AVX512F-NEXT:    vpsrlw $4, %ymm2, %ymm11
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm8 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT:    vpand %ymm8, %ymm11, %ymm11
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm12 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512F-NEXT:    vpsubb %ymm9, %ymm12, %ymm13
; AVX512F-NEXT:    vpsllw $5, %ymm13, %ymm13
; AVX512F-NEXT:    vpblendvb %ymm13, %ymm11, %ymm2, %ymm2
; AVX512F-NEXT:    vpsrlw $2, %ymm2, %ymm11
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm14 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512F-NEXT:    vpand %ymm14, %ymm11, %ymm11
; AVX512F-NEXT:    vpaddb %ymm13, %ymm13, %ymm13
; AVX512F-NEXT:    vpblendvb %ymm13, %ymm11, %ymm2, %ymm2
; AVX512F-NEXT:    vpsrlw $1, %ymm2, %ymm11
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm15 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT:    vpand %ymm15, %ymm11, %ymm11
; AVX512F-NEXT:    vpaddb %ymm13, %ymm13, %ymm13
; AVX512F-NEXT:    vpblendvb %ymm13, %ymm11, %ymm2, %ymm2
; AVX512F-NEXT:    vpor %ymm2, %ymm10, %ymm2
; AVX512F-NEXT:    vpxor %xmm10, %xmm10, %xmm10
; AVX512F-NEXT:    vpcmpeqb %ymm10, %ymm9, %ymm9
; AVX512F-NEXT:    vpblendvb %ymm9, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    vpsllw $4, %ymm1, %ymm2
; AVX512F-NEXT:    vpand %ymm6, %ymm2, %ymm2
; AVX512F-NEXT:    vpand %ymm7, %ymm5, %ymm5
; AVX512F-NEXT:    vpsllw $5, %ymm5, %ymm6
; AVX512F-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm2
; AVX512F-NEXT:    vpsllw $2, %ymm2, %ymm7
; AVX512F-NEXT:    vpand %ymm4, %ymm7, %ymm4
; AVX512F-NEXT:    vpaddb %ymm6, %ymm6, %ymm6
; AVX512F-NEXT:    vpblendvb %ymm6, %ymm4, %ymm2, %ymm2
; AVX512F-NEXT:    vpaddb %ymm2, %ymm2, %ymm4
; AVX512F-NEXT:    vpaddb %ymm6, %ymm6, %ymm6
; AVX512F-NEXT:    vpblendvb %ymm6, %ymm4, %ymm2, %ymm2
; AVX512F-NEXT:    vpsrlw $4, %ymm3, %ymm4
; AVX512F-NEXT:    vpand %ymm8, %ymm4, %ymm4
; AVX512F-NEXT:    vpsubb %ymm5, %ymm12, %ymm6
; AVX512F-NEXT:    vpsllw $5, %ymm6, %ymm6
; AVX512F-NEXT:    vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512F-NEXT:    vpsrlw $2, %ymm3, %ymm4
; AVX512F-NEXT:    vpand %ymm14, %ymm4, %ymm4
; AVX512F-NEXT:    vpaddb %ymm6, %ymm6, %ymm6
; AVX512F-NEXT:    vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512F-NEXT:    vpsrlw $1, %ymm3, %ymm4
; AVX512F-NEXT:    vpand %ymm15, %ymm4, %ymm4
; AVX512F-NEXT:    vpaddb %ymm6, %ymm6, %ymm6
; AVX512F-NEXT:    vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512F-NEXT:    vpor %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpcmpeqb %ymm10, %ymm5, %ymm3
; AVX512F-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: var_funnnel_v64i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm6
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm7 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT:    vpand %ymm7, %ymm6, %ymm6
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm8 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VL-NEXT:    vpand %ymm8, %ymm4, %ymm4
; AVX512VL-NEXT:    vpsllw $5, %ymm4, %ymm9
; AVX512VL-NEXT:    vpblendvb %ymm9, %ymm6, %ymm0, %ymm6
; AVX512VL-NEXT:    vpsllw $2, %ymm6, %ymm10
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm11 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512VL-NEXT:    vpand %ymm11, %ymm10, %ymm10
; AVX512VL-NEXT:    vpaddb %ymm9, %ymm9, %ymm9
; AVX512VL-NEXT:    vpblendvb %ymm9, %ymm10, %ymm6, %ymm6
; AVX512VL-NEXT:    vpaddb %ymm6, %ymm6, %ymm10
; AVX512VL-NEXT:    vpaddb %ymm9, %ymm9, %ymm9
; AVX512VL-NEXT:    vpblendvb %ymm9, %ymm10, %ymm6, %ymm6
; AVX512VL-NEXT:    vpsrlw $4, %ymm2, %ymm9
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm10 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT:    vpand %ymm10, %ymm9, %ymm9
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm12 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512VL-NEXT:    vpsubb %ymm4, %ymm12, %ymm13
; AVX512VL-NEXT:    vpsllw $5, %ymm13, %ymm13
; AVX512VL-NEXT:    vpblendvb %ymm13, %ymm9, %ymm2, %ymm2
; AVX512VL-NEXT:    vpsrlw $2, %ymm2, %ymm9
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm14 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512VL-NEXT:    vpand %ymm14, %ymm9, %ymm9
; AVX512VL-NEXT:    vpaddb %ymm13, %ymm13, %ymm13
; AVX512VL-NEXT:    vpblendvb %ymm13, %ymm9, %ymm2, %ymm2
; AVX512VL-NEXT:    vpsrlw $1, %ymm2, %ymm9
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm15 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512VL-NEXT:    vpand %ymm15, %ymm9, %ymm9
; AVX512VL-NEXT:    vpaddb %ymm13, %ymm13, %ymm13
; AVX512VL-NEXT:    vpblendvb %ymm13, %ymm9, %ymm2, %ymm2
; AVX512VL-NEXT:    vpor %ymm2, %ymm6, %ymm2
; AVX512VL-NEXT:    vpxor %xmm6, %xmm6, %xmm6
; AVX512VL-NEXT:    vpcmpeqb %ymm6, %ymm4, %ymm4
; AVX512VL-NEXT:    vpblendvb %ymm4, %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT:    vpsllw $4, %ymm1, %ymm2
; AVX512VL-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512VL-NEXT:    vpand %ymm8, %ymm5, %ymm4
; AVX512VL-NEXT:    vpsllw $5, %ymm4, %ymm5
; AVX512VL-NEXT:    vpblendvb %ymm5, %ymm2, %ymm1, %ymm2
; AVX512VL-NEXT:    vpsllw $2, %ymm2, %ymm7
; AVX512VL-NEXT:    vpand %ymm11, %ymm7, %ymm7
; AVX512VL-NEXT:    vpaddb %ymm5, %ymm5, %ymm5
; AVX512VL-NEXT:    vpblendvb %ymm5, %ymm7, %ymm2, %ymm2
; AVX512VL-NEXT:    vpaddb %ymm2, %ymm2, %ymm7
; AVX512VL-NEXT:    vpaddb %ymm5, %ymm5, %ymm5
; AVX512VL-NEXT:    vpblendvb %ymm5, %ymm7, %ymm2, %ymm2
; AVX512VL-NEXT:    vpsrlw $4, %ymm3, %ymm5
; AVX512VL-NEXT:    vpand %ymm10, %ymm5, %ymm5
; AVX512VL-NEXT:    vpsubb %ymm4, %ymm12, %ymm7
; AVX512VL-NEXT:    vpsllw $5, %ymm7, %ymm7
; AVX512VL-NEXT:    vpblendvb %ymm7, %ymm5, %ymm3, %ymm3
; AVX512VL-NEXT:    vpsrlw $2, %ymm3, %ymm5
; AVX512VL-NEXT:    vpand %ymm14, %ymm5, %ymm5
; AVX512VL-NEXT:    vpaddb %ymm7, %ymm7, %ymm7
; AVX512VL-NEXT:    vpblendvb %ymm7, %ymm5, %ymm3, %ymm3
; AVX512VL-NEXT:    vpsrlw $1, %ymm3, %ymm5
; AVX512VL-NEXT:    vpand %ymm15, %ymm5, %ymm5
; AVX512VL-NEXT:    vpaddb %ymm7, %ymm7, %ymm7
; AVX512VL-NEXT:    vpblendvb %ymm7, %ymm5, %ymm3, %ymm3
; AVX512VL-NEXT:    vpor %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT:    vpcmpeqb %ymm6, %ymm4, %ymm3
; AVX512VL-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: var_funnnel_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512BW-NEXT:    vpandq %zmm3, %zmm2, %zmm4
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512BW-NEXT:    vpsubb %zmm4, %zmm5, %zmm5
; AVX512BW-NEXT:    vpsllw $5, %zmm5, %zmm5
; AVX512BW-NEXT:    vpaddb %zmm5, %zmm5, %zmm6
; AVX512BW-NEXT:    vpmovb2m %zmm6, %k1
; AVX512BW-NEXT:    vpmovb2m %zmm5, %k2
; AVX512BW-NEXT:    vpsrlw $4, %zmm1, %zmm5
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512BW-NEXT:    vmovdqu8 %zmm5, %zmm1 {%k2}
; AVX512BW-NEXT:    vpsrlw $2, %zmm1, %zmm5
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512BW-NEXT:    vmovdqu8 %zmm5, %zmm1 {%k1}
; AVX512BW-NEXT:    vpsrlw $1, %zmm1, %zmm5
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512BW-NEXT:    vpaddb %zmm6, %zmm6, %zmm6
; AVX512BW-NEXT:    vpmovb2m %zmm6, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm5, %zmm1 {%k1}
; AVX512BW-NEXT:    vpsllw $5, %zmm4, %zmm4
; AVX512BW-NEXT:    vpaddb %zmm4, %zmm4, %zmm5
; AVX512BW-NEXT:    vpmovb2m %zmm5, %k1
; AVX512BW-NEXT:    vpmovb2m %zmm4, %k2
; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm4
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm4, %zmm4
; AVX512BW-NEXT:    vpblendmb %zmm4, %zmm0, %zmm4 {%k2}
; AVX512BW-NEXT:    vpsllw $2, %zmm4, %zmm6
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm6, %zmm6
; AVX512BW-NEXT:    vmovdqu8 %zmm6, %zmm4 {%k1}
; AVX512BW-NEXT:    vpaddb %zmm5, %zmm5, %zmm5
; AVX512BW-NEXT:    vpmovb2m %zmm5, %k1
; AVX512BW-NEXT:    vpaddb %zmm4, %zmm4, %zmm4 {%k1}
; AVX512BW-NEXT:    vporq %zmm1, %zmm4, %zmm1
; AVX512BW-NEXT:    vptestnmb %zmm3, %zmm2, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: var_funnnel_v64i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VLBW-NEXT:    vpandq %zmm3, %zmm2, %zmm4
; AVX512VLBW-NEXT:    vmovdqa64 {{.*#+}} zmm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512VLBW-NEXT:    vpsubb %zmm4, %zmm5, %zmm5
; AVX512VLBW-NEXT:    vpsllw $5, %zmm5, %zmm5
; AVX512VLBW-NEXT:    vpaddb %zmm5, %zmm5, %zmm6
; AVX512VLBW-NEXT:    vpmovb2m %zmm6, %k1
; AVX512VLBW-NEXT:    vpmovb2m %zmm5, %k2
; AVX512VLBW-NEXT:    vpsrlw $4, %zmm1, %zmm5
; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512VLBW-NEXT:    vmovdqu8 %zmm5, %zmm1 {%k2}
; AVX512VLBW-NEXT:    vpsrlw $2, %zmm1, %zmm5
; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512VLBW-NEXT:    vmovdqu8 %zmm5, %zmm1 {%k1}
; AVX512VLBW-NEXT:    vpsrlw $1, %zmm1, %zmm5
; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm5, %zmm5
; AVX512VLBW-NEXT:    vpaddb %zmm6, %zmm6, %zmm6
; AVX512VLBW-NEXT:    vpmovb2m %zmm6, %k1
; AVX512VLBW-NEXT:    vmovdqu8 %zmm5, %zmm1 {%k1}
; AVX512VLBW-NEXT:    vpsllw $5, %zmm4, %zmm4
; AVX512VLBW-NEXT:    vpaddb %zmm4, %zmm4, %zmm5
; AVX512VLBW-NEXT:    vpmovb2m %zmm5, %k1
; AVX512VLBW-NEXT:    vpmovb2m %zmm4, %k2
; AVX512VLBW-NEXT:    vpsllw $4, %zmm0, %zmm4
; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm4, %zmm4
; AVX512VLBW-NEXT:    vpblendmb %zmm4, %zmm0, %zmm4 {%k2}
; AVX512VLBW-NEXT:    vpsllw $2, %zmm4, %zmm6
; AVX512VLBW-NEXT:    vpandq {{.*}}(%rip), %zmm6, %zmm6
; AVX512VLBW-NEXT:    vmovdqu8 %zmm6, %zmm4 {%k1}
; AVX512VLBW-NEXT:    vpaddb %zmm5, %zmm5, %zmm5
; AVX512VLBW-NEXT:    vpmovb2m %zmm5, %k1
; AVX512VLBW-NEXT:    vpaddb %zmm4, %zmm4, %zmm4 {%k1}
; AVX512VLBW-NEXT:    vporq %zmm1, %zmm4, %zmm1
; AVX512VLBW-NEXT:    vptestnmb %zmm3, %zmm2, %k1
; AVX512VLBW-NEXT:    vmovdqu8 %zmm0, %zmm1 {%k1}
; AVX512VLBW-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512VLBW-NEXT:    retq
  %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
  ret <64 x i8> %res
}


;
; Uniform Variable Shifts
;

; Funnel shift left of v8i64 by a splatted (uniform) variable amount.
define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt) nounwind {
; AVX512-LABEL: splatvar_funnnel_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq %xmm2, %zmm2
; AVX512-NEXT:    vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
; AVX512-NEXT:    vpandq %zmm3, %zmm2, %zmm4
; AVX512-NEXT:    vpsllq %xmm4, %zmm0, %zmm5
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm6 = [64,64]
; AVX512-NEXT:    vpsubq %xmm4, %xmm6, %xmm4
; AVX512-NEXT:    vpsrlq %xmm4, %zmm1, %zmm1
; AVX512-NEXT:    vporq %zmm1, %zmm5, %zmm1
; AVX512-NEXT:    vptestnmq %zmm3, %zmm2, %k1
; AVX512-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
; AVX512-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512-NEXT:    retq
  %splat = shufflevector <8 x i64> %amt, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %splat)
  ret <8 x i64> %res
}

; Funnel shift left of v16i32 by a splatted (uniform) variable amount.
define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> %amt) nounwind {
; AVX512-LABEL: splatvar_funnnel_v16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %xmm2, %zmm2
; AVX512-NEXT:    vpbroadcastd {{.*#+}} zmm3 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512-NEXT:    vpandd %zmm3, %zmm2, %zmm4
; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm5 = xmm4[0],zero,xmm4[1],zero
; AVX512-NEXT:    vpslld %xmm5, %zmm0, %zmm5
; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm6 = [32,32,32,32]
; AVX512-NEXT:    vpsubd %xmm4, %xmm6, %xmm4
; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
; AVX512-NEXT:    vpsrld %xmm4, %zmm1, %zmm1
; AVX512-NEXT:    vpord %zmm1, %zmm5, %zmm1
; AVX512-NEXT:    vptestnmd %zmm3, %zmm2, %k1
; AVX512-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; AVX512-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512-NEXT:    retq
  %splat = shufflevector <16 x i32> %amt, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> %splat)
  ret <16 x i32> %res
}

; Funnel shift left of v32i16 by a splatted (uniform) variable amount.
define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpbroadcastw %xmm4, %ymm4
; AVX512F-NEXT:    vpand {{.*}}(%rip), %ymm4, %ymm4
; AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512F-NEXT:    vpsllw %xmm5, %ymm0, %ymm6
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm7 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT:    vpsubw %xmm4, %xmm7, %xmm7
; AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero
; AVX512F-NEXT:    vpsrlw %xmm7, %ymm2, %ymm2
; AVX512F-NEXT:    vpor %ymm2, %ymm6, %ymm2
; AVX512F-NEXT:    vpxor %xmm6, %xmm6, %xmm6
; AVX512F-NEXT:    vpcmpeqw %ymm6, %ymm4, %ymm4
; AVX512F-NEXT:    vpblendvb %ymm4, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    vpsllw %xmm5, %ymm1, %ymm2
; AVX512F-NEXT:    vpsrlw %xmm7, %ymm3, %ymm3
; AVX512F-NEXT:    vpor %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm4, %ymm1, %ymm2, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastw %xmm4, %ymm4
; AVX512VL-NEXT:    vpand {{.*}}(%rip), %ymm4, %ymm4
; AVX512VL-NEXT:    vpmovzxwq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VL-NEXT:    vpsllw %xmm5, %ymm0, %ymm6
; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm7 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT:    vpsubw %xmm4, %xmm7, %xmm7
; AVX512VL-NEXT:    vpmovzxwq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero
; AVX512VL-NEXT:    vpsrlw %xmm7, %ymm2, %ymm2
; AVX512VL-NEXT:    vpor %ymm2, %ymm6, %ymm2
; AVX512VL-NEXT:    vpxor %xmm6, %xmm6, %xmm6
; AVX512VL-NEXT:    vpcmpeqw %ymm6, %ymm4, %ymm4
; AVX512VL-NEXT:    vpblendvb %ymm4, %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT:    vpsllw %xmm5, %ymm1, %ymm2
; AVX512VL-NEXT:    vpsrlw %xmm7, %ymm3, %ymm3
; AVX512VL-NEXT:    vpor %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT:    vpblendvb %ymm4, %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastw %xmm2, %zmm2
; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT:    vpandq %zmm3, %zmm2, %zmm4
; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512BW-NEXT:    vpsllw %xmm5, %zmm0, %zmm5
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm6 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT:    vpsubw %xmm4, %xmm6, %xmm4
; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512BW-NEXT:    vpsrlw %xmm4, %zmm1, %zmm1
; AVX512BW-NEXT:    vporq %zmm1, %zmm5, %zmm1
; AVX512BW-NEXT:    vptestnmw %zmm3, %zmm2, %k1
; AVX512BW-NEXT:    vmovdqu16 %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpbroadcastw %xmm2, %zmm2
; AVX512VLBW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT:    vpandq %zmm3, %zmm2, %zmm4
; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VLBW-NEXT:    vpsllw %xmm5, %zmm0, %zmm5
; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm6 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT:    vpsubw %xmm4, %xmm6, %xmm4
; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VLBW-NEXT:    vpsrlw %xmm4, %zmm1, %zmm1
; AVX512VLBW-NEXT:    vporq %zmm1, %zmm5, %zmm1
; AVX512VLBW-NEXT:    vptestnmw %zmm3, %zmm2, %k1
; AVX512VLBW-NEXT:    vmovdqu16 %zmm0, %zmm1 {%k1}
; AVX512VLBW-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512VLBW-NEXT:    retq
  %splat = shufflevector <32 x i16> %amt, <32 x i16> undef, <32 x i32> zeroinitializer
  %res = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %splat)
  ret <32 x i16> %res
}

491define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) nounwind {
492; AVX512F-LABEL: splatvar_funnnel_v64i8:
493; AVX512F: # %bb.0:
494; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
495; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
496; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
497; AVX512F-NEXT: vpsllw %xmm5, %ymm0, %ymm6
498; AVX512F-NEXT: vpcmpeqd %ymm9, %ymm9, %ymm9
499; AVX512F-NEXT: vpsllw %xmm5, %ymm9, %ymm8
500; AVX512F-NEXT: vpbroadcastb %xmm8, %ymm8
501; AVX512F-NEXT: vpand %ymm8, %ymm6, %ymm6
502; AVX512F-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
503; AVX512F-NEXT: vpsubb %xmm4, %xmm7, %xmm7
504; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,zero,zero,zero,zero,xmm7[1],zero,zero,zero,zero,zero,zero,zero
505; AVX512F-NEXT: vpsrlw %xmm7, %ymm2, %ymm2
506; AVX512F-NEXT: vpsrlw %xmm7, %ymm9, %ymm9
507; AVX512F-NEXT: vpsrlw $8, %ymm9, %ymm9
508; AVX512F-NEXT: vpbroadcastb %xmm9, %ymm9
509; AVX512F-NEXT: vpand %ymm9, %ymm2, %ymm2
510; AVX512F-NEXT: vpor %ymm2, %ymm6, %ymm2
511; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
512; AVX512F-NEXT: vpcmpeqb %ymm6, %ymm4, %ymm4
513; AVX512F-NEXT: vpblendvb %ymm4, %ymm0, %ymm2, %ymm0
514; AVX512F-NEXT: vpsllw %xmm5, %ymm1, %ymm2
515; AVX512F-NEXT: vpand %ymm8, %ymm2, %ymm2
516; AVX512F-NEXT: vpsrlw %xmm7, %ymm3, %ymm3
517; AVX512F-NEXT: vpand %ymm9, %ymm3, %ymm3
518; AVX512F-NEXT: vpor %ymm3, %ymm2, %ymm2
519; AVX512F-NEXT: vpblendvb %ymm4, %ymm1, %ymm2, %ymm1
520; AVX512F-NEXT: retq
521;
522; AVX512VL-LABEL: splatvar_funnnel_v64i8:
523; AVX512VL: # %bb.0:
524; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
525; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
526; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
527; AVX512VL-NEXT: vpsllw %xmm5, %ymm0, %ymm6
528; AVX512VL-NEXT: vpcmpeqd %ymm9, %ymm9, %ymm9
529; AVX512VL-NEXT: vpsllw %xmm5, %ymm9, %ymm8
530; AVX512VL-NEXT: vpbroadcastb %xmm8, %ymm8
531; AVX512VL-NEXT: vpand %ymm8, %ymm6, %ymm6
532; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
533; AVX512VL-NEXT: vpsubb %xmm4, %xmm7, %xmm7
534; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,zero,zero,zero,zero,xmm7[1],zero,zero,zero,zero,zero,zero,zero
535; AVX512VL-NEXT: vpsrlw %xmm7, %ymm2, %ymm2
536; AVX512VL-NEXT: vpsrlw %xmm7, %ymm9, %ymm9
537; AVX512VL-NEXT: vpsrlw $8, %ymm9, %ymm9
538; AVX512VL-NEXT: vpbroadcastb %xmm9, %ymm9
539; AVX512VL-NEXT: vpand %ymm9, %ymm2, %ymm2
540; AVX512VL-NEXT: vpor %ymm2, %ymm6, %ymm2
541; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6
542; AVX512VL-NEXT: vpcmpeqb %ymm6, %ymm4, %ymm4
543; AVX512VL-NEXT: vpblendvb %ymm4, %ymm0, %ymm2, %ymm0
544; AVX512VL-NEXT: vpsllw %xmm5, %ymm1, %ymm2
545; AVX512VL-NEXT: vpand %ymm8, %ymm2, %ymm2
546; AVX512VL-NEXT: vpsrlw %xmm7, %ymm3, %ymm3
547; AVX512VL-NEXT: vpand %ymm9, %ymm3, %ymm3
548; AVX512VL-NEXT: vpor %ymm3, %ymm2, %ymm2
549; AVX512VL-NEXT: vpblendvb %ymm4, %ymm1, %ymm2, %ymm1
550; AVX512VL-NEXT: retq
551;
552; AVX512BW-LABEL: splatvar_funnnel_v64i8:
553; AVX512BW: # %bb.0:
554; AVX512BW-NEXT: vpbroadcastb %xmm2, %zmm2
555; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
556; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
557; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
558; AVX512BW-NEXT: vpsllw %xmm5, %zmm0, %zmm6
559; AVX512BW-NEXT: vpternlogd $255, %zmm7, %zmm7, %zmm7
560; AVX512BW-NEXT: vpsllw %xmm5, %zmm7, %zmm5
561; AVX512BW-NEXT: vpbroadcastb %xmm5, %zmm5
562; AVX512BW-NEXT: vpandq %zmm5, %zmm6, %zmm5
563; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
564; AVX512BW-NEXT: vpsubb %xmm4, %xmm6, %xmm4
565; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
566; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
567; AVX512BW-NEXT: vpsrlw %xmm4, %zmm7, %zmm4
568; AVX512BW-NEXT: vpsrlw $8, %zmm4, %zmm4
569; AVX512BW-NEXT: vpbroadcastb %xmm4, %zmm4
570; AVX512BW-NEXT: vpandq %zmm4, %zmm1, %zmm1
571; AVX512BW-NEXT: vporq %zmm1, %zmm5, %zmm1
572; AVX512BW-NEXT: vptestnmb %zmm3, %zmm2, %k1
573; AVX512BW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
574; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
575; AVX512BW-NEXT: retq
576;
577; AVX512VLBW-LABEL: splatvar_funnnel_v64i8:
578; AVX512VLBW: # %bb.0:
579; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %zmm2
580; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
581; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
582; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
583; AVX512VLBW-NEXT: vpsllw %xmm5, %zmm0, %zmm6
584; AVX512VLBW-NEXT: vpternlogd $255, %zmm7, %zmm7, %zmm7
585; AVX512VLBW-NEXT: vpsllw %xmm5, %zmm7, %zmm5
586; AVX512VLBW-NEXT: vpbroadcastb %xmm5, %zmm5
587; AVX512VLBW-NEXT: vpandq %zmm5, %zmm6, %zmm5
588; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
589; AVX512VLBW-NEXT: vpsubb %xmm4, %xmm6, %xmm4
590; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
591; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
592; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm7, %zmm4
593; AVX512VLBW-NEXT: vpsrlw $8, %zmm4, %zmm4
594; AVX512VLBW-NEXT: vpbroadcastb %xmm4, %zmm4
595; AVX512VLBW-NEXT: vpandq %zmm4, %zmm1, %zmm1
596; AVX512VLBW-NEXT: vporq %zmm1, %zmm5, %zmm1
597; AVX512VLBW-NEXT: vptestnmb %zmm3, %zmm2, %k1
598; AVX512VLBW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
599; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0
600; AVX512VLBW-NEXT: retq
601 %splat = shufflevector <64 x i8> %amt, <64 x i8> undef, <64 x i32> zeroinitializer
602 %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %splat)
603 ret <64 x i8> %res
604}
605
606;
607; Constant Shifts
608;
609
; fshl of <8 x i64> with distinct per-lane constant amounts (none zero), so no
; lane-preservation blend is needed: plain variable shl/lshr pair merged with vporq.
610define <8 x i64> @constant_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
611; AVX512-LABEL: constant_funnnel_v8i64:
612; AVX512: # %bb.0:
613; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %zmm1, %zmm1
614; AVX512-NEXT: vpsllvq {{.*}}(%rip), %zmm0, %zmm0
615; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
616; AVX512-NEXT: retq
617 %res = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> <i64 4, i64 14, i64 50, i64 60, i64 4, i64 14, i64 50, i64 60>)
618 ret <8 x i64> %res
619}
620
; fshl of <16 x i32> with non-zero constant per-lane amounts; same shl/lshr+or
; pattern as the i64 case, using the dword-variable-shift/vpord forms.
621define <16 x i32> @constant_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
622; AVX512-LABEL: constant_funnnel_v16i32:
623; AVX512: # %bb.0:
624; AVX512-NEXT: vpsrlvd {{.*}}(%rip), %zmm1, %zmm1
625; AVX512-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
626; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
627; AVX512-NEXT: retq
628 %res = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>)
629 ret <16 x i32> %res
630}
631
; fshl of <32 x i16> by constants 0..15 (lane 0 shifts by 0, so x must pass
; through unchanged there). Without AVX512BW the 512-bit op splits into two
; 256-bit halves using vpmullw (shl) / vpmulhuw (lshr) by powers of two, with
; vpblendw/vpblendd restoring the amt==0 lanes; with AVX512BW it is
; vpsllvw/vpsrlvw plus a vmovdqu16 merge under mask 0x10001 (lanes 0 and 16).
632define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
633; AVX512F-LABEL: constant_funnnel_v32i16:
634; AVX512F: # %bb.0:
635; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
636; AVX512F-NEXT: vpmulhuw %ymm4, %ymm2, %ymm2
637; AVX512F-NEXT: vpmullw %ymm4, %ymm0, %ymm5
638; AVX512F-NEXT: vpor %ymm2, %ymm5, %ymm2
639; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7],ymm0[8],ymm2[9,10,11,12,13,14,15]
640; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
641; AVX512F-NEXT: vpmulhuw %ymm4, %ymm3, %ymm2
642; AVX512F-NEXT: vpmullw %ymm4, %ymm1, %ymm3
643; AVX512F-NEXT: vpor %ymm2, %ymm3, %ymm2
644; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7],ymm1[8],ymm2[9,10,11,12,13,14,15]
645; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
646; AVX512F-NEXT: retq
647;
648; AVX512VL-LABEL: constant_funnnel_v32i16:
649; AVX512VL: # %bb.0:
650; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
651; AVX512VL-NEXT: vpmulhuw %ymm4, %ymm2, %ymm2
652; AVX512VL-NEXT: vpmullw %ymm4, %ymm0, %ymm5
653; AVX512VL-NEXT: vpor %ymm2, %ymm5, %ymm2
654; AVX512VL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7],ymm0[8],ymm2[9,10,11,12,13,14,15]
655; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
656; AVX512VL-NEXT: vpmulhuw %ymm4, %ymm3, %ymm2
657; AVX512VL-NEXT: vpmullw %ymm4, %ymm1, %ymm3
658; AVX512VL-NEXT: vpor %ymm2, %ymm3, %ymm2
659; AVX512VL-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7],ymm1[8],ymm2[9,10,11,12,13,14,15]
660; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
661; AVX512VL-NEXT: retq
662;
663; AVX512BW-LABEL: constant_funnnel_v32i16:
664; AVX512BW: # %bb.0:
665; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm1, %zmm1
666; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm2
667; AVX512BW-NEXT: vporq %zmm1, %zmm2, %zmm1
668; AVX512BW-NEXT: movl $65537, %eax # imm = 0x10001
669; AVX512BW-NEXT: kmovd %eax, %k1
670; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
671; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
672; AVX512BW-NEXT: retq
673;
674; AVX512VLBW-LABEL: constant_funnnel_v32i16:
675; AVX512VLBW: # %bb.0:
676; AVX512VLBW-NEXT: vpsrlvw {{.*}}(%rip), %zmm1, %zmm1
677; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm2
678; AVX512VLBW-NEXT: vporq %zmm1, %zmm2, %zmm1
679; AVX512VLBW-NEXT: movl $65537, %eax # imm = 0x10001
680; AVX512VLBW-NEXT: kmovd %eax, %k1
681; AVX512VLBW-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
682; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0
683; AVX512VLBW-NEXT: retq
684 %res = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
685 ret <32 x i16> %res
686}
687
; fshl of <64 x i8> by repeating constants 0,1,..,8,..,1 (every 8th lane shifts
; by 0). x86 has no per-byte shifts, so the left side of the funnel is built
; bit-serially (shift by 4, 2, 1 selected via vpblendvb on F/VL, or vpmovb2m
; masks on BW), and the right side widens bytes to words (vpunpck{l,h}bw),
; shifts via pmullw/psllvw, then repacks with vpackuswb. Lanes with amt==0 are
; restored by a final byte blend (mask imm 0x101010101010101 on BW targets).
688define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
689; AVX512F-LABEL: constant_funnnel_v64i8:
690; AVX512F: # %bb.0:
691; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4
692; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
693; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4
694; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
695; AVX512F-NEXT: vpblendvb %ymm6, %ymm4, %ymm0, %ymm4
696; AVX512F-NEXT: vpsllw $2, %ymm4, %ymm7
697; AVX512F-NEXT: vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
698; AVX512F-NEXT: vpand %ymm8, %ymm7, %ymm7
699; AVX512F-NEXT: vpaddb %ymm6, %ymm6, %ymm9
700; AVX512F-NEXT: vpblendvb %ymm9, %ymm7, %ymm4, %ymm4
701; AVX512F-NEXT: vpaddb %ymm4, %ymm4, %ymm7
702; AVX512F-NEXT: vpaddb %ymm9, %ymm9, %ymm10
703; AVX512F-NEXT: vpblendvb %ymm10, %ymm7, %ymm4, %ymm4
704; AVX512F-NEXT: vpxor %xmm7, %xmm7, %xmm7
705; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm11 = ymm2[8],ymm7[8],ymm2[9],ymm7[9],ymm2[10],ymm7[10],ymm2[11],ymm7[11],ymm2[12],ymm7[12],ymm2[13],ymm7[13],ymm2[14],ymm7[14],ymm2[15],ymm7[15],ymm2[24],ymm7[24],ymm2[25],ymm7[25],ymm2[26],ymm7[26],ymm2[27],ymm7[27],ymm2[28],ymm7[28],ymm2[29],ymm7[29],ymm2[30],ymm7[30],ymm2[31],ymm7[31]
706; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm12 = [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2]
707; AVX512F-NEXT: # ymm12 = mem[0,1,0,1]
708; AVX512F-NEXT: vpmullw %ymm12, %ymm11, %ymm11
709; AVX512F-NEXT: vpsrlw $8, %ymm11, %ymm11
710; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm7[0],ymm2[1],ymm7[1],ymm2[2],ymm7[2],ymm2[3],ymm7[3],ymm2[4],ymm7[4],ymm2[5],ymm7[5],ymm2[6],ymm7[6],ymm2[7],ymm7[7],ymm2[16],ymm7[16],ymm2[17],ymm7[17],ymm2[18],ymm7[18],ymm2[19],ymm7[19],ymm2[20],ymm7[20],ymm2[21],ymm7[21],ymm2[22],ymm7[22],ymm2[23],ymm7[23]
711; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm13 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
712; AVX512F-NEXT: # ymm13 = mem[0,1,0,1]
713; AVX512F-NEXT: vpmullw %ymm13, %ymm2, %ymm2
714; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
715; AVX512F-NEXT: vpackuswb %ymm11, %ymm2, %ymm2
716; AVX512F-NEXT: vpor %ymm2, %ymm4, %ymm2
717; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm4 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
718; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
719; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm2
720; AVX512F-NEXT: vpand %ymm5, %ymm2, %ymm2
721; AVX512F-NEXT: vpblendvb %ymm6, %ymm2, %ymm1, %ymm2
722; AVX512F-NEXT: vpsllw $2, %ymm2, %ymm5
723; AVX512F-NEXT: vpand %ymm8, %ymm5, %ymm5
724; AVX512F-NEXT: vpblendvb %ymm9, %ymm5, %ymm2, %ymm2
725; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm5
726; AVX512F-NEXT: vpblendvb %ymm10, %ymm5, %ymm2, %ymm2
727; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm3[8],ymm7[8],ymm3[9],ymm7[9],ymm3[10],ymm7[10],ymm3[11],ymm7[11],ymm3[12],ymm7[12],ymm3[13],ymm7[13],ymm3[14],ymm7[14],ymm3[15],ymm7[15],ymm3[24],ymm7[24],ymm3[25],ymm7[25],ymm3[26],ymm7[26],ymm3[27],ymm7[27],ymm3[28],ymm7[28],ymm3[29],ymm7[29],ymm3[30],ymm7[30],ymm3[31],ymm7[31]
728; AVX512F-NEXT: vpmullw %ymm12, %ymm5, %ymm5
729; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
730; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm7[0],ymm3[1],ymm7[1],ymm3[2],ymm7[2],ymm3[3],ymm7[3],ymm3[4],ymm7[4],ymm3[5],ymm7[5],ymm3[6],ymm7[6],ymm3[7],ymm7[7],ymm3[16],ymm7[16],ymm3[17],ymm7[17],ymm3[18],ymm7[18],ymm3[19],ymm7[19],ymm3[20],ymm7[20],ymm3[21],ymm7[21],ymm3[22],ymm7[22],ymm3[23],ymm7[23]
731; AVX512F-NEXT: vpmullw %ymm13, %ymm3, %ymm3
732; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
733; AVX512F-NEXT: vpackuswb %ymm5, %ymm3, %ymm3
734; AVX512F-NEXT: vpor %ymm3, %ymm2, %ymm2
735; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
736; AVX512F-NEXT: retq
737;
738; AVX512VL-LABEL: constant_funnnel_v64i8:
739; AVX512VL: # %bb.0:
740; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
741; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
742; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
743; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
744; AVX512VL-NEXT: vpblendvb %ymm6, %ymm4, %ymm0, %ymm4
745; AVX512VL-NEXT: vpsllw $2, %ymm4, %ymm7
746; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
747; AVX512VL-NEXT: vpand %ymm8, %ymm7, %ymm7
748; AVX512VL-NEXT: vpaddb %ymm6, %ymm6, %ymm9
749; AVX512VL-NEXT: vpblendvb %ymm9, %ymm7, %ymm4, %ymm4
750; AVX512VL-NEXT: vpaddb %ymm4, %ymm4, %ymm7
751; AVX512VL-NEXT: vpaddb %ymm9, %ymm9, %ymm10
752; AVX512VL-NEXT: vpblendvb %ymm10, %ymm7, %ymm4, %ymm4
753; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm7 = ymm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
754; AVX512VL-NEXT: vpsrlw $8, %ymm7, %ymm7
755; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm11 = [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2]
756; AVX512VL-NEXT: # ymm11 = mem[0,1,0,1]
757; AVX512VL-NEXT: vpmullw %ymm11, %ymm7, %ymm7
758; AVX512VL-NEXT: vpsrlw $8, %ymm7, %ymm7
759; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
760; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
761; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm12 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
762; AVX512VL-NEXT: # ymm12 = mem[0,1,0,1]
763; AVX512VL-NEXT: vpmullw %ymm12, %ymm2, %ymm2
764; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
765; AVX512VL-NEXT: vpackuswb %ymm7, %ymm2, %ymm2
766; AVX512VL-NEXT: vpor %ymm2, %ymm4, %ymm2
767; AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm4 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
768; AVX512VL-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
769; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm2
770; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
771; AVX512VL-NEXT: vpblendvb %ymm6, %ymm2, %ymm1, %ymm2
772; AVX512VL-NEXT: vpsllw $2, %ymm2, %ymm5
773; AVX512VL-NEXT: vpand %ymm8, %ymm5, %ymm5
774; AVX512VL-NEXT: vpblendvb %ymm9, %ymm5, %ymm2, %ymm2
775; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm5
776; AVX512VL-NEXT: vpblendvb %ymm10, %ymm5, %ymm2, %ymm2
777; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
778; AVX512VL-NEXT: vpsrlw $8, %ymm5, %ymm5
779; AVX512VL-NEXT: vpmullw %ymm11, %ymm5, %ymm5
780; AVX512VL-NEXT: vpsrlw $8, %ymm5, %ymm5
781; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
782; AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3
783; AVX512VL-NEXT: vpmullw %ymm12, %ymm3, %ymm3
784; AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3
785; AVX512VL-NEXT: vpackuswb %ymm5, %ymm3, %ymm3
786; AVX512VL-NEXT: vpor %ymm3, %ymm2, %ymm2
787; AVX512VL-NEXT: vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
788; AVX512VL-NEXT: retq
789;
790; AVX512BW-LABEL: constant_funnnel_v64i8:
791; AVX512BW: # %bb.0:
792; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
793; AVX512BW-NEXT: vpmovb2m %zmm2, %k1
794; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm3
795; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
796; AVX512BW-NEXT: vpblendmb %zmm3, %zmm0, %zmm3 {%k1}
797; AVX512BW-NEXT: vpsllw $2, %zmm3, %zmm4
798; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
799; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm2
800; AVX512BW-NEXT: vpmovb2m %zmm2, %k1
801; AVX512BW-NEXT: vmovdqu8 %zmm4, %zmm3 {%k1}
802; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm2
803; AVX512BW-NEXT: vpmovb2m %zmm2, %k1
804; AVX512BW-NEXT: vpaddb %zmm3, %zmm3, %zmm3 {%k1}
805; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
806; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
807; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm2, %zmm2
808; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
809; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
810; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
811; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm1, %zmm1
812; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
813; AVX512BW-NEXT: vpackuswb %zmm2, %zmm1, %zmm1
814; AVX512BW-NEXT: vporq %zmm1, %zmm3, %zmm1
815; AVX512BW-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
816; AVX512BW-NEXT: kmovq %rax, %k1
817; AVX512BW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
818; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
819; AVX512BW-NEXT: retq
820;
821; AVX512VLBW-LABEL: constant_funnnel_v64i8:
822; AVX512VLBW: # %bb.0:
823; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256,8192,24640,41088,57536,57344,41152,24704,8256]
824; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k1
825; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm3
826; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
827; AVX512VLBW-NEXT: vpblendmb %zmm3, %zmm0, %zmm3 {%k1}
828; AVX512VLBW-NEXT: vpsllw $2, %zmm3, %zmm4
829; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
830; AVX512VLBW-NEXT: vpaddb %zmm2, %zmm2, %zmm2
831; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k1
832; AVX512VLBW-NEXT: vmovdqu8 %zmm4, %zmm3 {%k1}
833; AVX512VLBW-NEXT: vpaddb %zmm2, %zmm2, %zmm2
834; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k1
835; AVX512VLBW-NEXT: vpaddb %zmm3, %zmm3, %zmm3 {%k1}
836; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
837; AVX512VLBW-NEXT: vpsrlw $8, %zmm2, %zmm2
838; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm2, %zmm2
839; AVX512VLBW-NEXT: vpsrlw $8, %zmm2, %zmm2
840; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
841; AVX512VLBW-NEXT: vpsrlw $8, %zmm1, %zmm1
842; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm1, %zmm1
843; AVX512VLBW-NEXT: vpsrlw $8, %zmm1, %zmm1
844; AVX512VLBW-NEXT: vpackuswb %zmm2, %zmm1, %zmm1
845; AVX512VLBW-NEXT: vporq %zmm1, %zmm3, %zmm1
846; AVX512VLBW-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
847; AVX512VLBW-NEXT: kmovq %rax, %k1
848; AVX512VLBW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
849; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0
850; AVX512VLBW-NEXT: retq
851 %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>)
852 ret <64 x i8> %res
853}
854
855;
856; Uniform Constant Shifts
857;
858
; Uniform constant amount (14): fshl folds to immediate-shift pair,
; shl by 14 of x OR lshr by 64-14=50 of y.
859define <8 x i64> @splatconstant_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
860; AVX512-LABEL: splatconstant_funnnel_v8i64:
861; AVX512: # %bb.0:
862; AVX512-NEXT: vpsrlq $50, %zmm1, %zmm1
863; AVX512-NEXT: vpsllq $14, %zmm0, %zmm0
864; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
865; AVX512-NEXT: retq
866 %res = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> <i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14>)
867 ret <8 x i64> %res
868}
869
; Uniform constant amount (4): immediate vpslld $4 / vpsrld $28 pair merged with vpord.
870define <16 x i32> @splatconstant_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
871; AVX512-LABEL: splatconstant_funnnel_v16i32:
872; AVX512: # %bb.0:
873; AVX512-NEXT: vpsrld $28, %zmm1, %zmm1
874; AVX512-NEXT: vpslld $4, %zmm0, %zmm0
875; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
876; AVX512-NEXT: retq
877 %res = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>)
878 ret <16 x i32> %res
879}
880
; Uniform constant amount (7) on <32 x i16>: vpsllw $7 / vpsrlw $9 pair.
; Non-BW targets lack 512-bit word shifts, so the op is split across two ymm halves.
881define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
882; AVX512F-LABEL: splatconstant_funnnel_v32i16:
883; AVX512F: # %bb.0:
884; AVX512F-NEXT: vpsrlw $9, %ymm2, %ymm2
885; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
886; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
887; AVX512F-NEXT: vpsrlw $9, %ymm3, %ymm2
888; AVX512F-NEXT: vpsllw $7, %ymm1, %ymm1
889; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
890; AVX512F-NEXT: retq
891;
892; AVX512VL-LABEL: splatconstant_funnnel_v32i16:
893; AVX512VL: # %bb.0:
894; AVX512VL-NEXT: vpsrlw $9, %ymm2, %ymm2
895; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0
896; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
897; AVX512VL-NEXT: vpsrlw $9, %ymm3, %ymm2
898; AVX512VL-NEXT: vpsllw $7, %ymm1, %ymm1
899; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
900; AVX512VL-NEXT: retq
901;
902; AVX512BW-LABEL: splatconstant_funnnel_v32i16:
903; AVX512BW: # %bb.0:
904; AVX512BW-NEXT: vpsrlw $9, %zmm1, %zmm1
905; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0
906; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
907; AVX512BW-NEXT: retq
908;
909; AVX512VLBW-LABEL: splatconstant_funnnel_v32i16:
910; AVX512VLBW: # %bb.0:
911; AVX512VLBW-NEXT: vpsrlw $9, %zmm1, %zmm1
912; AVX512VLBW-NEXT: vpsllw $7, %zmm0, %zmm0
913; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
914; AVX512VLBW-NEXT: retq
915 %res = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
916 ret <32 x i16> %res
917}
918
; Uniform constant amount (4) on <64 x i8>: no byte shifts exist, so word shifts
; are used with masks 0xF0 (keep shl bits) / 0x0F (keep lshr bits) to emulate
; per-byte behavior; non-BW targets again split into two ymm halves.
919define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
920; AVX512F-LABEL: splatconstant_funnnel_v64i8:
921; AVX512F: # %bb.0:
922; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
923; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
924; AVX512F-NEXT: vpand %ymm4, %ymm2, %ymm2
925; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
926; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
927; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0
928; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
929; AVX512F-NEXT: vpsrlw $4, %ymm3, %ymm2
930; AVX512F-NEXT: vpand %ymm4, %ymm2, %ymm2
931; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm1
932; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1
933; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
934; AVX512F-NEXT: retq
935;
936; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
937; AVX512VL: # %bb.0:
938; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
939; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
940; AVX512VL-NEXT: vpand %ymm4, %ymm2, %ymm2
941; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
942; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
943; AVX512VL-NEXT: vpand %ymm5, %ymm0, %ymm0
944; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
945; AVX512VL-NEXT: vpsrlw $4, %ymm3, %ymm2
946; AVX512VL-NEXT: vpand %ymm4, %ymm2, %ymm2
947; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
948; AVX512VL-NEXT: vpand %ymm5, %ymm1, %ymm1
949; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1
950; AVX512VL-NEXT: retq
951;
952; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
953; AVX512BW: # %bb.0:
954; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm1
955; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
956; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
957; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
958; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
959; AVX512BW-NEXT: retq
960;
961; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
962; AVX512VLBW: # %bb.0:
963; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm1
964; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
965; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
966; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
967; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
968; AVX512VLBW-NEXT: retq
969 %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
970 ret <64 x i8> %res
971}