blob: 7dff17b8c4104528233595945245377d684d7698 [file] [log] [blame]
Simon Pilgrim2899ec82017-07-16 19:26:49 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512VL
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512VLBW
6
7;
8; Variable Rotates
9;
10
; Variable rotate-left of every i64 lane of %a by the matching lane of %b,
; written as the generic IR pattern (a << b) | (a >> (64 - b)).
; The check prefix used below is shared by all four RUN lines, so every
; feature combination is expected to emit a single vprolvq followed by retq.
11define <8 x i64> @var_rotate_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
12; AVX512-LABEL: var_rotate_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000013; AVX512: # %bb.0:
Simon Pilgrim1cbe8c22017-07-17 14:11:30 +000014; AVX512-NEXT: vprolvq %zmm1, %zmm0, %zmm0
Simon Pilgrim2899ec82017-07-16 19:26:49 +000015; AVX512-NEXT: retq
; Input IR: complementary amount (64 - b), the two opposing shifts, then the OR.
16 %b64 = sub <8 x i64> <i64 64, i64 64, i64 64, i64 64, i64 64, i64 64, i64 64, i64 64>, %b
17 %shl = shl <8 x i64> %a, %b
18 %lshr = lshr <8 x i64> %a, %b64
19 %or = or <8 x i64> %shl, %lshr
20 ret <8 x i64> %or
21}
22
; Variable rotate-left of every i32 lane of %a by the matching lane of %b,
; written as the generic IR pattern (a << b) | (a >> (32 - b)).
; The check prefix used below is shared by all four RUN lines, so every
; feature combination is expected to emit a single vprolvd followed by retq.
23define <16 x i32> @var_rotate_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
24; AVX512-LABEL: var_rotate_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000025; AVX512: # %bb.0:
Simon Pilgrim1cbe8c22017-07-17 14:11:30 +000026; AVX512-NEXT: vprolvd %zmm1, %zmm0, %zmm0
Simon Pilgrim2899ec82017-07-16 19:26:49 +000027; AVX512-NEXT: retq
; Input IR: complementary amount (32 - b), the two opposing shifts, then the OR.
28 %b32 = sub <16 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>, %b
29 %shl = shl <16 x i32> %a, %b
30 %lshr = lshr <16 x i32> %a, %b32
31 %or = or <16 x i32> %shl, %lshr
32 ret <16 x i32> %or
33}
34
; Variable rotate-left of every i16 lane of %a by the matching lane of %b,
; written as the generic IR pattern (a << b) | (a >> (16 - b)).
; The recorded expectations differ per feature set:
;  * runs without avx512bw (+avx512f alone, and +avx512f,+avx512vl) work on
;    256-bit ymm halves: operands are zero-extended to dwords (vpmovzxwd),
;    shifted with vpsllvd / vpsrlvd, truncated back with vpmovdw and merged
;    with vpor.
;  * runs with avx512bw shift words directly on full zmm registers with
;    vpsllvw / vpsrlvw and merge with vporq.
35define <32 x i16> @var_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
36; AVX512F-LABEL: var_rotate_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000037; AVX512F: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +000038; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
39; AVX512F-NEXT: vpsubw %ymm2, %ymm4, %ymm5
40; AVX512F-NEXT: vpsubw %ymm3, %ymm4, %ymm4
41; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
42; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
43; AVX512F-NEXT: vpsllvd %zmm3, %zmm1, %zmm3
44; AVX512F-NEXT: vpmovdw %zmm3, %ymm3
45; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
46; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
47; AVX512F-NEXT: vpsllvd %zmm2, %zmm0, %zmm2
48; AVX512F-NEXT: vpmovdw %zmm2, %ymm2
49; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero
50; AVX512F-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1
51; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
52; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
53; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm5[0],zero,ymm5[1],zero,ymm5[2],zero,ymm5[3],zero,ymm5[4],zero,ymm5[5],zero,ymm5[6],zero,ymm5[7],zero,ymm5[8],zero,ymm5[9],zero,ymm5[10],zero,ymm5[11],zero,ymm5[12],zero,ymm5[13],zero,ymm5[14],zero,ymm5[15],zero
54; AVX512F-NEXT: vpsrlvd %zmm3, %zmm0, %zmm0
55; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
56; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
57; AVX512F-NEXT: retq
58;
59; AVX512VL-LABEL: var_rotate_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000060; AVX512VL: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +000061; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
62; AVX512VL-NEXT: vpsubw %ymm2, %ymm4, %ymm5
63; AVX512VL-NEXT: vpsubw %ymm3, %ymm4, %ymm4
64; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero
65; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
66; AVX512VL-NEXT: vpsllvd %zmm3, %zmm1, %zmm3
67; AVX512VL-NEXT: vpmovdw %zmm3, %ymm3
68; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
69; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
70; AVX512VL-NEXT: vpsllvd %zmm2, %zmm0, %zmm2
71; AVX512VL-NEXT: vpmovdw %zmm2, %ymm2
72; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero
73; AVX512VL-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1
74; AVX512VL-NEXT: vpmovdw %zmm1, %ymm1
75; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1
76; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm5[0],zero,ymm5[1],zero,ymm5[2],zero,ymm5[3],zero,ymm5[4],zero,ymm5[5],zero,ymm5[6],zero,ymm5[7],zero,ymm5[8],zero,ymm5[9],zero,ymm5[10],zero,ymm5[11],zero,ymm5[12],zero,ymm5[13],zero,ymm5[14],zero,ymm5[15],zero
77; AVX512VL-NEXT: vpsrlvd %zmm3, %zmm0, %zmm0
78; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
79; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
80; AVX512VL-NEXT: retq
81;
82; AVX512BW-LABEL: var_rotate_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000083; AVX512BW: # %bb.0:
Craig Toppercb0e7492017-07-31 17:35:44 +000084; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
Simon Pilgrim2899ec82017-07-16 19:26:49 +000085; AVX512BW-NEXT: vpsubw %zmm1, %zmm2, %zmm2
86; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm1
87; AVX512BW-NEXT: vpsrlvw %zmm2, %zmm0, %zmm0
88; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0
89; AVX512BW-NEXT: retq
90;
91; AVX512VLBW-LABEL: var_rotate_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000092; AVX512VLBW: # %bb.0:
Craig Toppercb0e7492017-07-31 17:35:44 +000093; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
Simon Pilgrim2899ec82017-07-16 19:26:49 +000094; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm2, %zmm2
95; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm1
96; AVX512VLBW-NEXT: vpsrlvw %zmm2, %zmm0, %zmm0
97; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0
98; AVX512VLBW-NEXT: retq
; Input IR: complementary amount (16 - b), the two opposing shifts, then the OR.
99 %b16 = sub <32 x i16> <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>, %b
100 %shl = shl <32 x i16> %a, %b
101 %lshr = lshr <32 x i16> %a, %b16
102 %or = or <32 x i16> %shl, %lshr
103 ret <32 x i16> %or
104}
105
106define <64 x i8> @var_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
107; AVX512F-LABEL: var_rotate_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000108; AVX512F: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000109; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
110; AVX512F-NEXT: vpsubb %ymm2, %ymm5, %ymm4
111; AVX512F-NEXT: vpsubb %ymm3, %ymm5, %ymm5
112; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm6
113; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
114; AVX512F-NEXT: vpand %ymm7, %ymm6, %ymm6
115; AVX512F-NEXT: vpsllw $5, %ymm3, %ymm3
116; AVX512F-NEXT: vpblendvb %ymm3, %ymm6, %ymm1, %ymm6
117; AVX512F-NEXT: vpsllw $2, %ymm6, %ymm8
118; AVX512F-NEXT: vmovdqa {{.*#+}} ymm9 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
119; AVX512F-NEXT: vpand %ymm9, %ymm8, %ymm8
120; AVX512F-NEXT: vpaddb %ymm3, %ymm3, %ymm3
121; AVX512F-NEXT: vpblendvb %ymm3, %ymm8, %ymm6, %ymm6
122; AVX512F-NEXT: vpaddb %ymm6, %ymm6, %ymm8
123; AVX512F-NEXT: vpaddb %ymm3, %ymm3, %ymm3
124; AVX512F-NEXT: vpblendvb %ymm3, %ymm8, %ymm6, %ymm3
125; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm6
126; AVX512F-NEXT: vpand %ymm7, %ymm6, %ymm6
127; AVX512F-NEXT: vpsllw $5, %ymm2, %ymm2
128; AVX512F-NEXT: vpblendvb %ymm2, %ymm6, %ymm0, %ymm6
129; AVX512F-NEXT: vpsllw $2, %ymm6, %ymm7
130; AVX512F-NEXT: vpand %ymm9, %ymm7, %ymm7
131; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm2
132; AVX512F-NEXT: vpblendvb %ymm2, %ymm7, %ymm6, %ymm6
133; AVX512F-NEXT: vpaddb %ymm6, %ymm6, %ymm7
134; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm2
135; AVX512F-NEXT: vpblendvb %ymm2, %ymm7, %ymm6, %ymm2
136; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm6
137; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
138; AVX512F-NEXT: vpand %ymm7, %ymm6, %ymm6
139; AVX512F-NEXT: vpsllw $5, %ymm5, %ymm5
140; AVX512F-NEXT: vpblendvb %ymm5, %ymm6, %ymm1, %ymm1
141; AVX512F-NEXT: vpsrlw $2, %ymm1, %ymm6
142; AVX512F-NEXT: vmovdqa {{.*#+}} ymm8 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
143; AVX512F-NEXT: vpand %ymm8, %ymm6, %ymm6
144; AVX512F-NEXT: vpaddb %ymm5, %ymm5, %ymm5
145; AVX512F-NEXT: vpblendvb %ymm5, %ymm6, %ymm1, %ymm1
146; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm6
147; AVX512F-NEXT: vmovdqa {{.*#+}} ymm9 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
148; AVX512F-NEXT: vpand %ymm9, %ymm6, %ymm6
149; AVX512F-NEXT: vpaddb %ymm5, %ymm5, %ymm5
150; AVX512F-NEXT: vpblendvb %ymm5, %ymm6, %ymm1, %ymm1
151; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
152; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm3
153; AVX512F-NEXT: vpand %ymm7, %ymm3, %ymm3
154; AVX512F-NEXT: vpsllw $5, %ymm4, %ymm4
155; AVX512F-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
156; AVX512F-NEXT: vpsrlw $2, %ymm0, %ymm3
157; AVX512F-NEXT: vpand %ymm8, %ymm3, %ymm3
158; AVX512F-NEXT: vpaddb %ymm4, %ymm4, %ymm4
159; AVX512F-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
160; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm3
161; AVX512F-NEXT: vpand %ymm9, %ymm3, %ymm3
162; AVX512F-NEXT: vpaddb %ymm4, %ymm4, %ymm4
163; AVX512F-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
164; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
165; AVX512F-NEXT: retq
166;
167; AVX512VL-LABEL: var_rotate_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000168; AVX512VL: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000169; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
170; AVX512VL-NEXT: vpsubb %ymm2, %ymm5, %ymm4
171; AVX512VL-NEXT: vpsubb %ymm3, %ymm5, %ymm5
172; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm6
173; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
174; AVX512VL-NEXT: vpand %ymm7, %ymm6, %ymm6
175; AVX512VL-NEXT: vpsllw $5, %ymm3, %ymm3
176; AVX512VL-NEXT: vpblendvb %ymm3, %ymm6, %ymm1, %ymm6
177; AVX512VL-NEXT: vpsllw $2, %ymm6, %ymm8
178; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm9 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
179; AVX512VL-NEXT: vpand %ymm9, %ymm8, %ymm8
180; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm3
181; AVX512VL-NEXT: vpblendvb %ymm3, %ymm8, %ymm6, %ymm6
182; AVX512VL-NEXT: vpaddb %ymm6, %ymm6, %ymm8
183; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm3
184; AVX512VL-NEXT: vpblendvb %ymm3, %ymm8, %ymm6, %ymm3
185; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm6
186; AVX512VL-NEXT: vpand %ymm7, %ymm6, %ymm6
187; AVX512VL-NEXT: vpsllw $5, %ymm2, %ymm2
188; AVX512VL-NEXT: vpblendvb %ymm2, %ymm6, %ymm0, %ymm6
189; AVX512VL-NEXT: vpsllw $2, %ymm6, %ymm7
190; AVX512VL-NEXT: vpand %ymm9, %ymm7, %ymm7
191; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
192; AVX512VL-NEXT: vpblendvb %ymm2, %ymm7, %ymm6, %ymm6
193; AVX512VL-NEXT: vpaddb %ymm6, %ymm6, %ymm7
194; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
195; AVX512VL-NEXT: vpblendvb %ymm2, %ymm7, %ymm6, %ymm2
196; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm6
197; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
198; AVX512VL-NEXT: vpand %ymm7, %ymm6, %ymm6
199; AVX512VL-NEXT: vpsllw $5, %ymm5, %ymm5
200; AVX512VL-NEXT: vpblendvb %ymm5, %ymm6, %ymm1, %ymm1
201; AVX512VL-NEXT: vpsrlw $2, %ymm1, %ymm6
202; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
203; AVX512VL-NEXT: vpand %ymm8, %ymm6, %ymm6
204; AVX512VL-NEXT: vpaddb %ymm5, %ymm5, %ymm5
205; AVX512VL-NEXT: vpblendvb %ymm5, %ymm6, %ymm1, %ymm1
206; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm6
207; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm9 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
208; AVX512VL-NEXT: vpand %ymm9, %ymm6, %ymm6
209; AVX512VL-NEXT: vpaddb %ymm5, %ymm5, %ymm5
210; AVX512VL-NEXT: vpblendvb %ymm5, %ymm6, %ymm1, %ymm1
211; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm5
212; AVX512VL-NEXT: vpand %ymm7, %ymm5, %ymm5
213; AVX512VL-NEXT: vpsllw $5, %ymm4, %ymm4
214; AVX512VL-NEXT: vpblendvb %ymm4, %ymm5, %ymm0, %ymm0
215; AVX512VL-NEXT: vpsrlw $2, %ymm0, %ymm5
216; AVX512VL-NEXT: vpand %ymm8, %ymm5, %ymm5
217; AVX512VL-NEXT: vpaddb %ymm4, %ymm4, %ymm4
218; AVX512VL-NEXT: vpblendvb %ymm4, %ymm5, %ymm0, %ymm0
219; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm5
220; AVX512VL-NEXT: vpand %ymm9, %ymm5, %ymm5
221; AVX512VL-NEXT: vpaddb %ymm4, %ymm4, %ymm4
222; AVX512VL-NEXT: vpblendvb %ymm4, %ymm5, %ymm0, %ymm0
223; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
224; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1
225; AVX512VL-NEXT: retq
226;
227; AVX512BW-LABEL: var_rotate_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000228; AVX512BW: # %bb.0:
Craig Toppercb0e7492017-07-31 17:35:44 +0000229; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000230; AVX512BW-NEXT: vpsubb %zmm1, %zmm2, %zmm2
231; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm3
232; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
233; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
234; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
235; AVX512BW-NEXT: vpblendmb %zmm3, %zmm0, %zmm3 {%k1}
236; AVX512BW-NEXT: vpsllw $2, %zmm3, %zmm4
237; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
238; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
239; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
240; AVX512BW-NEXT: vmovdqu8 %zmm4, %zmm3 {%k1}
241; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
242; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
243; AVX512BW-NEXT: vpaddb %zmm3, %zmm3, %zmm3 {%k1}
244; AVX512BW-NEXT: vpsllw $5, %zmm2, %zmm1
245; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm2
246; AVX512BW-NEXT: vpmovb2m %zmm2, %k1
247; AVX512BW-NEXT: vpmovb2m %zmm1, %k2
248; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
249; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
250; AVX512BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k2}
251; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm1
252; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
253; AVX512BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
254; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm1
255; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
256; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm2
257; AVX512BW-NEXT: vpmovb2m %zmm2, %k1
258; AVX512BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
259; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
260; AVX512BW-NEXT: retq
261;
262; AVX512VLBW-LABEL: var_rotate_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000263; AVX512VLBW: # %bb.0:
Craig Toppercb0e7492017-07-31 17:35:44 +0000264; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000265; AVX512VLBW-NEXT: vpsubb %zmm1, %zmm2, %zmm2
266; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm3
267; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
268; AVX512VLBW-NEXT: vpsllw $5, %zmm1, %zmm1
269; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
270; AVX512VLBW-NEXT: vpblendmb %zmm3, %zmm0, %zmm3 {%k1}
271; AVX512VLBW-NEXT: vpsllw $2, %zmm3, %zmm4
272; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm4, %zmm4
273; AVX512VLBW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
274; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
275; AVX512VLBW-NEXT: vmovdqu8 %zmm4, %zmm3 {%k1}
276; AVX512VLBW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
277; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
278; AVX512VLBW-NEXT: vpaddb %zmm3, %zmm3, %zmm3 {%k1}
279; AVX512VLBW-NEXT: vpsllw $5, %zmm2, %zmm1
280; AVX512VLBW-NEXT: vpaddb %zmm1, %zmm1, %zmm2
281; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k1
282; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k2
283; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm1
284; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
285; AVX512VLBW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k2}
286; AVX512VLBW-NEXT: vpsrlw $2, %zmm0, %zmm1
287; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
288; AVX512VLBW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
289; AVX512VLBW-NEXT: vpsrlw $1, %zmm0, %zmm1
290; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
291; AVX512VLBW-NEXT: vpaddb %zmm2, %zmm2, %zmm2
292; AVX512VLBW-NEXT: vpmovb2m %zmm2, %k1
293; AVX512VLBW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
294; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
295; AVX512VLBW-NEXT: retq
296 %b8 = sub <64 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>, %b
297 %shl = shl <64 x i8> %a, %b
298 %lshr = lshr <64 x i8> %a, %b8
299 %or = or <64 x i8> %shl, %lshr
300 ret <64 x i8> %or
301}
302
303;
304; Constant Rotates
305;
306
; Rotate-left of every i64 lane of %a by a per-lane constant. The left-shift
; amounts are <4, 14, 50, 60> (repeated twice) and the right-shift amounts are
; <60, 50, 14, 4>, so each lane's pair sums to 64.
; The check prefix used below is shared by all four RUN lines: every feature
; combination is expected to emit one vprolvq whose rotate amounts come from
; a RIP-relative memory operand.
307define <8 x i64> @constant_rotate_v8i64(<8 x i64> %a) nounwind {
308; AVX512-LABEL: constant_rotate_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000309; AVX512: # %bb.0:
Simon Pilgrim1cbe8c22017-07-17 14:11:30 +0000310; AVX512-NEXT: vprolvq {{.*}}(%rip), %zmm0, %zmm0
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000311; AVX512-NEXT: retq
312 %shl = shl <8 x i64> %a, <i64 4, i64 14, i64 50, i64 60, i64 4, i64 14, i64 50, i64 60>
Simon Pilgrim11199b22017-07-17 10:35:51 +0000313 %lshr = lshr <8 x i64> %a, <i64 60, i64 50, i64 14, i64 4, i64 60, i64 50, i64 14, i64 4>
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000314 %or = or <8 x i64> %shl, %lshr
315 ret <8 x i64> %or
316}
317
; Rotate-left of every i32 lane of %a by a per-lane constant. The left-shift
; amounts are 4..11 (repeated twice) and the right-shift amounts are 28..21,
; so each lane's pair sums to 32.
; The check prefix used below is shared by all four RUN lines: every feature
; combination is expected to emit one vprolvd whose rotate amounts come from
; a RIP-relative memory operand.
318define <16 x i32> @constant_rotate_v16i32(<16 x i32> %a) nounwind {
319; AVX512-LABEL: constant_rotate_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000320; AVX512: # %bb.0:
Simon Pilgrim1cbe8c22017-07-17 14:11:30 +0000321; AVX512-NEXT: vprolvd {{.*}}(%rip), %zmm0, %zmm0
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000322; AVX512-NEXT: retq
323 %shl = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
324 %lshr = lshr <16 x i32> %a, <i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21>
325 %or = or <16 x i32> %shl, %lshr
326 ret <16 x i32> %or
327}
328
; Rotate-left of every i16 lane of %a by a per-lane constant. The left-shift
; amounts are 0..15 (repeated twice) and the right-shift amounts are 16..1,
; so each lane's pair sums to 16.
; The recorded expectations differ per feature set:
;  * runs without avx512bw work on 256-bit ymm halves: the left shift is a
;    vpmullw by the constant vector [1,2,4,...,32768]; the right shift
;    zero-extends to dwords (vpmovzxwd), applies vpsrlvd with the amounts
;    [16,15,...,1], truncates with vpmovdw, and vpor merges the two parts.
;  * runs with avx512bw use vpsllvw / vpsrlvw with RIP-relative memory
;    operands on the full zmm register, merged with vporq.
; NOTE(review): the first lane of each 16-lane group pairs shl by 0 with lshr
; by 16, i.e. a shift amount equal to the i16 element width. The LLVM LangRef
; treats such a shift as producing poison - confirm this is intended before
; relying on that lane's result or regenerating the checks.
329define <32 x i16> @constant_rotate_v32i16(<32 x i16> %a) nounwind {
330; AVX512F-LABEL: constant_rotate_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000331; AVX512F: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000332; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
333; AVX512F-NEXT: vpmullw %ymm2, %ymm1, %ymm3
334; AVX512F-NEXT: vpmullw %ymm2, %ymm0, %ymm2
335; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
Craig Topper21c8a8f2018-01-18 07:44:06 +0000336; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm4 = [16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1]
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000337; AVX512F-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1
338; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
339; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
340; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
341; AVX512F-NEXT: vpsrlvd %zmm4, %zmm0, %zmm0
342; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
343; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
344; AVX512F-NEXT: retq
345;
346; AVX512VL-LABEL: constant_rotate_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000347; AVX512VL: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000348; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
349; AVX512VL-NEXT: vpmullw %ymm2, %ymm1, %ymm3
350; AVX512VL-NEXT: vpmullw %ymm2, %ymm0, %ymm2
351; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
Craig Topper21c8a8f2018-01-18 07:44:06 +0000352; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm4 = [16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1]
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000353; AVX512VL-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1
354; AVX512VL-NEXT: vpmovdw %zmm1, %ymm1
355; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
356; AVX512VL-NEXT: vpsrlvd %zmm4, %zmm0, %zmm0
357; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
358; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
359; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1
360; AVX512VL-NEXT: retq
361;
362; AVX512BW-LABEL: constant_rotate_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000363; AVX512BW: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000364; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm1
365; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
366; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0
367; AVX512BW-NEXT: retq
368;
369; AVX512VLBW-LABEL: constant_rotate_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000370; AVX512VLBW: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000371; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm1
372; AVX512VLBW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
373; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0
374; AVX512VLBW-NEXT: retq
375 %shl = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
376 %lshr = lshr <32 x i16> %a, <i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1>
377 %or = or <32 x i16> %shl, %lshr
378 ret <32 x i16> %or
379}
380
381define <64 x i8> @constant_rotate_v64i8(<64 x i8> %a) nounwind {
382; AVX512F-LABEL: constant_rotate_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000383; AVX512F: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000384; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm2
385; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
386; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
387; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256]
388; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm1, %ymm2
389; AVX512F-NEXT: vpsllw $2, %ymm2, %ymm5
390; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
391; AVX512F-NEXT: vpand %ymm6, %ymm5, %ymm5
392; AVX512F-NEXT: vpaddb %ymm4, %ymm4, %ymm7
393; AVX512F-NEXT: vpblendvb %ymm7, %ymm5, %ymm2, %ymm2
394; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm5
395; AVX512F-NEXT: vpaddb %ymm7, %ymm7, %ymm8
396; AVX512F-NEXT: vpblendvb %ymm8, %ymm5, %ymm2, %ymm2
397; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm5
398; AVX512F-NEXT: vpand %ymm3, %ymm5, %ymm3
399; AVX512F-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm3
400; AVX512F-NEXT: vpsllw $2, %ymm3, %ymm4
401; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4
402; AVX512F-NEXT: vpblendvb %ymm7, %ymm4, %ymm3, %ymm3
403; AVX512F-NEXT: vpaddb %ymm3, %ymm3, %ymm4
404; AVX512F-NEXT: vpblendvb %ymm8, %ymm4, %ymm3, %ymm3
405; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm4
406; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
407; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4
408; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536]
409; AVX512F-NEXT: vpblendvb %ymm6, %ymm4, %ymm1, %ymm1
410; AVX512F-NEXT: vpsrlw $2, %ymm1, %ymm4
411; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
412; AVX512F-NEXT: vpand %ymm7, %ymm4, %ymm4
413; AVX512F-NEXT: vpaddb %ymm6, %ymm6, %ymm8
414; AVX512F-NEXT: vpblendvb %ymm8, %ymm4, %ymm1, %ymm1
415; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm4
416; AVX512F-NEXT: vmovdqa {{.*#+}} ymm9 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
417; AVX512F-NEXT: vpand %ymm9, %ymm4, %ymm4
418; AVX512F-NEXT: vpaddb %ymm8, %ymm8, %ymm10
419; AVX512F-NEXT: vpblendvb %ymm10, %ymm4, %ymm1, %ymm1
420; AVX512F-NEXT: vpor %ymm1, %ymm2, %ymm1
421; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2
422; AVX512F-NEXT: vpand %ymm5, %ymm2, %ymm2
423; AVX512F-NEXT: vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
424; AVX512F-NEXT: vpsrlw $2, %ymm0, %ymm2
425; AVX512F-NEXT: vpand %ymm7, %ymm2, %ymm2
426; AVX512F-NEXT: vpblendvb %ymm8, %ymm2, %ymm0, %ymm0
427; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm2
428; AVX512F-NEXT: vpand %ymm9, %ymm2, %ymm2
429; AVX512F-NEXT: vpblendvb %ymm10, %ymm2, %ymm0, %ymm0
430; AVX512F-NEXT: vpor %ymm0, %ymm3, %ymm0
431; AVX512F-NEXT: retq
432;
433; AVX512VL-LABEL: constant_rotate_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000434; AVX512VL: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000435; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm2
436; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
437; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
438; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256]
439; AVX512VL-NEXT: vpblendvb %ymm4, %ymm2, %ymm1, %ymm2
440; AVX512VL-NEXT: vpsllw $2, %ymm2, %ymm5
441; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
442; AVX512VL-NEXT: vpand %ymm6, %ymm5, %ymm5
443; AVX512VL-NEXT: vpaddb %ymm4, %ymm4, %ymm7
444; AVX512VL-NEXT: vpblendvb %ymm7, %ymm5, %ymm2, %ymm2
445; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm5
446; AVX512VL-NEXT: vpaddb %ymm7, %ymm7, %ymm8
447; AVX512VL-NEXT: vpblendvb %ymm8, %ymm5, %ymm2, %ymm2
448; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm5
449; AVX512VL-NEXT: vpand %ymm3, %ymm5, %ymm3
450; AVX512VL-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm3
451; AVX512VL-NEXT: vpsllw $2, %ymm3, %ymm4
452; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4
453; AVX512VL-NEXT: vpblendvb %ymm7, %ymm4, %ymm3, %ymm3
454; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm4
455; AVX512VL-NEXT: vpblendvb %ymm8, %ymm4, %ymm3, %ymm3
456; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm4
457; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
458; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
459; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536]
460; AVX512VL-NEXT: vpblendvb %ymm6, %ymm4, %ymm1, %ymm1
461; AVX512VL-NEXT: vpsrlw $2, %ymm1, %ymm4
462; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
463; AVX512VL-NEXT: vpand %ymm7, %ymm4, %ymm4
464; AVX512VL-NEXT: vpaddb %ymm6, %ymm6, %ymm8
465; AVX512VL-NEXT: vpblendvb %ymm8, %ymm4, %ymm1, %ymm1
466; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm4
467; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm9 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
468; AVX512VL-NEXT: vpand %ymm9, %ymm4, %ymm4
469; AVX512VL-NEXT: vpaddb %ymm8, %ymm8, %ymm10
470; AVX512VL-NEXT: vpblendvb %ymm10, %ymm4, %ymm1, %ymm1
471; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm4
472; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
473; AVX512VL-NEXT: vpblendvb %ymm6, %ymm4, %ymm0, %ymm0
474; AVX512VL-NEXT: vpsrlw $2, %ymm0, %ymm4
475; AVX512VL-NEXT: vpand %ymm7, %ymm4, %ymm4
476; AVX512VL-NEXT: vpblendvb %ymm8, %ymm4, %ymm0, %ymm0
477; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm4
478; AVX512VL-NEXT: vpand %ymm9, %ymm4, %ymm4
479; AVX512VL-NEXT: vpblendvb %ymm10, %ymm4, %ymm0, %ymm0
480; AVX512VL-NEXT: vpor %ymm0, %ymm3, %ymm0
481; AVX512VL-NEXT: vpor %ymm1, %ymm2, %ymm1
482; AVX512VL-NEXT: retq
483;
484; AVX512BW-LABEL: constant_rotate_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000485; AVX512BW: # %bb.0:
Craig Toppercb0e7492017-07-31 17:35:44 +0000486; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256]
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000487; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
488; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
489; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
490; AVX512BW-NEXT: vpblendmb %zmm2, %zmm0, %zmm2 {%k1}
491; AVX512BW-NEXT: vpsllw $2, %zmm2, %zmm3
492; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
493; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
494; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
495; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
496; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
497; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
498; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm2 {%k1}
Craig Toppercb0e7492017-07-31 17:35:44 +0000499; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536]
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000500; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
501; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm3
502; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
503; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
504; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm3
505; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
506; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
507; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
508; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
509; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm3
510; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
511; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
512; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
513; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
514; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
515; AVX512BW-NEXT: retq
516;
517; AVX512VLBW-LABEL: constant_rotate_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000518; AVX512VLBW: # %bb.0:
Craig Toppercb0e7492017-07-31 17:35:44 +0000519; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256]
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000520; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
521; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2
522; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
523; AVX512VLBW-NEXT: vpblendmb %zmm2, %zmm0, %zmm2 {%k1}
524; AVX512VLBW-NEXT: vpsllw $2, %zmm2, %zmm3
525; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
526; AVX512VLBW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
527; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
528; AVX512VLBW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
529; AVX512VLBW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
530; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
531; AVX512VLBW-NEXT: vpaddb %zmm2, %zmm2, %zmm2 {%k1}
Craig Toppercb0e7492017-07-31 17:35:44 +0000532; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536,57600,41152,24704,8256,8192,24640,41088,57536]
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000533; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
534; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm3
535; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
536; AVX512VLBW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
537; AVX512VLBW-NEXT: vpsrlw $2, %zmm0, %zmm3
538; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
539; AVX512VLBW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
540; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
541; AVX512VLBW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
542; AVX512VLBW-NEXT: vpsrlw $1, %zmm0, %zmm3
543; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm3, %zmm3
544; AVX512VLBW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
545; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
546; AVX512VLBW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
547; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
548; AVX512VLBW-NEXT: retq
549 %shl = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>
550 %lshr = lshr <64 x i8> %a, <i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
551 %or = or <64 x i8> %shl, %lshr
552 ret <64 x i8> %or
553}
554
555;
556; Uniform Constant Rotates
557;
558
; Splat-constant rotate of <8 x i64>. The IR ORs a left shift by 14 with a
; logical right shift by 50 (14 + 50 == 64, the lane width). The checks use
; the prefix shared by every run configuration and expect a single vprolq
; with immediate 14 followed by the return.
559define <8 x i64> @splatconstant_rotate_v8i64(<8 x i64> %a) nounwind {
560; AVX512-LABEL: splatconstant_rotate_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000561; AVX512: # %bb.0:
Simon Pilgrim1cbe8c22017-07-17 14:11:30 +0000562; AVX512-NEXT: vprolq $14, %zmm0, %zmm0
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000563; AVX512-NEXT: retq
; IR under test: shl/lshr pair with complementary splat amounts, then or.
564 %shl = shl <8 x i64> %a, <i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14, i64 14>
565 %lshr = lshr <8 x i64> %a, <i64 50, i64 50, i64 50, i64 50, i64 50, i64 50, i64 50, i64 50>
566 %or = or <8 x i64> %shl, %lshr
567 ret <8 x i64> %or
568}
569
; Splat-constant rotate of <16 x i32>. The IR ORs a left shift by 4 with a
; logical right shift by 28 (4 + 28 == 32, the lane width). The checks use
; the prefix shared by every run configuration and expect a single vprold
; with immediate 4 followed by the return.
570define <16 x i32> @splatconstant_rotate_v16i32(<16 x i32> %a) nounwind {
571; AVX512-LABEL: splatconstant_rotate_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000572; AVX512: # %bb.0:
Simon Pilgrim1cbe8c22017-07-17 14:11:30 +0000573; AVX512-NEXT: vprold $4, %zmm0, %zmm0
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000574; AVX512-NEXT: retq
; IR under test: shl/lshr pair with complementary splat amounts, then or.
575 %shl = shl <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
576 %lshr = lshr <16 x i32> %a, <i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28>
577 %or = or <16 x i32> %shl, %lshr
578 ret <16 x i32> %or
579}
580
; Splat-constant rotate of <32 x i16>. The IR ORs a left shift by 7 with a
; logical right shift by 9 (7 + 9 == 16, the lane width). No rotate
; instruction appears in any expected sequence: each one is a vpsllw/vpsrlw
; pair combined with an or. The two check groups without avx512bw operate
; on ymm0 and ymm1 separately (two shift pairs and two vpor); the two groups
; with avx512bw use one shift pair on zmm0 and a single vporq.
581define <32 x i16> @splatconstant_rotate_v32i16(<32 x i16> %a) nounwind {
582; AVX512F-LABEL: splatconstant_rotate_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000583; AVX512F: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000584; AVX512F-NEXT: vpsllw $7, %ymm1, %ymm2
585; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm3
586; AVX512F-NEXT: vpsrlw $9, %ymm1, %ymm1
587; AVX512F-NEXT: vpor %ymm1, %ymm2, %ymm1
588; AVX512F-NEXT: vpsrlw $9, %ymm0, %ymm0
589; AVX512F-NEXT: vpor %ymm0, %ymm3, %ymm0
590; AVX512F-NEXT: retq
591;
592; AVX512VL-LABEL: splatconstant_rotate_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000593; AVX512VL: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000594; AVX512VL-NEXT: vpsllw $7, %ymm1, %ymm2
595; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm3
596; AVX512VL-NEXT: vpsrlw $9, %ymm1, %ymm1
597; AVX512VL-NEXT: vpsrlw $9, %ymm0, %ymm0
598; AVX512VL-NEXT: vpor %ymm0, %ymm3, %ymm0
599; AVX512VL-NEXT: vpor %ymm1, %ymm2, %ymm1
600; AVX512VL-NEXT: retq
601;
602; AVX512BW-LABEL: splatconstant_rotate_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000603; AVX512BW: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000604; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm1
605; AVX512BW-NEXT: vpsrlw $9, %zmm0, %zmm0
606; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0
607; AVX512BW-NEXT: retq
608;
609; AVX512VLBW-LABEL: splatconstant_rotate_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000610; AVX512VLBW: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000611; AVX512VLBW-NEXT: vpsllw $7, %zmm0, %zmm1
612; AVX512VLBW-NEXT: vpsrlw $9, %zmm0, %zmm0
613; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0
614; AVX512VLBW-NEXT: retq
; IR under test: shl/lshr pair with complementary splat amounts, then or.
615 %shl = shl <32 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
616 %lshr = lshr <32 x i16> %a, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
617 %or = or <32 x i16> %shl, %lshr
618 ret <32 x i16> %or
619}
620
621define <64 x i8> @splatconstant_rotate_v64i8(<64 x i8> %a) nounwind {
622; AVX512F-LABEL: splatconstant_rotate_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000623; AVX512F: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000624; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm2
625; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
626; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
627; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4
628; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3
629; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
630; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
631; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1
632; AVX512F-NEXT: vpor %ymm1, %ymm2, %ymm1
633; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
634; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
635; AVX512F-NEXT: vpor %ymm0, %ymm3, %ymm0
636; AVX512F-NEXT: retq
637;
638; AVX512VL-LABEL: splatconstant_rotate_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000639; AVX512VL: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000640; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm2
641; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
642; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
643; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
644; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3
645; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
646; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
647; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1
648; AVX512VL-NEXT: vpor %ymm1, %ymm2, %ymm1
649; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
650; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0
651; AVX512VL-NEXT: vpor %ymm0, %ymm3, %ymm0
652; AVX512VL-NEXT: retq
653;
654; AVX512BW-LABEL: splatconstant_rotate_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000655; AVX512BW: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000656; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
657; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
658; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
659; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
660; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0
661; AVX512BW-NEXT: retq
662;
663; AVX512VLBW-LABEL: splatconstant_rotate_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000664; AVX512VLBW: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000665; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
666; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
667; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
668; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
669; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0
670; AVX512VLBW-NEXT: retq
671 %shl = shl <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
672 %lshr = lshr <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
673 %or = or <64 x i8> %shl, %lshr
674 ret <64 x i8> %or
675}
676
677;
678; Masked Uniform Constant Rotates
679;
680
; Masked splat-constant rotate of <8 x i64>. The IR shifts left by 15 and
; logically right by 49 (15 + 49 == 64, the lane width), ANDs each shifted
; value with its own non-splat constant, and ORs the two masked values.
; The checks use the prefix shared by every run configuration and expect a
; vprolq with immediate 15 followed by one vpandq whose mask is a
; constant-pool operand matched only by a regex.
; NOTE(review): that single mask is presumably the OR of the two IR masks;
; the constant itself is not visible in the check lines - confirm.
681define <8 x i64> @splatconstant_rotate_mask_v8i64(<8 x i64> %a) nounwind {
682; AVX512-LABEL: splatconstant_rotate_mask_v8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000683; AVX512: # %bb.0:
Simon Pilgrim1cbe8c22017-07-17 14:11:30 +0000684; AVX512-NEXT: vprolq $15, %zmm0, %zmm0
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000685; AVX512-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
686; AVX512-NEXT: retq
; IR under test: complementary shifts, a per-side mask, then or.
687 %shl = shl <8 x i64> %a, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
688 %lshr = lshr <8 x i64> %a, <i64 49, i64 49, i64 49, i64 49, i64 49, i64 49, i64 49, i64 49>
689 %rmask = and <8 x i64> %lshr, <i64 255, i64 127, i64 127, i64 255, i64 255, i64 127, i64 127, i64 255>
690 %lmask = and <8 x i64> %shl, <i64 33, i64 65, i64 129, i64 257, i64 33, i64 65, i64 129, i64 257>
691 %or = or <8 x i64> %lmask, %rmask
692 ret <8 x i64> %or
693}
694
; Masked splat-constant rotate of <16 x i32>. The IR shifts left by 4 and
; logically right by 28 (4 + 28 == 32, the lane width), ANDs each shifted
; value with its own non-splat constant, and ORs the two masked values.
; The checks use the prefix shared by every run configuration and expect a
; vprold with immediate 4 followed by one vpandq whose mask is a
; constant-pool operand matched only by a regex.
; NOTE(review): that single mask is presumably the OR of the two IR masks;
; the constant itself is not visible in the check lines - confirm.
695define <16 x i32> @splatconstant_rotate_mask_v16i32(<16 x i32> %a) nounwind {
696; AVX512-LABEL: splatconstant_rotate_mask_v16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000697; AVX512: # %bb.0:
Simon Pilgrim1cbe8c22017-07-17 14:11:30 +0000698; AVX512-NEXT: vprold $4, %zmm0, %zmm0
Craig Topperafce0ba2017-08-30 16:38:33 +0000699; AVX512-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000700; AVX512-NEXT: retq
; IR under test: complementary shifts, a per-side mask, then or.
701 %shl = shl <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
702 %lshr = lshr <16 x i32> %a, <i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28>
703 %rmask = and <16 x i32> %lshr, <i32 3, i32 7, i32 15, i32 31, i32 63, i32 127, i32 255, i32 511, i32 3, i32 7, i32 15, i32 31, i32 63, i32 127, i32 255, i32 511>
704 %lmask = and <16 x i32> %shl, <i32 511, i32 255, i32 127, i32 63, i32 31, i32 15, i32 7, i32 3, i32 511, i32 255, i32 127, i32 63, i32 31, i32 15, i32 7, i32 3>
705 %or = or <16 x i32> %lmask, %rmask
706 ret <16 x i32> %or
707}
708
; Masked splat-constant rotate of <32 x i16>. The IR shifts left by 5 and
; logically right by 11 (5 + 11 == 16, the lane width), ANDs the right-shift
; result with splat 55 and the left-shift result with splat 33, and ORs the
; two masked values. No rotate instruction appears in any expected
; sequence: each one is vpsllw/vpsrlw, an AND on each side, then an or.
; The two check groups without avx512bw work on ymm0 and ymm1 separately
; and show the 55 and 33 masks as vmovdqa constants; the two groups with
; avx512bw work on zmm0 and take both masks from the constant pool.
709define <32 x i16> @splatconstant_rotate_mask_v32i16(<32 x i16> %a) nounwind {
710; AVX512F-LABEL: splatconstant_rotate_mask_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000711; AVX512F: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000712; AVX512F-NEXT: vpsllw $5, %ymm0, %ymm2
713; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm3
714; AVX512F-NEXT: vpsrlw $11, %ymm0, %ymm0
715; AVX512F-NEXT: vpsrlw $11, %ymm1, %ymm1
716; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55]
717; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1
718; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
719; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33]
720; AVX512F-NEXT: vpand %ymm4, %ymm3, %ymm3
721; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
722; AVX512F-NEXT: vpand %ymm4, %ymm2, %ymm2
723; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
724; AVX512F-NEXT: retq
725;
726; AVX512VL-LABEL: splatconstant_rotate_mask_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000727; AVX512VL: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000728; AVX512VL-NEXT: vpsllw $5, %ymm0, %ymm2
729; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm3
730; AVX512VL-NEXT: vpsrlw $11, %ymm0, %ymm0
731; AVX512VL-NEXT: vpsrlw $11, %ymm1, %ymm1
732; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55]
733; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1
734; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0
735; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33]
736; AVX512VL-NEXT: vpand %ymm4, %ymm3, %ymm3
737; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1
738; AVX512VL-NEXT: vpand %ymm4, %ymm2, %ymm2
739; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
740; AVX512VL-NEXT: retq
741;
742; AVX512BW-LABEL: splatconstant_rotate_mask_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000743; AVX512BW: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000744; AVX512BW-NEXT: vpsllw $5, %zmm0, %zmm1
745; AVX512BW-NEXT: vpsrlw $11, %zmm0, %zmm0
746; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
747; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
748; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0
749; AVX512BW-NEXT: retq
750;
751; AVX512VLBW-LABEL: splatconstant_rotate_mask_v32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000752; AVX512VLBW: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000753; AVX512VLBW-NEXT: vpsllw $5, %zmm0, %zmm1
754; AVX512VLBW-NEXT: vpsrlw $11, %zmm0, %zmm0
755; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
756; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
757; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0
758; AVX512VLBW-NEXT: retq
; IR under test: complementary shifts, a per-side splat mask, then or.
759 %shl = shl <32 x i16> %a, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
760 %lshr = lshr <32 x i16> %a, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
761 %rmask = and <32 x i16> %lshr, <i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55>
762 %lmask = and <32 x i16> %shl, <i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33>
763 %or = or <32 x i16> %lmask, %rmask
764 ret <32 x i16> %or
765}
766
767define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
768; AVX512F-LABEL: splatconstant_rotate_mask_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000769; AVX512F: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000770; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
771; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm3
772; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
773; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
774; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55]
775; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
776; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1
777; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
778; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33]
779; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
780; AVX512F-NEXT: vpand %ymm4, %ymm3, %ymm3
781; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
782; AVX512F-NEXT: vpand %ymm4, %ymm2, %ymm2
783; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
784; AVX512F-NEXT: retq
785;
786; AVX512VL-LABEL: splatconstant_rotate_mask_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000787; AVX512VL: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000788; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
789; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm3
790; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
791; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
792; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55]
793; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
794; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1
795; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0
796; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33,33]
797; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
798; AVX512VL-NEXT: vpand %ymm4, %ymm3, %ymm3
799; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1
800; AVX512VL-NEXT: vpand %ymm4, %ymm2, %ymm2
801; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
802; AVX512VL-NEXT: retq
803;
804; AVX512BW-LABEL: splatconstant_rotate_mask_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000805; AVX512BW: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000806; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
807; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
808; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
809; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
810; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
811; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
812; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0
813; AVX512BW-NEXT: retq
814;
815; AVX512VLBW-LABEL: splatconstant_rotate_mask_v64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000816; AVX512VLBW: # %bb.0:
Simon Pilgrim2899ec82017-07-16 19:26:49 +0000817; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
818; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
819; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
820; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
821; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
822; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
823; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0
824; AVX512VLBW-NEXT: retq
825 %shl = shl <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
826 %lshr = lshr <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
827 %rmask = and <64 x i8> %lshr, <i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55>
828 %lmask = and <64 x i8> %shl, <i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33>
829 %or = or <64 x i8> %lmask, %rmask
830 ret <64 x i8> %or
831}