; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX

; AVX-512 masked variable-rotate intrinsics under test: prolv = rotate left,
; prorv = rotate right; .d.512 works on <16 x i32>, .q.512 on <8 x i64>.
; Operands are (src, per-lane rotate amounts, merge passthru, mask) -- the
; bodies below pass %x1 / zeroinitializer / -1 as passthru+mask to get the
; merge-masked, zero-masked and unmasked instruction forms respectively.
declare <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

; Tests showing failure to replace variable rotates with immediate splat versions.

; Rotate-left by a splat of 5 on <16 x i32>: merge-masked (passthru %x1),
; zero-masked, and unmasked (mask -1) calls, summed so all three results stay
; live. CHECK lines show the splat amount staying in a register (vprolvd)
; instead of folding to an immediate rotate.
define <16 x i32> @test_splat_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_rol_v16i32:
; KNL: # BB#0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vpbroadcastd {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; KNL-NEXT: vprolvd %zmm2, %zmm0, %zmm3
; KNL-NEXT: vprolvd %zmm2, %zmm0, %zmm1 {%k1}
; KNL-NEXT: vprolvd %zmm2, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT: vpaddd %zmm3, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test_splat_rol_v16i32:
; SKX: # BB#0:
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; SKX-NEXT: vprolvd %zmm2, %zmm0, %zmm3
; SKX-NEXT: vprolvd %zmm2, %zmm0, %zmm1 {%k1}
; SKX-NEXT: vprolvd %zmm2, %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT: vpaddd %zmm3, %zmm0, %zmm0
; SKX-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

; Rotate-left by a splat of 5 on <8 x i64>: merge-masked, zero-masked and
; unmasked vprolvq, with the splat amount still materialized via vpbroadcastq.
define <8 x i64>@test_splat_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_rol_v8i64:
; KNL: # BB#0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vpbroadcastq {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5]
; KNL-NEXT: vprolvq %zmm2, %zmm0, %zmm3
; KNL-NEXT: vprolvq %zmm2, %zmm0, %zmm1 {%k1}
; KNL-NEXT: vprolvq %zmm2, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT: vpaddq %zmm3, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test_splat_rol_v8i64:
; SKX: # BB#0:
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vpbroadcastq {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5]
; SKX-NEXT: vprolvq %zmm2, %zmm0, %zmm3
; SKX-NEXT: vprolvq %zmm2, %zmm0, %zmm1 {%k1}
; SKX-NEXT: vprolvq %zmm2, %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT: vpaddq %zmm3, %zmm0, %zmm0
; SKX-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

; Rotate-right by a splat of 5 on <16 x i32>: same three masking variants as
; the rol test above, using vprorvd.
define <16 x i32> @test_splat_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_ror_v16i32:
; KNL: # BB#0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vpbroadcastd {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; KNL-NEXT: vprorvd %zmm2, %zmm0, %zmm3
; KNL-NEXT: vprorvd %zmm2, %zmm0, %zmm1 {%k1}
; KNL-NEXT: vprorvd %zmm2, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT: vpaddd %zmm3, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test_splat_ror_v16i32:
; SKX: # BB#0:
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; SKX-NEXT: vprorvd %zmm2, %zmm0, %zmm3
; SKX-NEXT: vprorvd %zmm2, %zmm0, %zmm1 {%k1}
; SKX-NEXT: vprorvd %zmm2, %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT: vpaddd %zmm3, %zmm0, %zmm0
; SKX-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

; Rotate-right by a splat of 5 on <8 x i64>: merge-masked, zero-masked and
; unmasked vprorvq variants summed together.
define <8 x i64>@test_splat_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_ror_v8i64:
; KNL: # BB#0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vpbroadcastq {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5]
; KNL-NEXT: vprorvq %zmm2, %zmm0, %zmm3
; KNL-NEXT: vprorvq %zmm2, %zmm0, %zmm1 {%k1}
; KNL-NEXT: vprorvq %zmm2, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT: vpaddq %zmm3, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test_splat_ror_v8i64:
; SKX: # BB#0:
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vpbroadcastq {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5]
; SKX-NEXT: vprorvq %zmm2, %zmm0, %zmm3
; SKX-NEXT: vprorvq %zmm2, %zmm0, %zmm1 {%k1}
; SKX-NEXT: vprorvq %zmm2, %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT: vpaddq %zmm3, %zmm0, %zmm0
; SKX-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

; Tests showing failure to replace out-of-bounds variable rotates with in-bounds immediate splat versions.

; Rotate-left on <16 x i32> with splat amounts outside [0,31]: 33 (merge-
; masked), -1 (zero-masked), 65534 (unmasked). Two amounts fold to a
; broadcast memory operand; the all-ones splat is built with vpternlogd.
define <16 x i32> @test_splat_bounds_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_bounds_rol_v16i32:
; KNL: # BB#0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vprolvd {{.*}}(%rip){1to16}, %zmm0, %zmm1 {%k1}
; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
; KNL-NEXT: vprolvd %zmm2, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT: vpaddd %zmm2, %zmm1, %zmm1
; KNL-NEXT: vprolvd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test_splat_bounds_rol_v16i32:
; SKX: # BB#0:
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vprolvd {{.*}}(%rip){1to16}, %zmm0, %zmm1 {%k1}
; SKX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
; SKX-NEXT: vprolvd %zmm2, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT: vpaddd %zmm2, %zmm1, %zmm1
; SKX-NEXT: vprolvd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

; Rotate-left on <8 x i64> with splat amounts outside [0,63]: 65534 (merge-
; masked), 65 (zero-masked), -1 (unmasked, built with vpternlogd).
define <8 x i64>@test_splat_bounds_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_bounds_rol_v8i64:
; KNL: # BB#0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vprolvq {{.*}}(%rip){1to8}, %zmm0, %zmm1 {%k1}
; KNL-NEXT: vprolvq {{.*}}(%rip){1to8}, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT: vpaddq %zmm2, %zmm1, %zmm1
; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
; KNL-NEXT: vprolvq %zmm2, %zmm0, %zmm0
; KNL-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test_splat_bounds_rol_v8i64:
; SKX: # BB#0:
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vprolvq {{.*}}(%rip){1to8}, %zmm0, %zmm1 {%k1}
; SKX-NEXT: vprolvq {{.*}}(%rip){1to8}, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT: vpaddq %zmm2, %zmm1, %zmm1
; SKX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
; SKX-NEXT: vprolvq %zmm2, %zmm0, %zmm0
; SKX-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

; Rotate-right on <16 x i32> with out-of-range splat amounts (33, -1, 65534):
; same masking variants as the rol bounds test, using vprorvd.
define <16 x i32> @test_splat_bounds_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_bounds_ror_v16i32:
; KNL: # BB#0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vprorvd {{.*}}(%rip){1to16}, %zmm0, %zmm1 {%k1}
; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
; KNL-NEXT: vprorvd %zmm2, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT: vpaddd %zmm2, %zmm1, %zmm1
; KNL-NEXT: vprorvd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test_splat_bounds_ror_v16i32:
; SKX: # BB#0:
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vprorvd {{.*}}(%rip){1to16}, %zmm0, %zmm1 {%k1}
; SKX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
; SKX-NEXT: vprorvd %zmm2, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT: vpaddd %zmm2, %zmm1, %zmm1
; SKX-NEXT: vprorvd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

; Rotate-right on <8 x i64> with out-of-range splat amounts (65534, 65, -1):
; same masking variants as the rol bounds test, using vprorvq.
define <8 x i64>@test_splat_bounds_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_bounds_ror_v8i64:
; KNL: # BB#0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vprorvq {{.*}}(%rip){1to8}, %zmm0, %zmm1 {%k1}
; KNL-NEXT: vprorvq {{.*}}(%rip){1to8}, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT: vpaddq %zmm2, %zmm1, %zmm1
; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
; KNL-NEXT: vprorvq %zmm2, %zmm0, %zmm0
; KNL-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test_splat_bounds_ror_v8i64:
; SKX: # BB#0:
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vprorvq {{.*}}(%rip){1to8}, %zmm0, %zmm1 {%k1}
; SKX-NEXT: vprorvq {{.*}}(%rip){1to8}, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT: vpaddq %zmm2, %zmm1, %zmm1
; SKX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
; SKX-NEXT: vprorvq %zmm2, %zmm0, %zmm0
; SKX-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}