; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
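; Codegen tests for AArch64 NEON shift-right-by-immediate patterns: plain
; shifts (sshr/ushr), accumulating shifts (ssra/usra), narrowing shifts
; (shrn/shrn2), the saturating/rounding narrowing intrinsics, and fixed-point
; conversions.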

define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) {
; CHECK: test_vshr_n_s8
; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <8 x i8> %vshr_n
}

define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) {
; CHECK: test_vshr_n_s16
; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vshr_n = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %vshr_n
}

define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) {
; CHECK: test_vshr_n_s32
; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vshr_n = ashr <2 x i32> %a, <i32 3, i32 3>
  ret <2 x i32> %vshr_n
}

define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
; CHECK: test_vshrq_n_s8
; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vshr_n = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %vshr_n
}

define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
; CHECK: test_vshrq_n_s16
; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vshr_n = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %vshr_n
}

define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
; CHECK: test_vshrq_n_s32
; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vshr_n = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %vshr_n
}

define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) {
; CHECK: test_vshrq_n_s64
; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vshr_n = ashr <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %vshr_n
}

define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) {
; CHECK: test_vshr_n_u8
; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vshr_n = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <8 x i8> %vshr_n
}

define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) {
; CHECK: test_vshr_n_u16
; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vshr_n = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %vshr_n
}

define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) {
; CHECK: test_vshr_n_u32
; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vshr_n = lshr <2 x i32> %a, <i32 3, i32 3>
  ret <2 x i32> %vshr_n
}

define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
; CHECK: test_vshrq_n_u8
; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vshr_n = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %vshr_n
}

define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
; CHECK: test_vshrq_n_u16
; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vshr_n = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %vshr_n
}

define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
; CHECK: test_vshrq_n_u32
; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vshr_n = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %vshr_n
}

define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) {
; CHECK: test_vshrq_n_u64
; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vshr_n = lshr <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %vshr_n
}

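; Shift right and accumulate: an ashr/lshr whose result feeds an add should
; fold into ssra/usra.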
define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsra_n_s8
; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsra_n = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <8 x i8> %vsra_n, %a
  ret <8 x i8> %1
}

define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsra_n_s16
; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsra_n = ashr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
  %1 = add <4 x i16> %vsra_n, %a
  ret <4 x i16> %1
}

define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsra_n_s32
; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsra_n = ashr <2 x i32> %b, <i32 3, i32 3>
  %1 = add <2 x i32> %vsra_n, %a
  ret <2 x i32> %1
}

define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsraq_n_s8
; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsra_n = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <16 x i8> %vsra_n, %a
  ret <16 x i8> %1
}

define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsraq_n_s16
; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsra_n = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %1 = add <8 x i16> %vsra_n, %a
  ret <8 x i16> %1
}

define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsraq_n_s32
; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsra_n = ashr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
  %1 = add <4 x i32> %vsra_n, %a
  ret <4 x i32> %1
}

define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsraq_n_s64
; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsra_n = ashr <2 x i64> %b, <i64 3, i64 3>
  %1 = add <2 x i64> %vsra_n, %a
  ret <2 x i64> %1
}

define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsra_n_u8
; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsra_n = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <8 x i8> %vsra_n, %a
  ret <8 x i8> %1
}

define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsra_n_u16
; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsra_n = lshr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
  %1 = add <4 x i16> %vsra_n, %a
  ret <4 x i16> %1
}

define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsra_n_u32
; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsra_n = lshr <2 x i32> %b, <i32 3, i32 3>
  %1 = add <2 x i32> %vsra_n, %a
  ret <2 x i32> %1
}

define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsraq_n_u8
; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsra_n = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <16 x i8> %vsra_n, %a
  ret <16 x i8> %1
}

define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsraq_n_u16
; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsra_n = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %1 = add <8 x i16> %vsra_n, %a
  ret <8 x i16> %1
}

define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsraq_n_u32
; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsra_n = lshr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
  %1 = add <4 x i32> %vsra_n, %a
  ret <4 x i32> %1
}

define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsraq_n_u64
; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsra_n = lshr <2 x i64> %b, <i64 3, i64 3>
  %1 = add <2 x i64> %vsra_n, %a
  ret <2 x i64> %1
}

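; Narrowing shift right: a shift followed by a truncate to the half-width
; element type should select shrn.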
define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) {
; CHECK: test_vshrn_n_s16
; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %1 = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  ret <8 x i8> %vshrn_n
}

define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) {
; CHECK: test_vshrn_n_s32
; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %1 = ashr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  ret <4 x i16> %vshrn_n
}

define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) {
; CHECK: test_vshrn_n_s64
; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %1 = ashr <2 x i64> %a, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %vshrn_n
}

define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) {
; CHECK: test_vshrn_n_u16
; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %1 = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  ret <8 x i8> %vshrn_n
}

define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) {
; CHECK: test_vshrn_n_u32
; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %1 = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  ret <4 x i16> %vshrn_n
}

define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) {
; CHECK: test_vshrn_n_u64
; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %1 = lshr <2 x i64> %a, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %vshrn_n
}

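; High-half narrowing: the narrowed value concatenated onto an existing low
; half should select shrn2, which writes the upper half of the destination.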
define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vshrn_high_n_s16
; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %2 = bitcast <8 x i8> %a to <1 x i64>
  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %4
}

define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vshrn_high_n_s32
; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  %2 = bitcast <4 x i16> %a to <1 x i64>
  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %4
}

define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vshrn_high_n_s64
; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = ashr <2 x i64> %b, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}

define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vshrn_high_n_u16
; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %2 = bitcast <8 x i8> %a to <1 x i64>
  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %4
}

define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vshrn_high_n_u32
; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  %2 = bitcast <4 x i16> %a to <1 x i64>
  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %4
}

define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vshrn_high_n_u64
; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = lshr <2 x i64> %b, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}

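; Saturating and rounding narrows are expressed with llvm.aarch64.neon
; intrinsics; when the result is inserted into the high half, the *2 forms
; (sqshrun2, rshrn2, sqrshrun2, sqshrn2, uqshrn2, sqrshrn2, uqrshrn2) should
; be selected.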
define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrun_high_n_s16
; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrun_high_n_s32
; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrun_high_n_s64
; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vrshrn_high_n_s16
; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vrshrn_high_n_s32
; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vrshrn_high_n_s64
; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrun_high_n_s16
; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrun_high_n_s32
; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrun_high_n_s64
; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrn_high_n_s16
; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrn_high_n_s32
; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrn_high_n_s64
; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrn_high_n_u16
; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrn_high_n_u32
; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrn_high_n_u64
; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrn_high_n_s16
; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrn_high_n_s32
; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrn_high_n_s64
; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrn_high_n_u16
; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrn_high_n_u32
; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrn_high_n_u64
; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

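; Intrinsic declarations.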
declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32)

declare <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32)

declare <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32)

declare <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32)

declare <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32)

declare <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32)

declare <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32)

declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32)

declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32)

declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32)

declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32)

declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)

declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32)

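; Fixed-point conversions with an immediate shift count on 64-bit
; scalar-in-vector values should select the immediate forms of
; fcvtzs/fcvtzu/scvtf/ucvtf.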
define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvt_n_s64_f64
; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64)
  ret <1 x i64> %1
}

define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvt_n_u64_f64
; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64)
  ret <1 x i64> %1
}

define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) {
; CHECK-LABEL: test_vcvt_n_f64_s64
; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
  ret <1 x double> %1
}

define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) {
; CHECK-LABEL: test_vcvt_n_f64_u64
; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
  ret <1 x double> %1
}

declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32)
declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32)
declare <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32)
declare <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32)