; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
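; Check that vector shift-by-immediate operations (plain, rounding, saturating,
; accumulating, inserting, and narrowing variants) and fixed-point <-> floating-point
; conversions select the expected AArch64 Advanced SIMD instructions.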

define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) {
; CHECK: test_vshr_n_s8
; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <8 x i8> %vshr_n
}

define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) {
; CHECK: test_vshr_n_s16
; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vshr_n = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %vshr_n
}

define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) {
; CHECK: test_vshr_n_s32
; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vshr_n = ashr <2 x i32> %a, <i32 3, i32 3>
  ret <2 x i32> %vshr_n
}

define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
; CHECK: test_vshrq_n_s8
; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vshr_n = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %vshr_n
}

define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
; CHECK: test_vshrq_n_s16
; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vshr_n = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %vshr_n
}

define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
; CHECK: test_vshrq_n_s32
; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vshr_n = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %vshr_n
}

define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) {
; CHECK: test_vshrq_n_s64
; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vshr_n = ashr <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %vshr_n
}

define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) {
; CHECK: test_vshr_n_u8
; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vshr_n = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <8 x i8> %vshr_n
}

define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) {
; CHECK: test_vshr_n_u16
; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vshr_n = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %vshr_n
}

define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) {
; CHECK: test_vshr_n_u32
; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vshr_n = lshr <2 x i32> %a, <i32 3, i32 3>
  ret <2 x i32> %vshr_n
}

define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
; CHECK: test_vshrq_n_u8
; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vshr_n = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %vshr_n
}

define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
; CHECK: test_vshrq_n_u16
; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vshr_n = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %vshr_n
}

define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
; CHECK: test_vshrq_n_u32
; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vshr_n = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %vshr_n
}

define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) {
; CHECK: test_vshrq_n_u64
; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vshr_n = lshr <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %vshr_n
}

define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsra_n_s8
; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsra_n = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <8 x i8> %vsra_n, %a
  ret <8 x i8> %1
}

define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsra_n_s16
; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsra_n = ashr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
  %1 = add <4 x i16> %vsra_n, %a
  ret <4 x i16> %1
}

define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsra_n_s32
; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsra_n = ashr <2 x i32> %b, <i32 3, i32 3>
  %1 = add <2 x i32> %vsra_n, %a
  ret <2 x i32> %1
}

define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsraq_n_s8
; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsra_n = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <16 x i8> %vsra_n, %a
  ret <16 x i8> %1
}

define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsraq_n_s16
; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsra_n = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %1 = add <8 x i16> %vsra_n, %a
  ret <8 x i16> %1
}

define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsraq_n_s32
; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsra_n = ashr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
  %1 = add <4 x i32> %vsra_n, %a
  ret <4 x i32> %1
}

define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsraq_n_s64
; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsra_n = ashr <2 x i64> %b, <i64 3, i64 3>
  %1 = add <2 x i64> %vsra_n, %a
  ret <2 x i64> %1
}

define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsra_n_u8
; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsra_n = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <8 x i8> %vsra_n, %a
  ret <8 x i8> %1
}

define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsra_n_u16
; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsra_n = lshr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
  %1 = add <4 x i16> %vsra_n, %a
  ret <4 x i16> %1
}

define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsra_n_u32
; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsra_n = lshr <2 x i32> %b, <i32 3, i32 3>
  %1 = add <2 x i32> %vsra_n, %a
  ret <2 x i32> %1
}

define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsraq_n_u8
; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsra_n = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <16 x i8> %vsra_n, %a
  ret <16 x i8> %1
}

define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsraq_n_u16
; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsra_n = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %1 = add <8 x i16> %vsra_n, %a
  ret <8 x i16> %1
}

define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsraq_n_u32
; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsra_n = lshr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
  %1 = add <4 x i32> %vsra_n, %a
  ret <4 x i32> %1
}

define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsraq_n_u64
; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsra_n = lshr <2 x i64> %b, <i64 3, i64 3>
  %1 = add <2 x i64> %vsra_n, %a
  ret <2 x i64> %1
}

define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) {
; CHECK: test_vrshr_n_s8
; CHECK: srshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %a, i32 3)
  ret <8 x i8> %vrshr_n
}


define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) {
; CHECK: test_vrshr_n_s16
; CHECK: srshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %a, i32 3)
  ret <4 x i16> %vrshr_n
}


define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) {
; CHECK: test_vrshr_n_s32
; CHECK: srshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %a, i32 3)
  ret <2 x i32> %vrshr_n
}


define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) {
; CHECK: test_vrshrq_n_s8
; CHECK: srshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %a, i32 3)
  ret <16 x i8> %vrshr_n
}


define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) {
; CHECK: test_vrshrq_n_s16
; CHECK: srshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %a, i32 3)
  ret <8 x i16> %vrshr_n
}


define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) {
; CHECK: test_vrshrq_n_s32
; CHECK: srshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %a, i32 3)
  ret <4 x i32> %vrshr_n
}


define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) {
; CHECK: test_vrshrq_n_s64
; CHECK: srshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %a, i32 3)
  ret <2 x i64> %vrshr_n
}


define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) {
; CHECK: test_vrshr_n_u8
; CHECK: urshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %a, i32 3)
  ret <8 x i8> %vrshr_n
}


define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) {
; CHECK: test_vrshr_n_u16
; CHECK: urshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %a, i32 3)
  ret <4 x i16> %vrshr_n
}


define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) {
; CHECK: test_vrshr_n_u32
; CHECK: urshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %a, i32 3)
  ret <2 x i32> %vrshr_n
}


define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) {
; CHECK: test_vrshrq_n_u8
; CHECK: urshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %a, i32 3)
  ret <16 x i8> %vrshr_n
}


define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) {
; CHECK: test_vrshrq_n_u16
; CHECK: urshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %a, i32 3)
  ret <8 x i16> %vrshr_n
}


define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) {
; CHECK: test_vrshrq_n_u32
; CHECK: urshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %a, i32 3)
  ret <4 x i32> %vrshr_n
}


define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) {
; CHECK: test_vrshrq_n_u64
; CHECK: urshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %a, i32 3)
  ret <2 x i64> %vrshr_n
}


define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vrsra_n_s8
; CHECK: srsra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %1 = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %b, i32 3)
  %vrsra_n = add <8 x i8> %1, %a
  ret <8 x i8> %vrsra_n
}

define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vrsra_n_s16
; CHECK: srsra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %1 = tail call <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %b, i32 3)
  %vrsra_n = add <4 x i16> %1, %a
  ret <4 x i16> %vrsra_n
}

define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vrsra_n_s32
; CHECK: srsra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %1 = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %b, i32 3)
  %vrsra_n = add <2 x i32> %1, %a
  ret <2 x i32> %vrsra_n
}

define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vrsraq_n_s8
; CHECK: srsra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %1 = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %b, i32 3)
  %vrsra_n = add <16 x i8> %1, %a
  ret <16 x i8> %vrsra_n
}

define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vrsraq_n_s16
; CHECK: srsra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %1 = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %b, i32 3)
  %vrsra_n = add <8 x i16> %1, %a
  ret <8 x i16> %vrsra_n
}

define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vrsraq_n_s32
; CHECK: srsra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %1 = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %b, i32 3)
  %vrsra_n = add <4 x i32> %1, %a
  ret <4 x i32> %vrsra_n
}

define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vrsraq_n_s64
; CHECK: srsra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %1 = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %b, i32 3)
  %vrsra_n = add <2 x i64> %1, %a
  ret <2 x i64> %vrsra_n
}

define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vrsra_n_u8
; CHECK: ursra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %1 = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %b, i32 3)
  %vrsra_n = add <8 x i8> %1, %a
  ret <8 x i8> %vrsra_n
}

define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vrsra_n_u16
; CHECK: ursra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %1 = tail call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %b, i32 3)
  %vrsra_n = add <4 x i16> %1, %a
  ret <4 x i16> %vrsra_n
}

define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vrsra_n_u32
; CHECK: ursra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %1 = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %b, i32 3)
  %vrsra_n = add <2 x i32> %1, %a
  ret <2 x i32> %vrsra_n
}

define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vrsraq_n_u8
; CHECK: ursra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %1 = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %b, i32 3)
  %vrsra_n = add <16 x i8> %1, %a
  ret <16 x i8> %vrsra_n
}

define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vrsraq_n_u16
; CHECK: ursra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %1 = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %b, i32 3)
  %vrsra_n = add <8 x i16> %1, %a
  ret <8 x i16> %vrsra_n
}

define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vrsraq_n_u32
; CHECK: ursra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %1 = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %b, i32 3)
  %vrsra_n = add <4 x i32> %1, %a
  ret <4 x i32> %vrsra_n
}

define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vrsraq_n_u64
; CHECK: ursra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %1 = tail call <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %b, i32 3)
  %vrsra_n = add <2 x i64> %1, %a
  ret <2 x i64> %vrsra_n
}

define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsri_n_s8
; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
  ret <8 x i8> %vsri_n
}


define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsri_n_s16
; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsri = tail call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3)
  ret <4 x i16> %vsri
}


define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsri_n_s32
; CHECK: sri {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsri = tail call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3)
  ret <2 x i32> %vsri
}


define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsriq_n_s8
; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
  ret <16 x i8> %vsri_n
}


define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsriq_n_s16
; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3)
  ret <8 x i16> %vsri
}


define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsriq_n_s32
; CHECK: sri {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsri = tail call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3)
  ret <4 x i32> %vsri
}


define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsriq_n_s64
; CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsri = tail call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3)
  ret <2 x i64> %vsri
}

define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsri_n_p8
; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
  ret <8 x i8> %vsri_n
}

define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsri_n_p16
; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15
  %vsri = tail call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15)
  ret <4 x i16> %vsri
}

define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsriq_n_p8
; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
  ret <16 x i8> %vsri_n
}

define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsriq_n_p16
; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15
  %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15)
  ret <8 x i16> %vsri
}

define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsli_n_s8
; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
  ret <8 x i8> %vsli_n
}

define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsli_n_s16
; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3)
  ret <4 x i16> %vsli
}

define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsli_n_s32
; CHECK: sli {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsli = tail call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3)
  ret <2 x i32> %vsli
}

define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsliq_n_s8
; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
  ret <16 x i8> %vsli_n
}

define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsliq_n_s16
; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3)
  ret <8 x i16> %vsli
}

define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsliq_n_s32
; CHECK: sli {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsli = tail call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3)
  ret <4 x i32> %vsli
}

define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsliq_n_s64
; CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsli = tail call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3)
  ret <2 x i64> %vsli
}

define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsli_n_p8
; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
  ret <8 x i8> %vsli_n
}

define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsli_n_p16
; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15
  %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15)
  ret <4 x i16> %vsli
}

define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsliq_n_p8
; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
  ret <16 x i8> %vsli_n
}

define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsliq_n_p16
; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15
  %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15)
  ret <8 x i16> %vsli
}

define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) {
; CHECK: test_vqshl_n_s8
; CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vqshl = tail call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
  ret <8 x i8> %vqshl
}


define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) {
; CHECK: test_vqshl_n_s16
; CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
  ret <4 x i16> %vqshl
}


define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) {
; CHECK: test_vqshl_n_s32
; CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> <i32 3, i32 3>)
  ret <2 x i32> %vqshl
}


define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) {
; CHECK: test_vqshlq_n_s8
; CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
  ret <16 x i8> %vqshl_n
}


define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) {
; CHECK: test_vqshlq_n_s16
; CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  ret <8 x i16> %vqshl
}


define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) {
; CHECK: test_vqshlq_n_s32
; CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %vqshl
}


define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) {
; CHECK: test_vqshlq_n_s64
; CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> <i64 3, i64 3>)
  ret <2 x i64> %vqshl
}


define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) {
; CHECK: test_vqshl_n_u8
; CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
  ret <8 x i8> %vqshl_n
}


define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) {
; CHECK: test_vqshl_n_u16
; CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
  ret <4 x i16> %vqshl
}


define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) {
; CHECK: test_vqshl_n_u32
; CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> <i32 3, i32 3>)
  ret <2 x i32> %vqshl
}


define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) {
; CHECK: test_vqshlq_n_u8
; CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
  ret <16 x i8> %vqshl_n
}


define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) {
; CHECK: test_vqshlq_n_u16
; CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  ret <8 x i16> %vqshl
}


define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) {
; CHECK: test_vqshlq_n_u32
; CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %vqshl
}


define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) {
; CHECK: test_vqshlq_n_u64
; CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> <i64 3, i64 3>)
  ret <2 x i64> %vqshl
}

define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) {
; CHECK: test_vqshlu_n_s8
; CHECK: sqshlu {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vqshlu = tail call <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8> %a, i32 3)
  ret <8 x i8> %vqshlu
}


define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) {
; CHECK: test_vqshlu_n_s16
; CHECK: sqshlu {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vqshlu = tail call <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16> %a, i32 3)
  ret <4 x i16> %vqshlu
}


define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) {
; CHECK: test_vqshlu_n_s32
; CHECK: sqshlu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vqshlu = tail call <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32> %a, i32 3)
  ret <2 x i32> %vqshlu
}


define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) {
; CHECK: test_vqshluq_n_s8
; CHECK: sqshlu {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vqshlu = tail call <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8> %a, i32 3)
  ret <16 x i8> %vqshlu
}


define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) {
; CHECK: test_vqshluq_n_s16
; CHECK: sqshlu {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vqshlu = tail call <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16> %a, i32 3)
  ret <8 x i16> %vqshlu
}


define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) {
; CHECK: test_vqshluq_n_s32
; CHECK: sqshlu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vqshlu = tail call <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32> %a, i32 3)
  ret <4 x i32> %vqshlu
}


define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) {
; CHECK: test_vqshluq_n_s64
; CHECK: sqshlu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vqshlu = tail call <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64> %a, i32 3)
  ret <2 x i64> %vqshlu
}


define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) {
; CHECK: test_vshrn_n_s16
; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %1 = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  ret <8 x i8> %vshrn_n
}

define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) {
; CHECK: test_vshrn_n_s32
; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %1 = ashr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  ret <4 x i16> %vshrn_n
}

define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) {
; CHECK: test_vshrn_n_s64
; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %1 = ashr <2 x i64> %a, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %vshrn_n
}

define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) {
; CHECK: test_vshrn_n_u16
; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %1 = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  ret <8 x i8> %vshrn_n
}

define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) {
; CHECK: test_vshrn_n_u32
; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %1 = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  ret <4 x i16> %vshrn_n
}

define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) {
; CHECK: test_vshrn_n_u64
; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %1 = lshr <2 x i64> %a, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %vshrn_n
}

define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vshrn_high_n_s16
; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %2 = bitcast <8 x i8> %a to <1 x i64>
  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %4
}

define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vshrn_high_n_s32
; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  %2 = bitcast <4 x i16> %a to <1 x i64>
  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %4
}

define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vshrn_high_n_s64
; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = ashr <2 x i64> %b, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}

define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vshrn_high_n_u16
; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %2 = bitcast <8 x i8> %a to <1 x i64>
  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %4
}

define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vshrn_high_n_u32
; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  %2 = bitcast <4 x i16> %a to <1 x i64>
  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %4
}

define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vshrn_high_n_u64
; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = lshr <2 x i64> %b, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}

define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) {
; CHECK: test_vqshrun_n_s16
; CHECK: sqshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vqshrun
}


define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) {
; CHECK: test_vqshrun_n_s32
; CHECK: sqshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vqshrun
}

define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) {
; CHECK: test_vqshrun_n_s64
; CHECK: sqshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vqshrun
}

define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrun_high_n_s16
; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrun_high_n_s32
; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrun_high_n_s64
; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) {
; CHECK: test_vrshrn_n_s16
; CHECK: rshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vrshrn
}


define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) {
; CHECK: test_vrshrn_n_s32
; CHECK: rshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vrshrn
}


define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) {
; CHECK: test_vrshrn_n_s64
; CHECK: rshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vrshrn
}

define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vrshrn_high_n_s16
; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vrshrn_high_n_s32
; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vrshrn_high_n_s64
; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) {
; CHECK: test_vqrshrun_n_s16
; CHECK: sqrshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vqrshrun
}

define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) {
; CHECK: test_vqrshrun_n_s32
; CHECK: sqrshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vqrshrun
}

define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) {
; CHECK: test_vqrshrun_n_s64
; CHECK: sqrshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vqrshrun
}

define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrun_high_n_s16
; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrun_high_n_s32
; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrun_high_n_s64
; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) {
; CHECK: test_vqshrn_n_s16
; CHECK: sqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vqshrn
}


define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) {
; CHECK: test_vqshrn_n_s32
; CHECK: sqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vqshrn
}


define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) {
; CHECK: test_vqshrn_n_s64
; CHECK: sqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vqshrn
}


define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) {
; CHECK: test_vqshrn_n_u16
; CHECK: uqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vqshrn
}


define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) {
; CHECK: test_vqshrn_n_u32
; CHECK: uqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vqshrn
}


define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) {
; CHECK: test_vqshrn_n_u64
; CHECK: uqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vqshrn
}


define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrn_high_n_s16
; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrn_high_n_s32
; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrn_high_n_s64
; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrn_high_n_u16
; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrn_high_n_u32
; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrn_high_n_u64
; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) {
; CHECK: test_vqrshrn_n_s16
; CHECK: sqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vqrshrn
}


define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) {
; CHECK: test_vqrshrn_n_s32
; CHECK: sqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vqrshrn
}


define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) {
; CHECK: test_vqrshrn_n_s64
; CHECK: sqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vqrshrn
}


define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) {
; CHECK: test_vqrshrn_n_u16
; CHECK: uqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vqrshrn
}


define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) {
; CHECK: test_vqrshrn_n_u32
; CHECK: uqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vqrshrn
}


define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) {
; CHECK: test_vqrshrn_n_u64
; CHECK: uqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vqrshrn
}


define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrn_high_n_s16
; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrn_high_n_s32
; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrn_high_n_s64
; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrn_high_n_u16
; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrn_high_n_u32
; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrn_high_n_u64
; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) {
; CHECK: test_vcvt_n_f32_s32
; CHECK: scvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
  %vcvt = tail call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %a, i32 31)
  ret <2 x float> %vcvt
}

define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) {
; CHECK: test_vcvtq_n_f32_s32
; CHECK: scvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
  %vcvt = tail call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %a, i32 31)
  ret <4 x float> %vcvt
}

define <2 x double> @test_vcvtq_n_f64_s64(<2 x i64> %a) {
; CHECK: test_vcvtq_n_f64_s64
; CHECK: scvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
  %vcvt = tail call <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %a, i32 50)
  ret <2 x double> %vcvt
}

define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) {
; CHECK: test_vcvt_n_f32_u32
; CHECK: ucvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
  %vcvt = tail call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %a, i32 31)
  ret <2 x float> %vcvt
}

define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) {
; CHECK: test_vcvtq_n_f32_u32
; CHECK: ucvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
  %vcvt = tail call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %a, i32 31)
  ret <4 x float> %vcvt
}

define <2 x double> @test_vcvtq_n_f64_u64(<2 x i64> %a) {
; CHECK: test_vcvtq_n_f64_u64
; CHECK: ucvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
  %vcvt = tail call <2 x double> @llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %a, i32 50)
  ret <2 x double> %vcvt
}

define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) {
; CHECK: test_vcvt_n_s32_f32
; CHECK: fcvtzs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
  %vcvt = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %a, i32 31)
  ret <2 x i32> %vcvt
}

define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) {
; CHECK: test_vcvtq_n_s32_f32
; CHECK: fcvtzs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
  %vcvt = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %a, i32 31)
  ret <4 x i32> %vcvt
}

define <2 x i64> @test_vcvtq_n_s64_f64(<2 x double> %a) {
; CHECK: test_vcvtq_n_s64_f64
; CHECK: fcvtzs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
  %vcvt = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> %a, i32 50)
  ret <2 x i64> %vcvt
}

define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) {
; CHECK: test_vcvt_n_u32_f32
; CHECK: fcvtzu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
  %vcvt = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %a, i32 31)
  ret <2 x i32> %vcvt
}

define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) {
; CHECK: test_vcvtq_n_u32_f32
; CHECK: fcvtzu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
  %vcvt = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %a, i32 31)
  ret <4 x i32> %vcvt
}

define <2 x i64> @test_vcvtq_n_u64_f64(<2 x double> %a) {
; CHECK: test_vcvtq_n_u64_f64
; CHECK: fcvtzu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
  %vcvt = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> %a, i32 50)
  ret <2 x i64> %vcvt
}

declare <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8>, i32)

declare <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16>, i32)

declare <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32>, i32)

declare <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8>, i32)

declare <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16>, i32)

declare <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32>, i32)

declare <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8>, i32)

declare <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16>, i32)

declare <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32>, i32)

declare <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8>, i32)

declare <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16>, i32)

declare <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32>, i32)

declare <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8>, <8 x i8>, i32)

declare <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16>, <4 x i16>, i32)

declare <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32>, <2 x i32>, i32)

declare <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8>, <16 x i8>, i32)

declare <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16>, <8 x i16>, i32)

declare <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32>, <4 x i32>, i32)

declare <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64>, <2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32)

declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32)

declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32)

declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32)

declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32)

declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32)

declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8>, i32)

declare <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16>, i32)

declare <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32>, i32)

declare <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8>, i32)

declare <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16>, i32)

declare <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32>, i32)

declare <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64>, i32)

declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>)

declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>)

declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>)

declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>)

declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>)

declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>)

declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>)

declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>)

declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>)

declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>)

declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>)

declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>)

declare <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64>, i32)

declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32)

declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32)

declare <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32)

declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32)

declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32)

declare <2 x double> @llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32)

declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32)

declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32)

declare <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32)

declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32)

declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)

declare <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32)