; RUN: llc -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s
2
; Global pointer slot. The pre/post-indexed load/store tests store their
; advanced address here (through a bitcast) so the address update stays live.
@ptr = global i8* null
4
; Loads a <8 x i8> from %addr advanced by 5 vectors (40 bytes) and saves that
; advanced pointer to @ptr; expects a single pre-indexed `ldr d0, [x0, #40]!`.
define <8 x i8> @test_v8i8_pre_load(<8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_pre_load:
; CHECK: ldr d0, [x0, #40]!
  %newaddr = getelementptr <8 x i8>* %addr, i32 5
  %val = load <8 x i8>* %newaddr, align 8
  store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
  ret <8 x i8> %val
}
13
; Loads a <8 x i8> from the unmodified %addr while %addr + 5 vectors (40 bytes)
; is saved to @ptr; expects a single post-indexed `ldr d0, [x0], #40`.
define <8 x i8> @test_v8i8_post_load(<8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_post_load:
; CHECK: ldr d0, [x0], #40
  %newaddr = getelementptr <8 x i8>* %addr, i32 5
  %val = load <8 x i8>* %addr, align 8
  store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
  ret <8 x i8> %val
}
22
; Stores %in to %addr advanced by 5 vectors (40 bytes) and saves that advanced
; pointer to @ptr; expects a single pre-indexed `str d0, [x0, #40]!`.
define void @test_v8i8_pre_store(<8 x i8> %in, <8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_pre_store:
; CHECK: str d0, [x0, #40]!
  %newaddr = getelementptr <8 x i8>* %addr, i32 5
  store <8 x i8> %in, <8 x i8>* %newaddr, align 8
  store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
  ret void
}
31
; Stores %in to the unmodified %addr while %addr + 5 vectors (40 bytes) is
; saved to @ptr; expects a single post-indexed `str d0, [x0], #40`.
define void @test_v8i8_post_store(<8 x i8> %in, <8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_post_store:
; CHECK: str d0, [x0], #40
  %newaddr = getelementptr <8 x i8>* %addr, i32 5
  store <8 x i8> %in, <8 x i8>* %addr, align 8
  store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
  ret void
}
40
; Loads a <4 x i16> from %addr advanced by 5 vectors (40 bytes) and saves that
; advanced pointer to @ptr; expects a single pre-indexed `ldr d0, [x0, #40]!`.
define <4 x i16> @test_v4i16_pre_load(<4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_pre_load:
; CHECK: ldr d0, [x0, #40]!
  %newaddr = getelementptr <4 x i16>* %addr, i32 5
  %val = load <4 x i16>* %newaddr, align 8
  store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
  ret <4 x i16> %val
}
49
; Loads a <4 x i16> from the unmodified %addr while %addr + 5 vectors (40 bytes)
; is saved to @ptr; expects a single post-indexed `ldr d0, [x0], #40`.
define <4 x i16> @test_v4i16_post_load(<4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_post_load:
; CHECK: ldr d0, [x0], #40
  %newaddr = getelementptr <4 x i16>* %addr, i32 5
  %val = load <4 x i16>* %addr, align 8
  store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
  ret <4 x i16> %val
}
58
; Stores %in to %addr advanced by 5 vectors (40 bytes) and saves that advanced
; pointer to @ptr; expects a single pre-indexed `str d0, [x0, #40]!`.
define void @test_v4i16_pre_store(<4 x i16> %in, <4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_pre_store:
; CHECK: str d0, [x0, #40]!
  %newaddr = getelementptr <4 x i16>* %addr, i32 5
  store <4 x i16> %in, <4 x i16>* %newaddr, align 8
  store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
  ret void
}
67
; Stores %in to the unmodified %addr while %addr + 5 vectors (40 bytes) is
; saved to @ptr; expects a single post-indexed `str d0, [x0], #40`.
define void @test_v4i16_post_store(<4 x i16> %in, <4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_post_store:
; CHECK: str d0, [x0], #40
  %newaddr = getelementptr <4 x i16>* %addr, i32 5
  store <4 x i16> %in, <4 x i16>* %addr, align 8
  store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
  ret void
}
76
; Loads a <2 x i32> from %addr advanced by 5 vectors (40 bytes) and saves that
; advanced pointer to @ptr; expects a single pre-indexed `ldr d0, [x0, #40]!`.
define <2 x i32> @test_v2i32_pre_load(<2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_pre_load:
; CHECK: ldr d0, [x0, #40]!
  %newaddr = getelementptr <2 x i32>* %addr, i32 5
  %val = load <2 x i32>* %newaddr, align 8
  store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
  ret <2 x i32> %val
}
85
; Loads a <2 x i32> from the unmodified %addr while %addr + 5 vectors (40 bytes)
; is saved to @ptr; expects a single post-indexed `ldr d0, [x0], #40`.
define <2 x i32> @test_v2i32_post_load(<2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_post_load:
; CHECK: ldr d0, [x0], #40
  %newaddr = getelementptr <2 x i32>* %addr, i32 5
  %val = load <2 x i32>* %addr, align 8
  store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
  ret <2 x i32> %val
}
94
; Stores %in to %addr advanced by 5 vectors (40 bytes) and saves that advanced
; pointer to @ptr; expects a single pre-indexed `str d0, [x0, #40]!`.
define void @test_v2i32_pre_store(<2 x i32> %in, <2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_pre_store:
; CHECK: str d0, [x0, #40]!
  %newaddr = getelementptr <2 x i32>* %addr, i32 5
  store <2 x i32> %in, <2 x i32>* %newaddr, align 8
  store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
  ret void
}
103
; Stores %in to the unmodified %addr while %addr + 5 vectors (40 bytes) is
; saved to @ptr; expects a single post-indexed `str d0, [x0], #40`.
define void @test_v2i32_post_store(<2 x i32> %in, <2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_post_store:
; CHECK: str d0, [x0], #40
  %newaddr = getelementptr <2 x i32>* %addr, i32 5
  store <2 x i32> %in, <2 x i32>* %addr, align 8
  store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
  ret void
}
112
; Loads a <2 x float> from %addr advanced by 5 vectors (40 bytes) and saves that
; advanced pointer to @ptr; expects a single pre-indexed `ldr d0, [x0, #40]!`.
define <2 x float> @test_v2f32_pre_load(<2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_pre_load:
; CHECK: ldr d0, [x0, #40]!
  %newaddr = getelementptr <2 x float>* %addr, i32 5
  %val = load <2 x float>* %newaddr, align 8
  store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
  ret <2 x float> %val
}
121
; Loads a <2 x float> from the unmodified %addr while %addr + 5 vectors
; (40 bytes) is saved to @ptr; expects a post-indexed `ldr d0, [x0], #40`.
define <2 x float> @test_v2f32_post_load(<2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_post_load:
; CHECK: ldr d0, [x0], #40
  %newaddr = getelementptr <2 x float>* %addr, i32 5
  %val = load <2 x float>* %addr, align 8
  store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
  ret <2 x float> %val
}
130
; Stores %in to %addr advanced by 5 vectors (40 bytes) and saves that advanced
; pointer to @ptr; expects a single pre-indexed `str d0, [x0, #40]!`.
define void @test_v2f32_pre_store(<2 x float> %in, <2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_pre_store:
; CHECK: str d0, [x0, #40]!
  %newaddr = getelementptr <2 x float>* %addr, i32 5
  store <2 x float> %in, <2 x float>* %newaddr, align 8
  store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
  ret void
}
139
; Stores %in to the unmodified %addr while %addr + 5 vectors (40 bytes) is
; saved to @ptr; expects a single post-indexed `str d0, [x0], #40`.
define void @test_v2f32_post_store(<2 x float> %in, <2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_post_store:
; CHECK: str d0, [x0], #40
  %newaddr = getelementptr <2 x float>* %addr, i32 5
  store <2 x float> %in, <2 x float>* %addr, align 8
  store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
  ret void
}
148
; Loads a <1 x i64> from %addr advanced by 5 vectors (40 bytes) and saves that
; advanced pointer to @ptr; expects a single pre-indexed `ldr d0, [x0, #40]!`.
define <1 x i64> @test_v1i64_pre_load(<1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_pre_load:
; CHECK: ldr d0, [x0, #40]!
  %newaddr = getelementptr <1 x i64>* %addr, i32 5
  %val = load <1 x i64>* %newaddr, align 8
  store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
  ret <1 x i64> %val
}
157
; Loads a <1 x i64> from the unmodified %addr while %addr + 5 vectors (40 bytes)
; is saved to @ptr; expects a single post-indexed `ldr d0, [x0], #40`.
define <1 x i64> @test_v1i64_post_load(<1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_post_load:
; CHECK: ldr d0, [x0], #40
  %newaddr = getelementptr <1 x i64>* %addr, i32 5
  %val = load <1 x i64>* %addr, align 8
  store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
  ret <1 x i64> %val
}
166
; Stores %in to %addr advanced by 5 vectors (40 bytes) and saves that advanced
; pointer to @ptr; expects a single pre-indexed `str d0, [x0, #40]!`.
define void @test_v1i64_pre_store(<1 x i64> %in, <1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_pre_store:
; CHECK: str d0, [x0, #40]!
  %newaddr = getelementptr <1 x i64>* %addr, i32 5
  store <1 x i64> %in, <1 x i64>* %newaddr, align 8
  store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
  ret void
}
175
; Stores %in to the unmodified %addr while %addr + 5 vectors (40 bytes) is
; saved to @ptr; expects a single post-indexed `str d0, [x0], #40`.
define void @test_v1i64_post_store(<1 x i64> %in, <1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_post_store:
; CHECK: str d0, [x0], #40
  %newaddr = getelementptr <1 x i64>* %addr, i32 5
  store <1 x i64> %in, <1 x i64>* %addr, align 8
  store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
  ret void
}
184
; Loads a <16 x i8> from %addr advanced by 5 vectors (80 bytes) and saves that
; advanced pointer to @ptr; expects a single pre-indexed `ldr q0, [x0, #80]!`.
define <16 x i8> @test_v16i8_pre_load(<16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_pre_load:
; CHECK: ldr q0, [x0, #80]!
  %newaddr = getelementptr <16 x i8>* %addr, i32 5
  %val = load <16 x i8>* %newaddr, align 8
  store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
  ret <16 x i8> %val
}
193
; Loads a <16 x i8> from the unmodified %addr while %addr + 5 vectors (80 bytes)
; is saved to @ptr; expects a single post-indexed `ldr q0, [x0], #80`.
define <16 x i8> @test_v16i8_post_load(<16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_post_load:
; CHECK: ldr q0, [x0], #80
  %newaddr = getelementptr <16 x i8>* %addr, i32 5
  %val = load <16 x i8>* %addr, align 8
  store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
  ret <16 x i8> %val
}
202
; Stores %in to %addr advanced by 5 vectors (80 bytes) and saves that advanced
; pointer to @ptr; expects a single pre-indexed `str q0, [x0, #80]!`.
define void @test_v16i8_pre_store(<16 x i8> %in, <16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_pre_store:
; CHECK: str q0, [x0, #80]!
  %newaddr = getelementptr <16 x i8>* %addr, i32 5
  store <16 x i8> %in, <16 x i8>* %newaddr, align 8
  store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
  ret void
}
211
; Stores %in to the unmodified %addr while %addr + 5 vectors (80 bytes) is
; saved to @ptr; expects a single post-indexed `str q0, [x0], #80`.
define void @test_v16i8_post_store(<16 x i8> %in, <16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_post_store:
; CHECK: str q0, [x0], #80
  %newaddr = getelementptr <16 x i8>* %addr, i32 5
  store <16 x i8> %in, <16 x i8>* %addr, align 8
  store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
  ret void
}
220
; Loads a <8 x i16> from %addr advanced by 5 vectors (80 bytes) and saves that
; advanced pointer to @ptr; expects a single pre-indexed `ldr q0, [x0, #80]!`.
define <8 x i16> @test_v8i16_pre_load(<8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_pre_load:
; CHECK: ldr q0, [x0, #80]!
  %newaddr = getelementptr <8 x i16>* %addr, i32 5
  %val = load <8 x i16>* %newaddr, align 8
  store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
  ret <8 x i16> %val
}
229
; Loads a <8 x i16> from the unmodified %addr while %addr + 5 vectors (80 bytes)
; is saved to @ptr; expects a single post-indexed `ldr q0, [x0], #80`.
define <8 x i16> @test_v8i16_post_load(<8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_post_load:
; CHECK: ldr q0, [x0], #80
  %newaddr = getelementptr <8 x i16>* %addr, i32 5
  %val = load <8 x i16>* %addr, align 8
  store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
  ret <8 x i16> %val
}
238
; Stores %in to %addr advanced by 5 vectors (80 bytes) and saves that advanced
; pointer to @ptr; expects a single pre-indexed `str q0, [x0, #80]!`.
define void @test_v8i16_pre_store(<8 x i16> %in, <8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_pre_store:
; CHECK: str q0, [x0, #80]!
  %newaddr = getelementptr <8 x i16>* %addr, i32 5
  store <8 x i16> %in, <8 x i16>* %newaddr, align 8
  store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
  ret void
}
247
; Stores %in to the unmodified %addr while %addr + 5 vectors (80 bytes) is
; saved to @ptr; expects a single post-indexed `str q0, [x0], #80`.
define void @test_v8i16_post_store(<8 x i16> %in, <8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_post_store:
; CHECK: str q0, [x0], #80
  %newaddr = getelementptr <8 x i16>* %addr, i32 5
  store <8 x i16> %in, <8 x i16>* %addr, align 8
  store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
  ret void
}
256
; Loads a <4 x i32> from %addr advanced by 5 vectors (80 bytes) and saves that
; advanced pointer to @ptr; expects a single pre-indexed `ldr q0, [x0, #80]!`.
define <4 x i32> @test_v4i32_pre_load(<4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_pre_load:
; CHECK: ldr q0, [x0, #80]!
  %newaddr = getelementptr <4 x i32>* %addr, i32 5
  %val = load <4 x i32>* %newaddr, align 8
  store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
  ret <4 x i32> %val
}
265
; Loads a <4 x i32> from the unmodified %addr while %addr + 5 vectors (80 bytes)
; is saved to @ptr; expects a single post-indexed `ldr q0, [x0], #80`.
define <4 x i32> @test_v4i32_post_load(<4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_post_load:
; CHECK: ldr q0, [x0], #80
  %newaddr = getelementptr <4 x i32>* %addr, i32 5
  %val = load <4 x i32>* %addr, align 8
  store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
  ret <4 x i32> %val
}
274
; Stores %in to %addr advanced by 5 vectors (80 bytes) and saves that advanced
; pointer to @ptr; expects a single pre-indexed `str q0, [x0, #80]!`.
define void @test_v4i32_pre_store(<4 x i32> %in, <4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_pre_store:
; CHECK: str q0, [x0, #80]!
  %newaddr = getelementptr <4 x i32>* %addr, i32 5
  store <4 x i32> %in, <4 x i32>* %newaddr, align 8
  store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
  ret void
}
283
; Stores %in to the unmodified %addr while %addr + 5 vectors (80 bytes) is
; saved to @ptr; expects a single post-indexed `str q0, [x0], #80`.
define void @test_v4i32_post_store(<4 x i32> %in, <4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_post_store:
; CHECK: str q0, [x0], #80
  %newaddr = getelementptr <4 x i32>* %addr, i32 5
  store <4 x i32> %in, <4 x i32>* %addr, align 8
  store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
  ret void
}
292
293
; Loads a <4 x float> from %addr advanced by 5 vectors (80 bytes) and saves that
; advanced pointer to @ptr; expects a single pre-indexed `ldr q0, [x0, #80]!`.
define <4 x float> @test_v4f32_pre_load(<4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_pre_load:
; CHECK: ldr q0, [x0, #80]!
  %newaddr = getelementptr <4 x float>* %addr, i32 5
  %val = load <4 x float>* %newaddr, align 8
  store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
  ret <4 x float> %val
}
302
; Loads a <4 x float> from the unmodified %addr while %addr + 5 vectors
; (80 bytes) is saved to @ptr; expects a post-indexed `ldr q0, [x0], #80`.
define <4 x float> @test_v4f32_post_load(<4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_post_load:
; CHECK: ldr q0, [x0], #80
  %newaddr = getelementptr <4 x float>* %addr, i32 5
  %val = load <4 x float>* %addr, align 8
  store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
  ret <4 x float> %val
}
311
; Stores %in to %addr advanced by 5 vectors (80 bytes) and saves that advanced
; pointer to @ptr; expects a single pre-indexed `str q0, [x0, #80]!`.
define void @test_v4f32_pre_store(<4 x float> %in, <4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_pre_store:
; CHECK: str q0, [x0, #80]!
  %newaddr = getelementptr <4 x float>* %addr, i32 5
  store <4 x float> %in, <4 x float>* %newaddr, align 8
  store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
  ret void
}
320
; Stores %in to the unmodified %addr while %addr + 5 vectors (80 bytes) is
; saved to @ptr; expects a single post-indexed `str q0, [x0], #80`.
define void @test_v4f32_post_store(<4 x float> %in, <4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_post_store:
; CHECK: str q0, [x0], #80
  %newaddr = getelementptr <4 x float>* %addr, i32 5
  store <4 x float> %in, <4 x float>* %addr, align 8
  store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
  ret void
}
329
330
; Loads a <2 x i64> from %addr advanced by 5 vectors (80 bytes) and saves that
; advanced pointer to @ptr; expects a single pre-indexed `ldr q0, [x0, #80]!`.
define <2 x i64> @test_v2i64_pre_load(<2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_pre_load:
; CHECK: ldr q0, [x0, #80]!
  %newaddr = getelementptr <2 x i64>* %addr, i32 5
  %val = load <2 x i64>* %newaddr, align 8
  store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
  ret <2 x i64> %val
}
339
; Loads a <2 x i64> from the unmodified %addr while %addr + 5 vectors (80 bytes)
; is saved to @ptr; expects a single post-indexed `ldr q0, [x0], #80`.
define <2 x i64> @test_v2i64_post_load(<2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_post_load:
; CHECK: ldr q0, [x0], #80
  %newaddr = getelementptr <2 x i64>* %addr, i32 5
  %val = load <2 x i64>* %addr, align 8
  store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
  ret <2 x i64> %val
}
348
; Stores %in to %addr advanced by 5 vectors (80 bytes) and saves that advanced
; pointer to @ptr; expects a single pre-indexed `str q0, [x0, #80]!`.
define void @test_v2i64_pre_store(<2 x i64> %in, <2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_pre_store:
; CHECK: str q0, [x0, #80]!
  %newaddr = getelementptr <2 x i64>* %addr, i32 5
  store <2 x i64> %in, <2 x i64>* %newaddr, align 8
  store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
  ret void
}
357
; Stores %in to the unmodified %addr while %addr + 5 vectors (80 bytes) is
; saved to @ptr; expects a single post-indexed `str q0, [x0], #80`.
define void @test_v2i64_post_store(<2 x i64> %in, <2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_post_store:
; CHECK: str q0, [x0], #80
  %newaddr = getelementptr <2 x i64>* %addr, i32 5
  store <2 x i64> %in, <2 x i64>* %addr, align 8
  store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
  ret void
}
366
367
; Loads a <2 x double> from %addr advanced by 5 vectors (80 bytes) and saves
; that advanced pointer to @ptr; expects a pre-indexed `ldr q0, [x0, #80]!`.
define <2 x double> @test_v2f64_pre_load(<2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_pre_load:
; CHECK: ldr q0, [x0, #80]!
  %newaddr = getelementptr <2 x double>* %addr, i32 5
  %val = load <2 x double>* %newaddr, align 8
  store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
  ret <2 x double> %val
}
376
; Loads a <2 x double> from the unmodified %addr while %addr + 5 vectors
; (80 bytes) is saved to @ptr; expects a post-indexed `ldr q0, [x0], #80`.
define <2 x double> @test_v2f64_post_load(<2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_post_load:
; CHECK: ldr q0, [x0], #80
  %newaddr = getelementptr <2 x double>* %addr, i32 5
  %val = load <2 x double>* %addr, align 8
  store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
  ret <2 x double> %val
}
385
; Stores %in to %addr advanced by 5 vectors (80 bytes) and saves that advanced
; pointer to @ptr; expects a single pre-indexed `str q0, [x0, #80]!`.
define void @test_v2f64_pre_store(<2 x double> %in, <2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_pre_store:
; CHECK: str q0, [x0, #80]!
  %newaddr = getelementptr <2 x double>* %addr, i32 5
  store <2 x double> %in, <2 x double>* %newaddr, align 8
  store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
  ret void
}
394
; Stores %in to the unmodified %addr while %addr + 5 vectors (80 bytes) is
; saved to @ptr; expects a single post-indexed `str q0, [x0], #80`.
define void @test_v2f64_post_store(<2 x double> %in, <2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_post_store:
; CHECK: str q0, [x0], #80
  %newaddr = getelementptr <2 x double>* %addr, i32 5
  store <2 x double> %in, <2 x double>* %addr, align 8
  store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
  ret void
}
403
; Stores lane 3 of %in to %addr and returns %addr + 1 element (1 byte);
; expects the immediate post-indexed `st1.b { v0 }[3], [x0], #1`.
define i8* @test_v16i8_post_imm_st1_lane(<16 x i8> %in, i8* %addr) {
; CHECK-LABEL: test_v16i8_post_imm_st1_lane:
; CHECK: st1.b { v0 }[3], [x0], #1
  %elt = extractelement <16 x i8> %in, i32 3
  store i8 %elt, i8* %addr

  %newaddr = getelementptr i8* %addr, i32 1
  ret i8* %newaddr
}
413
; Stores lane 3 of %in to %addr and returns %addr + 2 elements (2 bytes);
; expects the offset built in a register (orr ..., #0x2) that then feeds a
; register post-indexed st1.b.
define i8* @test_v16i8_post_reg_st1_lane(<16 x i8> %in, i8* %addr) {
; CHECK-LABEL: test_v16i8_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x2
; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]]
  %elt = extractelement <16 x i8> %in, i32 3
  store i8 %elt, i8* %addr

  %newaddr = getelementptr i8* %addr, i32 2
  ret i8* %newaddr
}
424
425
; Stores lane 3 of %in to %addr and returns %addr + 1 element (2 bytes);
; expects the immediate post-indexed `st1.h { v0 }[3], [x0], #2`.
define i16* @test_v8i16_post_imm_st1_lane(<8 x i16> %in, i16* %addr) {
; CHECK-LABEL: test_v8i16_post_imm_st1_lane:
; CHECK: st1.h { v0 }[3], [x0], #2
  %elt = extractelement <8 x i16> %in, i32 3
  store i16 %elt, i16* %addr

  %newaddr = getelementptr i16* %addr, i32 1
  ret i16* %newaddr
}
435
; Stores lane 3 of %in to %addr and returns %addr + 2 elements (4 bytes);
; expects the offset built in a register (orr ..., #0x4) that then feeds a
; register post-indexed st1.h.
define i16* @test_v8i16_post_reg_st1_lane(<8 x i16> %in, i16* %addr) {
; CHECK-LABEL: test_v8i16_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x4
; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]]
  %elt = extractelement <8 x i16> %in, i32 3
  store i16 %elt, i16* %addr

  %newaddr = getelementptr i16* %addr, i32 2
  ret i16* %newaddr
}
446
; Stores lane 3 of %in to %addr and returns %addr + 1 element (4 bytes);
; expects the immediate post-indexed `st1.s { v0 }[3], [x0], #4`.
define i32* @test_v4i32_post_imm_st1_lane(<4 x i32> %in, i32* %addr) {
; CHECK-LABEL: test_v4i32_post_imm_st1_lane:
; CHECK: st1.s { v0 }[3], [x0], #4
  %elt = extractelement <4 x i32> %in, i32 3
  store i32 %elt, i32* %addr

  %newaddr = getelementptr i32* %addr, i32 1
  ret i32* %newaddr
}
456
; Stores lane 3 of %in to %addr and returns %addr + 2 elements (8 bytes);
; expects the offset built in a register (orr ..., #0x8) that then feeds a
; register post-indexed st1.s.
define i32* @test_v4i32_post_reg_st1_lane(<4 x i32> %in, i32* %addr) {
; CHECK-LABEL: test_v4i32_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]]
  %elt = extractelement <4 x i32> %in, i32 3
  store i32 %elt, i32* %addr

  %newaddr = getelementptr i32* %addr, i32 2
  ret i32* %newaddr
}
467
; Stores lane 3 of %in to %addr and returns %addr + 1 element (4 bytes);
; expects the immediate post-indexed `st1.s { v0 }[3], [x0], #4`.
define float* @test_v4f32_post_imm_st1_lane(<4 x float> %in, float* %addr) {
; CHECK-LABEL: test_v4f32_post_imm_st1_lane:
; CHECK: st1.s { v0 }[3], [x0], #4
  %elt = extractelement <4 x float> %in, i32 3
  store float %elt, float* %addr

  %newaddr = getelementptr float* %addr, i32 1
  ret float* %newaddr
}
477
; Stores lane 3 of %in to %addr and returns %addr + 2 elements (8 bytes);
; expects the offset built in a register (orr ..., #0x8) that then feeds a
; register post-indexed st1.s.
define float* @test_v4f32_post_reg_st1_lane(<4 x float> %in, float* %addr) {
; CHECK-LABEL: test_v4f32_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]]
  %elt = extractelement <4 x float> %in, i32 3
  store float %elt, float* %addr

  %newaddr = getelementptr float* %addr, i32 2
  ret float* %newaddr
}
488
; Stores lane 1 of %in to %addr and returns %addr + 1 element (8 bytes);
; expects the immediate post-indexed `st1.d { v0 }[1], [x0], #8`.
define i64* @test_v2i64_post_imm_st1_lane(<2 x i64> %in, i64* %addr) {
; CHECK-LABEL: test_v2i64_post_imm_st1_lane:
; CHECK: st1.d { v0 }[1], [x0], #8
  %elt = extractelement <2 x i64> %in, i64 1
  store i64 %elt, i64* %addr

  %newaddr = getelementptr i64* %addr, i64 1
  ret i64* %newaddr
}
498
; Stores lane 1 of %in to %addr and returns %addr + 2 elements (16 bytes);
; expects the offset built in a register (orr ..., #0x10) that then feeds a
; register post-indexed st1.d.
define i64* @test_v2i64_post_reg_st1_lane(<2 x i64> %in, i64* %addr) {
; CHECK-LABEL: test_v2i64_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x10
; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]]
  %elt = extractelement <2 x i64> %in, i64 1
  store i64 %elt, i64* %addr

  %newaddr = getelementptr i64* %addr, i64 2
  ret i64* %newaddr
}
509
; Stores lane 1 of %in to %addr and returns %addr + 1 element (8 bytes);
; expects the immediate post-indexed `st1.d { v0 }[1], [x0], #8`.
define double* @test_v2f64_post_imm_st1_lane(<2 x double> %in, double* %addr) {
; CHECK-LABEL: test_v2f64_post_imm_st1_lane:
; CHECK: st1.d { v0 }[1], [x0], #8
  %elt = extractelement <2 x double> %in, i32 1
  store double %elt, double* %addr

  %newaddr = getelementptr double* %addr, i32 1
  ret double* %newaddr
}
519
; Stores lane 1 of %in to %addr and returns %addr + 2 elements (16 bytes);
; expects the offset built in a register (orr ..., #0x10) that then feeds a
; register post-indexed st1.d.
define double* @test_v2f64_post_reg_st1_lane(<2 x double> %in, double* %addr) {
; CHECK-LABEL: test_v2f64_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x10
; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]]
  %elt = extractelement <2 x double> %in, i32 1
  store double %elt, double* %addr

  %newaddr = getelementptr double* %addr, i32 2
  ret double* %newaddr
}
530
; Stores lane 3 of the 64-bit vector %in to %addr and returns %addr + 1 element
; (1 byte); expects the immediate post-indexed `st1.b { v0 }[3], [x0], #1`.
define i8* @test_v8i8_post_imm_st1_lane(<8 x i8> %in, i8* %addr) {
; CHECK-LABEL: test_v8i8_post_imm_st1_lane:
; CHECK: st1.b { v0 }[3], [x0], #1
  %elt = extractelement <8 x i8> %in, i32 3
  store i8 %elt, i8* %addr

  %newaddr = getelementptr i8* %addr, i32 1
  ret i8* %newaddr
}
540
; Stores lane 3 of the 64-bit vector %in to %addr and returns %addr + 2
; elements (2 bytes); expects the offset built in a register (orr ..., #0x2)
; that then feeds a register post-indexed st1.b.
define i8* @test_v8i8_post_reg_st1_lane(<8 x i8> %in, i8* %addr) {
; CHECK-LABEL: test_v8i8_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x2
; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]]
  %elt = extractelement <8 x i8> %in, i32 3
  store i8 %elt, i8* %addr

  %newaddr = getelementptr i8* %addr, i32 2
  ret i8* %newaddr
}
551
; Stores lane 3 of the 64-bit vector %in to %addr and returns %addr + 1 element
; (2 bytes); expects the immediate post-indexed `st1.h { v0 }[3], [x0], #2`.
define i16* @test_v4i16_post_imm_st1_lane(<4 x i16> %in, i16* %addr) {
; CHECK-LABEL: test_v4i16_post_imm_st1_lane:
; CHECK: st1.h { v0 }[3], [x0], #2
  %elt = extractelement <4 x i16> %in, i32 3
  store i16 %elt, i16* %addr

  %newaddr = getelementptr i16* %addr, i32 1
  ret i16* %newaddr
}
561
; Stores lane 3 of the 64-bit vector %in to %addr and returns %addr + 2
; elements (4 bytes); expects the offset built in a register (orr ..., #0x4)
; that then feeds a register post-indexed st1.h.
define i16* @test_v4i16_post_reg_st1_lane(<4 x i16> %in, i16* %addr) {
; CHECK-LABEL: test_v4i16_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x4
; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]]
  %elt = extractelement <4 x i16> %in, i32 3
  store i16 %elt, i16* %addr

  %newaddr = getelementptr i16* %addr, i32 2
  ret i16* %newaddr
}
572
; Stores lane 1 of the 64-bit vector %in to %addr and returns %addr + 1 element
; (4 bytes); expects the immediate post-indexed `st1.s { v0 }[1], [x0], #4`.
define i32* @test_v2i32_post_imm_st1_lane(<2 x i32> %in, i32* %addr) {
; CHECK-LABEL: test_v2i32_post_imm_st1_lane:
; CHECK: st1.s { v0 }[1], [x0], #4
  %elt = extractelement <2 x i32> %in, i32 1
  store i32 %elt, i32* %addr

  %newaddr = getelementptr i32* %addr, i32 1
  ret i32* %newaddr
}
582
; Stores lane 1 of the 64-bit vector %in to %addr and returns %addr + 2
; elements (8 bytes); expects the offset built in a register (orr ..., #0x8)
; that then feeds a register post-indexed st1.s.
define i32* @test_v2i32_post_reg_st1_lane(<2 x i32> %in, i32* %addr) {
; CHECK-LABEL: test_v2i32_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]]
  %elt = extractelement <2 x i32> %in, i32 1
  store i32 %elt, i32* %addr

  %newaddr = getelementptr i32* %addr, i32 2
  ret i32* %newaddr
}
593
; Stores lane 1 of the 64-bit vector %in to %addr and returns %addr + 1 element
; (4 bytes); expects the immediate post-indexed `st1.s { v0 }[1], [x0], #4`.
define float* @test_v2f32_post_imm_st1_lane(<2 x float> %in, float* %addr) {
; CHECK-LABEL: test_v2f32_post_imm_st1_lane:
; CHECK: st1.s { v0 }[1], [x0], #4
  %elt = extractelement <2 x float> %in, i32 1
  store float %elt, float* %addr

  %newaddr = getelementptr float* %addr, i32 1
  ret float* %newaddr
}
603
; Stores lane 1 of the 64-bit vector %in to %addr and returns %addr + 2
; elements (8 bytes); expects the offset built in a register (orr ..., #0x8)
; that then feeds a register post-indexed st1.s.
define float* @test_v2f32_post_reg_st1_lane(<2 x float> %in, float* %addr) {
; CHECK-LABEL: test_v2f32_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]]
  %elt = extractelement <2 x float> %in, i32 1
  store float %elt, float* %addr

  %newaddr = getelementptr float* %addr, i32 2
  ret float* %newaddr
}
614
; Calls the ld2 intrinsic on %A and stores %A + 32 i8 elements (32 bytes) to
; %ptr; expects the immediate post-indexed `ld2.16b { v0, v1 }, [x0], #32`.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld2:
;CHECK: ld2.16b { v0, v1 }, [x0], #32
  %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
  %tmp = getelementptr i8* %A, i32 32
  store i8* %tmp, i8** %ptr
  ret { <16 x i8>, <16 x i8> } %ld2
}
623
; Calls the ld2 intrinsic on %A and stores %A advanced by the runtime element
; count %inc to %ptr; expects a register post-indexed ld2.16b.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v16i8_post_reg_ld2:
;CHECK: ld2.16b { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
  %tmp = getelementptr i8* %A, i64 %inc
  store i8* %tmp, i8** %ptr
  ret { <16 x i8>, <16 x i8> } %ld2
}
632
; NEON ld2 intrinsic used by the test_v16i8_post_*_ld2 tests: takes an i8*
; address and returns two <16 x i8> vectors.
declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*)
634
635
; Calls the ld2 intrinsic on %A and stores %A + 16 i8 elements (16 bytes) to
; %ptr; expects the immediate post-indexed `ld2.8b { v0, v1 }, [x0], #16`.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v8i8_post_imm_ld2:
;CHECK: ld2.8b { v0, v1 }, [x0], #16
  %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
  %tmp = getelementptr i8* %A, i32 16
  store i8* %tmp, i8** %ptr
  ret { <8 x i8>, <8 x i8> } %ld2
}
644
; Calls the ld2 intrinsic on %A and stores %A advanced by the runtime element
; count %inc to %ptr; expects a register post-indexed ld2.8b.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i8_post_reg_ld2:
;CHECK: ld2.8b { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
  %tmp = getelementptr i8* %A, i64 %inc
  store i8* %tmp, i8** %ptr
  ret { <8 x i8>, <8 x i8> } %ld2
}
653
; NEON ld2 intrinsic used by the test_v8i8_post_*_ld2 tests: takes an i8*
; address and returns two <8 x i8> vectors.
declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*)
655
656
; Calls the ld2 intrinsic on %A and stores %A + 16 i16 elements (32 bytes) to
; %ptr; expects the immediate post-indexed `ld2.8h { v0, v1 }, [x0], #32`.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v8i16_post_imm_ld2:
;CHECK: ld2.8h { v0, v1 }, [x0], #32
  %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
  %tmp = getelementptr i16* %A, i32 16
  store i16* %tmp, i16** %ptr
  ret { <8 x i16>, <8 x i16> } %ld2
}
665
; Calls the ld2 intrinsic on %A and stores %A advanced by the runtime element
; count %inc to %ptr; expects a register post-indexed ld2.8h.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i16_post_reg_ld2:
;CHECK: ld2.8h { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
  %tmp = getelementptr i16* %A, i64 %inc
  store i16* %tmp, i16** %ptr
  ret { <8 x i16>, <8 x i16> } %ld2
}
674
; NEON ld2 intrinsic used by the test_v8i16_post_*_ld2 tests: takes an i16*
; address and returns two <8 x i16> vectors.
declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16*)
676
677
; Calls the ld2 intrinsic on %A and stores %A + 8 i16 elements (16 bytes) to
; %ptr; expects the immediate post-indexed `ld2.4h { v0, v1 }, [x0], #16`.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v4i16_post_imm_ld2:
;CHECK: ld2.4h { v0, v1 }, [x0], #16
  %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
  %tmp = getelementptr i16* %A, i32 8
  store i16* %tmp, i16** %ptr
  ret { <4 x i16>, <4 x i16> } %ld2
}
686
; Calls the ld2 intrinsic on %A and stores %A advanced by the runtime element
; count %inc to %ptr; expects a register post-indexed ld2.4h.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i16_post_reg_ld2:
;CHECK: ld2.4h { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
  %tmp = getelementptr i16* %A, i64 %inc
  store i16* %tmp, i16** %ptr
  ret { <4 x i16>, <4 x i16> } %ld2
}
695
; NEON ld2 intrinsic used by the test_v4i16_post_*_ld2 tests: takes an i16*
; address and returns two <4 x i16> vectors.
declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16*)
697
698
; Calls the ld2 intrinsic on %A and stores %A + 8 i32 elements (32 bytes) to
; %ptr; expects the immediate post-indexed `ld2.4s { v0, v1 }, [x0], #32`.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v4i32_post_imm_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], #32
  %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
  %tmp = getelementptr i32* %A, i32 8
  store i32* %tmp, i32** %ptr
  ret { <4 x i32>, <4 x i32> } %ld2
}
707
; Calls the ld2 intrinsic on %A and stores %A advanced by the runtime element
; count %inc to %ptr; expects a register post-indexed ld2.4s.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i32_post_reg_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
  %tmp = getelementptr i32* %A, i64 %inc
  store i32* %tmp, i32** %ptr
  ret { <4 x i32>, <4 x i32> } %ld2
}
716
; NEON ld2 intrinsic used by the test_v4i32_post_*_ld2 tests: takes an i32*
; address and returns two <4 x i32> vectors.
declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*)
718
719
; Calls the ld2 intrinsic on %A and stores %A + 4 i32 elements (16 bytes) to
; %ptr; expects the immediate post-indexed `ld2.2s { v0, v1 }, [x0], #16`.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v2i32_post_imm_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], #16
  %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
  %tmp = getelementptr i32* %A, i32 4
  store i32* %tmp, i32** %ptr
  ret { <2 x i32>, <2 x i32> } %ld2
}
728
; Calls the ld2 intrinsic on %A and stores %A advanced by the runtime element
; count %inc to %ptr; expects a register post-indexed ld2.2s.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i32_post_reg_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
  %tmp = getelementptr i32* %A, i64 %inc
  store i32* %tmp, i32** %ptr
  ret { <2 x i32>, <2 x i32> } %ld2
}
737
; NEON ld2 intrinsic used by the test_v2i32_post_*_ld2 tests: takes an i32*
; address and returns two <2 x i32> vectors.
declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*)
739
740
; Calls the ld2 intrinsic on %A and stores %A + 4 i64 elements (32 bytes) to
; %ptr; expects the immediate post-indexed `ld2.2d { v0, v1 }, [x0], #32`.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v2i64_post_imm_ld2:
;CHECK: ld2.2d { v0, v1 }, [x0], #32
  %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
  %tmp = getelementptr i64* %A, i32 4
  store i64* %tmp, i64** %ptr
  ret { <2 x i64>, <2 x i64> } %ld2
}
749
; Calls the ld2 intrinsic on %A and stores %A advanced by the runtime element
; count %inc to %ptr; expects a register post-indexed ld2.2d.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i64_post_reg_ld2:
;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
  %tmp = getelementptr i64* %A, i64 %inc
  store i64* %tmp, i64** %ptr
  ret { <2 x i64>, <2 x i64> } %ld2
}
758
; NEON ld2 intrinsic used by the test_v2i64_post_*_ld2 tests: takes an i64*
; address and returns two <2 x i64> vectors.
declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64*)
760
761
; Calls the ld2 intrinsic for <1 x i64> on %A and stores %A + 2 i64 elements
; (16 bytes) to %ptr. Note the expected instruction is a two-register ld1:
; `ld1.1d { v0, v1 }, [x0], #16`.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v1i64_post_imm_ld2:
;CHECK: ld1.1d { v0, v1 }, [x0], #16
  %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
  %tmp = getelementptr i64* %A, i32 2
  store i64* %tmp, i64** %ptr
  ret { <1 x i64>, <1 x i64> } %ld2
}
770
; Calls the ld2 intrinsic for <1 x i64> on %A and stores %A advanced by the
; runtime element count %inc to %ptr. Note the expected instruction is a
; register post-indexed two-register ld1.1d.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1i64_post_reg_ld2:
;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
  %tmp = getelementptr i64* %A, i64 %inc
  store i64* %tmp, i64** %ptr
  ret { <1 x i64>, <1 x i64> } %ld2
}
779
; NEON ld2 intrinsic used by the test_v1i64_post_*_ld2 tests: takes an i64*
; address and returns two <1 x i64> vectors.
declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64*)
781
782
; Calls the ld2 intrinsic on %A and stores %A + 8 float elements (32 bytes) to
; %ptr; expects the immediate post-indexed `ld2.4s { v0, v1 }, [x0], #32`.
define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(float* %A, float** %ptr) {
;CHECK-LABEL: test_v4f32_post_imm_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], #32
  %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A)
  %tmp = getelementptr float* %A, i32 8
  store float* %tmp, float** %ptr
  ret { <4 x float>, <4 x float> } %ld2
}
791
; Calls the ld2 intrinsic on %A and stores %A advanced by the runtime element
; count %inc to %ptr; expects a register post-indexed ld2.4s.
define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4f32_post_reg_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A)
  %tmp = getelementptr float* %A, i64 %inc
  store float* %tmp, float** %ptr
  ret { <4 x float>, <4 x float> } %ld2
}
800
; NEON ld2 intrinsic used by the test_v4f32_post_*_ld2 tests: takes a float*
; address and returns two <4 x float> vectors.
declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float*)
802
803
; Calls the ld2 intrinsic on %A and stores %A + 4 float elements (16 bytes) to
; %ptr; expects the immediate post-indexed `ld2.2s { v0, v1 }, [x0], #16`.
define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(float* %A, float** %ptr) {
;CHECK-LABEL: test_v2f32_post_imm_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], #16
  %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A)
  %tmp = getelementptr float* %A, i32 4
  store float* %tmp, float** %ptr
  ret { <2 x float>, <2 x float> } %ld2
}
812
; Calls the ld2 intrinsic on %A and stores %A advanced by the runtime element
; count %inc to %ptr; expects a register post-indexed ld2.2s.
define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f32_post_reg_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A)
  %tmp = getelementptr float* %A, i64 %inc
  store float* %tmp, float** %ptr
  ret { <2 x float>, <2 x float> } %ld2
}
821
; NEON ld2 intrinsic used by the test_v2f32_post_*_ld2 tests: takes a float*
; address and returns two <2 x float> vectors.
declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float*)
823
824
; Calls the ld2 intrinsic on %A and stores %A + 4 double elements (32 bytes) to
; %ptr; expects the immediate post-indexed `ld2.2d { v0, v1 }, [x0], #32`.
define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(double* %A, double** %ptr) {
;CHECK-LABEL: test_v2f64_post_imm_ld2:
;CHECK: ld2.2d { v0, v1 }, [x0], #32
  %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A)
  %tmp = getelementptr double* %A, i32 4
  store double* %tmp, double** %ptr
  ret { <2 x double>, <2 x double> } %ld2
}
833
; Calls the ld2 intrinsic on %A and stores %A advanced by the runtime element
; count %inc to %ptr; expects a register post-indexed ld2.2d.
define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f64_post_reg_ld2:
;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A)
  %tmp = getelementptr double* %A, i64 %inc
  store double* %tmp, double** %ptr
  ret { <2 x double>, <2 x double> } %ld2
}
842
; NEON ld2 intrinsic used by the test_v2f64_post_*_ld2 tests: takes a double*
; address and returns two <2 x double> vectors.
declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double*)
844
845
; Calls the ld2 intrinsic for <1 x double> on %A and stores %A + 2 double
; elements (16 bytes) to %ptr. Note the expected instruction is a two-register
; ld1: `ld1.1d { v0, v1 }, [x0], #16`.
define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(double* %A, double** %ptr) {
;CHECK-LABEL: test_v1f64_post_imm_ld2:
;CHECK: ld1.1d { v0, v1 }, [x0], #16
  %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
  %tmp = getelementptr double* %A, i32 2
  store double* %tmp, double** %ptr
  ret { <1 x double>, <1 x double> } %ld2
}
854
; Post-index by register: %A advances by the run-time count %inc (in double
; elements) after the load, so a register post-index operand is expected.
; For one-element vectors the expected mnemonic is ld1.1d, not ld2.
define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1f64_post_reg_ld2:
;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
  %tmp = getelementptr double* %A, i64 %inc
  store double* %tmp, double** %ptr
  ret { <1 x double>, <1 x double> } %ld2
}

declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double*)
865
866
; Post-index by immediate: %A advances by 48 x i8 = 48 bytes, the size of
; the three <16 x i8> vectors loaded, so "[x0], #48" is expected.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld3:
;CHECK: ld3.16b { v0, v1, v2 }, [x0], #48
  %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
  %tmp = getelementptr i8* %A, i32 48
  store i8* %tmp, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
}
875
; Post-index by register: %A advances by the run-time count %inc (in i8
; elements) after the ld3, so a register post-index operand is expected.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v16i8_post_reg_ld3:
;CHECK: ld3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
  %tmp = getelementptr i8* %A, i64 %inc
  store i8* %tmp, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
}

declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8*)
886
887
; Post-index by immediate: %A advances by 24 x i8 = 24 bytes, the size of
; the three <8 x i8> vectors loaded, so "[x0], #24" is expected.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v8i8_post_imm_ld3:
;CHECK: ld3.8b { v0, v1, v2 }, [x0], #24
  %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
  %tmp = getelementptr i8* %A, i32 24
  store i8* %tmp, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
}
896
; Post-index by register: %A advances by the run-time count %inc (in i8
; elements) after the ld3, so a register post-index operand is expected.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i8_post_reg_ld3:
;CHECK: ld3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
  %tmp = getelementptr i8* %A, i64 %inc
  store i8* %tmp, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
}

declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8*)
907
908
; Post-index by immediate: %A advances by 24 x i16 = 48 bytes, the size of
; the three <8 x i16> vectors loaded, so "[x0], #48" is expected.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v8i16_post_imm_ld3:
;CHECK: ld3.8h { v0, v1, v2 }, [x0], #48
  %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
  %tmp = getelementptr i16* %A, i32 24
  store i16* %tmp, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
}
917
; Post-index by register: %A advances by the run-time count %inc (in i16
; elements) after the ld3, so a register post-index operand is expected.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i16_post_reg_ld3:
;CHECK: ld3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
  %tmp = getelementptr i16* %A, i64 %inc
  store i16* %tmp, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
}

declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16*)
928
929
; Post-index by immediate: %A advances by 12 x i16 = 24 bytes, the size of
; the three <4 x i16> vectors loaded, so "[x0], #24" is expected.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v4i16_post_imm_ld3:
;CHECK: ld3.4h { v0, v1, v2 }, [x0], #24
  %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
  %tmp = getelementptr i16* %A, i32 12
  store i16* %tmp, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
}
938
; Post-index by register: %A advances by the run-time count %inc (in i16
; elements) after the ld3, so a register post-index operand is expected.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i16_post_reg_ld3:
;CHECK: ld3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
  %tmp = getelementptr i16* %A, i64 %inc
  store i16* %tmp, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
}

declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*)
949
950
; Post-index by immediate: %A advances by 12 x i32 = 48 bytes, the size of
; the three <4 x i32> vectors loaded, so "[x0], #48" is expected.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v4i32_post_imm_ld3:
;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
  %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
  %tmp = getelementptr i32* %A, i32 12
  store i32* %tmp, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
}
959
; Post-index by register: %A advances by the run-time count %inc (in i32
; elements) after the ld3, so a register post-index operand is expected.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i32_post_reg_ld3:
;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
  %tmp = getelementptr i32* %A, i64 %inc
  store i32* %tmp, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
}

declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32*)
970
971
; Post-index by immediate: %A advances by 6 x i32 = 24 bytes, the size of
; the three <2 x i32> vectors loaded, so "[x0], #24" is expected.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v2i32_post_imm_ld3:
;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
  %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
  %tmp = getelementptr i32* %A, i32 6
  store i32* %tmp, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
}
980
; Post-index by register: %A advances by the run-time count %inc (in i32
; elements) after the ld3, so a register post-index operand is expected.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i32_post_reg_ld3:
;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
  %tmp = getelementptr i32* %A, i64 %inc
  store i32* %tmp, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
}

declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32*)
991
992
; Post-index by immediate: %A advances by 6 x i64 = 48 bytes, the size of
; the three <2 x i64> vectors loaded, so "[x0], #48" is expected.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v2i64_post_imm_ld3:
;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
  %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
  %tmp = getelementptr i64* %A, i32 6
  store i64* %tmp, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
}
1001
; Post-index by register: %A advances by the run-time count %inc (in i64
; elements) after the ld3, so a register post-index operand is expected.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i64_post_reg_ld3:
;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
  %tmp = getelementptr i64* %A, i64 %inc
  store i64* %tmp, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
}

declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64*)
1012
1013
; Post-index by immediate: %A advances by 3 x i64 = 24 bytes, the size of
; the three <1 x i64> vectors loaded, so "[x0], #24" is expected.
; For one-element vectors the expected mnemonic is ld1.1d, not ld3.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v1i64_post_imm_ld3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
  %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
  %tmp = getelementptr i64* %A, i32 3
  store i64* %tmp, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
}
1022
; Post-index by register: %A advances by the run-time count %inc (in i64
; elements) after the load, so a register post-index operand is expected.
; For one-element vectors the expected mnemonic is ld1.1d, not ld3.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1i64_post_reg_ld3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
  %tmp = getelementptr i64* %A, i64 %inc
  store i64* %tmp, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
}

declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64*)
1033
1034
; Post-index by immediate: %A advances by 12 x float = 48 bytes, the size of
; the three <4 x float> vectors loaded, so "[x0], #48" is expected.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(float* %A, float** %ptr) {
;CHECK-LABEL: test_v4f32_post_imm_ld3:
;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
  %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A)
  %tmp = getelementptr float* %A, i32 12
  store float* %tmp, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %ld3
}
1043
; Post-index by register: %A advances by the run-time count %inc (in float
; elements) after the ld3, so a register post-index operand is expected.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4f32_post_reg_ld3:
;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A)
  %tmp = getelementptr float* %A, i64 %inc
  store float* %tmp, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %ld3
}

declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float*)
1054
1055
; Post-index by immediate: %A advances by 6 x float = 24 bytes, the size of
; the three <2 x float> vectors loaded, so "[x0], #24" is expected.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3(float* %A, float** %ptr) {
;CHECK-LABEL: test_v2f32_post_imm_ld3:
;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
  %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A)
  %tmp = getelementptr float* %A, i32 6
  store float* %tmp, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %ld3
}
1064
; Post-index by register: %A advances by the run-time count %inc (in float
; elements) after the ld3, so a register post-index operand is expected.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f32_post_reg_ld3:
;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A)
  %tmp = getelementptr float* %A, i64 %inc
  store float* %tmp, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %ld3
}

declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float*)
1075
1076
; Post-index by immediate: %A advances by 6 x double = 48 bytes, the size of
; the three <2 x double> vectors loaded, so "[x0], #48" is expected.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(double* %A, double** %ptr) {
;CHECK-LABEL: test_v2f64_post_imm_ld3:
;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
  %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A)
  %tmp = getelementptr double* %A, i32 6
  store double* %tmp, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %ld3
}
1085
; Post-index by register: %A advances by the run-time count %inc (in double
; elements) after the ld3, so a register post-index operand is expected.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f64_post_reg_ld3:
;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A)
  %tmp = getelementptr double* %A, i64 %inc
  store double* %tmp, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %ld3
}

declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double*)
1096
1097
; Post-index by immediate: %A advances by 3 x double = 24 bytes, the size of
; the three <1 x double> vectors loaded, so "[x0], #24" is expected.
; For one-element vectors the expected mnemonic is ld1.1d, not ld3.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(double* %A, double** %ptr) {
;CHECK-LABEL: test_v1f64_post_imm_ld3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
  %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
  %tmp = getelementptr double* %A, i32 3
  store double* %tmp, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %ld3
}
1106
; Post-index by register: %A advances by the run-time count %inc (in double
; elements) after the load, so a register post-index operand is expected.
; For one-element vectors the expected mnemonic is ld1.1d, not ld3.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1f64_post_reg_ld3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
  %tmp = getelementptr double* %A, i64 %inc
  store double* %tmp, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %ld3
}

declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double*)
1117
1118
; Post-index by immediate: %A advances by 64 x i8 = 64 bytes, the size of
; the four <16 x i8> vectors loaded, so "[x0], #64" is expected.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld4:
;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], #64
  %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
  %tmp = getelementptr i8* %A, i32 64
  store i8* %tmp, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
}
1127
; Post-index by register: %A advances by the run-time count %inc (in i8
; elements) after the ld4, so a register post-index operand is expected.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v16i8_post_reg_ld4:
;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
  %tmp = getelementptr i8* %A, i64 %inc
  store i8* %tmp, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
}

declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*)
1138
1139
; Post-index by immediate: %A advances by 32 x i8 = 32 bytes, the size of
; the four <8 x i8> vectors loaded, so "[x0], #32" is expected.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v8i8_post_imm_ld4:
;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], #32
  %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
  %tmp = getelementptr i8* %A, i32 32
  store i8* %tmp, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
}
1148
; Post-index by register: %A advances by the run-time count %inc (in i8
; elements) after the ld4, so a register post-index operand is expected.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i8_post_reg_ld4:
;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
  %tmp = getelementptr i8* %A, i64 %inc
  store i8* %tmp, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
}

declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8*)
1159
1160
; Post-index by immediate: %A advances by 32 x i16 = 64 bytes, the size of
; the four <8 x i16> vectors loaded, so "[x0], #64" is expected.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v8i16_post_imm_ld4:
;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], #64
  %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
  %tmp = getelementptr i16* %A, i32 32
  store i16* %tmp, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
}
1169
; Post-index by register: %A advances by the run-time count %inc (in i16
; elements) after the ld4, so a register post-index operand is expected.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i16_post_reg_ld4:
;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
  %tmp = getelementptr i16* %A, i64 %inc
  store i16* %tmp, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
}

declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16*)
1180
1181
; Post-index by immediate: %A advances by 16 x i16 = 32 bytes, the size of
; the four <4 x i16> vectors loaded, so "[x0], #32" is expected.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v4i16_post_imm_ld4:
;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], #32
  %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
  %tmp = getelementptr i16* %A, i32 16
  store i16* %tmp, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
}
1190
; Post-index by register: %A advances by the run-time count %inc (in i16
; elements) after the ld4, so a register post-index operand is expected.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i16_post_reg_ld4:
;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
  %tmp = getelementptr i16* %A, i64 %inc
  store i16* %tmp, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
}

declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*)
1201
1202
; Post-index by immediate: %A advances by 16 x i32 = 64 bytes, the size of
; the four <4 x i32> vectors loaded, so "[x0], #64" is expected.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v4i32_post_imm_ld4:
;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
  %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
  %tmp = getelementptr i32* %A, i32 16
  store i32* %tmp, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
}
1211
; Post-index by register: %A advances by the run-time count %inc (in i32
; elements) after the ld4, so a register post-index operand is expected.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i32_post_reg_ld4:
;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
  %tmp = getelementptr i32* %A, i64 %inc
  store i32* %tmp, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
}

declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32*)
1222
1223
; Post-index by immediate: %A advances by 8 x i32 = 32 bytes, the size of
; the four <2 x i32> vectors loaded, so "[x0], #32" is expected.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v2i32_post_imm_ld4:
;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
  %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
  %tmp = getelementptr i32* %A, i32 8
  store i32* %tmp, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
}
1232
; Post-index by register: %A advances by the run-time count %inc (in i32
; elements) after the ld4, so a register post-index operand is expected.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i32_post_reg_ld4:
;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
  %tmp = getelementptr i32* %A, i64 %inc
  store i32* %tmp, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
}

declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32*)
1243
1244
; Post-index by immediate: %A advances by 8 x i64 = 64 bytes, the size of
; the four <2 x i64> vectors loaded, so "[x0], #64" is expected.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v2i64_post_imm_ld4:
;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
  %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
  %tmp = getelementptr i64* %A, i32 8
  store i64* %tmp, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
}
1253
; Post-index by register: %A advances by the run-time count %inc (in i64
; elements) after the ld4, so a register post-index operand is expected.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i64_post_reg_ld4:
;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
  %tmp = getelementptr i64* %A, i64 %inc
  store i64* %tmp, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
}

declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64*)
1264
1265
; Post-index by immediate: %A advances by 4 x i64 = 32 bytes, the size of
; the four <1 x i64> vectors loaded, so "[x0], #32" is expected.
; For one-element vectors the expected mnemonic is ld1.1d, not ld4.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v1i64_post_imm_ld4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
  %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
  %tmp = getelementptr i64* %A, i32 4
  store i64* %tmp, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
}
1274
; Post-index by register: %A advances by the run-time count %inc (in i64
; elements) after the load, so a register post-index operand is expected.
; For one-element vectors the expected mnemonic is ld1.1d, not ld4.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1i64_post_reg_ld4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
  %tmp = getelementptr i64* %A, i64 %inc
  store i64* %tmp, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
}

declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64*)
1285
1286
; Post-index by immediate: %A advances by 16 x float = 64 bytes, the size of
; the four <4 x float> vectors loaded, so "[x0], #64" is expected.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4(float* %A, float** %ptr) {
;CHECK-LABEL: test_v4f32_post_imm_ld4:
;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
  %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A)
  %tmp = getelementptr float* %A, i32 16
  store float* %tmp, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
}
1295
; Post-index by register: %A advances by the run-time count %inc (in float
; elements) after the ld4, so a register post-index operand is expected.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4f32_post_reg_ld4:
;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A)
  %tmp = getelementptr float* %A, i64 %inc
  store float* %tmp, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
}

declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float*)
1306
1307
; Post-index by immediate: %A advances by 8 x float = 32 bytes, the size of
; the four <2 x float> vectors loaded, so "[x0], #32" is expected.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4(float* %A, float** %ptr) {
;CHECK-LABEL: test_v2f32_post_imm_ld4:
;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
  %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A)
  %tmp = getelementptr float* %A, i32 8
  store float* %tmp, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
}
1316
; Post-index by register: %A advances by the run-time count %inc (in float
; elements) after the ld4, so a register post-index operand is expected.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f32_post_reg_ld4:
;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A)
  %tmp = getelementptr float* %A, i64 %inc
  store float* %tmp, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
}

declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float*)
1327
1328
; Post-index by immediate: %A advances by 8 x double = 64 bytes, the size of
; the four <2 x double> vectors loaded, so "[x0], #64" is expected.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4(double* %A, double** %ptr) {
;CHECK-LABEL: test_v2f64_post_imm_ld4:
;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
  %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A)
  %tmp = getelementptr double* %A, i32 8
  store double* %tmp, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
}
1337
; Post-index by register: %A advances by the run-time count %inc (in double
; elements) after the ld4, so a register post-index operand is expected.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f64_post_reg_ld4:
;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A)
  %tmp = getelementptr double* %A, i64 %inc
  store double* %tmp, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
}

declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double*)
1348
1349
; Post-index by immediate: %A advances by 4 x double = 32 bytes, the size of
; the four <1 x double> vectors loaded, so "[x0], #32" is expected.
; For one-element vectors the expected mnemonic is ld1.1d, not ld4.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4(double* %A, double** %ptr) {
;CHECK-LABEL: test_v1f64_post_imm_ld4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
  %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
  %tmp = getelementptr double* %A, i32 4
  store double* %tmp, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
}
1358
; Post-index by register: %A advances by the run-time count %inc (in double
; elements) after the load, so a register post-index operand is expected.
; For one-element vectors the expected mnemonic is ld1.1d, not ld4.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1f64_post_reg_ld4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
  %tmp = getelementptr double* %A, i64 %inc
  store double* %tmp, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
}

declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double*)
1369
; Post-index by immediate: %A advances by 32 x i8 = 32 bytes, the size of
; the two <16 x i8> vectors loaded by ld1x2, so "[x0], #32" is expected.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld1x2:
;CHECK: ld1.16b { v0, v1 }, [x0], #32
  %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A)
  %tmp = getelementptr i8* %A, i32 32
  store i8* %tmp, i8** %ptr
  ret { <16 x i8>, <16 x i8> } %ld1x2
}
1378
; Post-index by register: %A advances by the run-time count %inc (in i8
; elements) after the ld1x2, so a register post-index operand is expected.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v16i8_post_reg_ld1x2:
;CHECK: ld1.16b { v0, v1 }, [x0], x{{[0-9]+}}
  %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A)
  %tmp = getelementptr i8* %A, i64 %inc
  store i8* %tmp, i8** %ptr
  ret { <16 x i8>, <16 x i8> } %ld1x2
}

declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*)
1389
1390
; Post-index by immediate: %A advances by 16 x i8 = 16 bytes, the size of
; the two <8 x i8> vectors loaded by ld1x2, so "[x0], #16" is expected.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v8i8_post_imm_ld1x2:
;CHECK: ld1.8b { v0, v1 }, [x0], #16
  %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A)
  %tmp = getelementptr i8* %A, i32 16
  store i8* %tmp, i8** %ptr
  ret { <8 x i8>, <8 x i8> } %ld1x2
}
1399
; Post-index by register: %A advances by the run-time count %inc (in i8
; elements) after the ld1x2, so a register post-index operand is expected.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i8_post_reg_ld1x2:
;CHECK: ld1.8b { v0, v1 }, [x0], x{{[0-9]+}}
  %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A)
  %tmp = getelementptr i8* %A, i64 %inc
  store i8* %tmp, i8** %ptr
  ret { <8 x i8>, <8 x i8> } %ld1x2
}

declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8*)
1410
1411
; Post-index by immediate: %A advances by 16 x i16 = 32 bytes, the size of
; the two <8 x i16> vectors loaded by ld1x2, so "[x0], #32" is expected.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v8i16_post_imm_ld1x2:
;CHECK: ld1.8h { v0, v1 }, [x0], #32
  %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A)
  %tmp = getelementptr i16* %A, i32 16
  store i16* %tmp, i16** %ptr
  ret { <8 x i16>, <8 x i16> } %ld1x2
}
1420
; Post-index by register: %A advances by the run-time count %inc (in i16
; elements) after the ld1x2, so a register post-index operand is expected.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i16_post_reg_ld1x2:
;CHECK: ld1.8h { v0, v1 }, [x0], x{{[0-9]+}}
  %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A)
  %tmp = getelementptr i16* %A, i64 %inc
  store i16* %tmp, i16** %ptr
  ret { <8 x i16>, <8 x i16> } %ld1x2
}

declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16*)
1431
1432
; Post-index by immediate: %A advances by 8 x i16 = 16 bytes, the size of
; the two <4 x i16> vectors loaded by ld1x2, so "[x0], #16" is expected.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v4i16_post_imm_ld1x2:
;CHECK: ld1.4h { v0, v1 }, [x0], #16
  %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A)
  %tmp = getelementptr i16* %A, i32 8
  store i16* %tmp, i16** %ptr
  ret { <4 x i16>, <4 x i16> } %ld1x2
}
1441
; Post-index by register: %A advances by the run-time count %inc (in i16
; elements) after the ld1x2, so a register post-index operand is expected.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i16_post_reg_ld1x2:
;CHECK: ld1.4h { v0, v1 }, [x0], x{{[0-9]+}}
  %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A)
  %tmp = getelementptr i16* %A, i64 %inc
  store i16* %tmp, i16** %ptr
  ret { <4 x i16>, <4 x i16> } %ld1x2
}

declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16*)
1452
1453
; Post-index by immediate: %A advances by 8 x i32 = 32 bytes, the size of
; the two <4 x i32> vectors loaded by ld1x2, so "[x0], #32" is expected.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v4i32_post_imm_ld1x2:
;CHECK: ld1.4s { v0, v1 }, [x0], #32
  %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A)
  %tmp = getelementptr i32* %A, i32 8
  store i32* %tmp, i32** %ptr
  ret { <4 x i32>, <4 x i32> } %ld1x2
}
1462
; Post-index by register: %A advances by the run-time count %inc (in i32
; elements) after the ld1x2, so a register post-index operand is expected.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i32_post_reg_ld1x2:
;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
  %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A)
  %tmp = getelementptr i32* %A, i64 %inc
  store i32* %tmp, i32** %ptr
  ret { <4 x i32>, <4 x i32> } %ld1x2
}

declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32*)
1473
1474
; Post-index by immediate: %A advances by 4 x i32 = 16 bytes, the size of
; the two <2 x i32> vectors loaded by ld1x2, so "[x0], #16" is expected.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v2i32_post_imm_ld1x2:
;CHECK: ld1.2s { v0, v1 }, [x0], #16
  %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A)
  %tmp = getelementptr i32* %A, i32 4
  store i32* %tmp, i32** %ptr
  ret { <2 x i32>, <2 x i32> } %ld1x2
}
1483
; Post-index by register: %A advances by the run-time count %inc (in i32
; elements) after the ld1x2, so a register post-index operand is expected.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i32_post_reg_ld1x2:
;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
  %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A)
  %tmp = getelementptr i32* %A, i64 %inc
  store i32* %tmp, i32** %ptr
  ret { <2 x i32>, <2 x i32> } %ld1x2
}

declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32*)
1494
1495
; Post-index by immediate: %A advances by 4 x i64 = 32 bytes, the size of
; the two <2 x i64> vectors loaded by ld1x2, so "[x0], #32" is expected.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v2i64_post_imm_ld1x2:
;CHECK: ld1.2d { v0, v1 }, [x0], #32
  %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A)
  %tmp = getelementptr i64* %A, i32 4
  store i64* %tmp, i64** %ptr
  ret { <2 x i64>, <2 x i64> } %ld1x2
}
1504
; Post-index by register: %A advances by the run-time count %inc (in i64
; elements) after the ld1x2, so a register post-index operand is expected.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i64_post_reg_ld1x2:
;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}}
  %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A)
  %tmp = getelementptr i64* %A, i64 %inc
  store i64* %tmp, i64** %ptr
  ret { <2 x i64>, <2 x i64> } %ld1x2
}

declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64*)
1515
1516
; Post-index by immediate: %A advances by 2 x i64 = 16 bytes, the size of
; the two <1 x i64> vectors loaded by ld1x2, so "[x0], #16" is expected.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v1i64_post_imm_ld1x2:
;CHECK: ld1.1d { v0, v1 }, [x0], #16
  %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A)
  %tmp = getelementptr i64* %A, i32 2
  store i64* %tmp, i64** %ptr
  ret { <1 x i64>, <1 x i64> } %ld1x2
}
1525
; Register post-index case: after the ld1x2 load of two <1 x i64> vectors, the
; address %A + %inc x i64 is stored to %ptr.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1i64_post_reg_ld1x2:
;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64> } %pair
}
1534
1535declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64*)
1536
1537
; Immediate post-index case: after the ld1x2 load of two <4 x float> vectors,
; the address %A + 8 x float (32 bytes) is stored to %ptr.
define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(float* %A, float** %ptr) {
;CHECK-LABEL: test_v4f32_post_imm_ld1x2:
;CHECK: ld1.4s { v0, v1 }, [x0], #32
  %pair = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A)
  %next = getelementptr float* %A, i32 8
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float> } %pair
}
1546
; Register post-index case: after the ld1x2 load of two <4 x float> vectors,
; the address %A + %inc x float is stored to %ptr.
define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4f32_post_reg_ld1x2:
;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A)
  %next = getelementptr float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float> } %pair
}
1555
1556declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float*)
1557
1558
; Immediate post-index case: after the ld1x2 load of two <2 x float> vectors,
; the address %A + 4 x float (16 bytes) is stored to %ptr.
define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(float* %A, float** %ptr) {
;CHECK-LABEL: test_v2f32_post_imm_ld1x2:
;CHECK: ld1.2s { v0, v1 }, [x0], #16
  %pair = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A)
  %next = getelementptr float* %A, i32 4
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float> } %pair
}
1567
; Register post-index case: after the ld1x2 load of two <2 x float> vectors,
; the address %A + %inc x float is stored to %ptr.
define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f32_post_reg_ld1x2:
;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A)
  %next = getelementptr float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float> } %pair
}
1576
1577declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*)
1578
1579
; Immediate post-index case: after the ld1x2 load of two <2 x double> vectors,
; the address %A + 4 x double (32 bytes) is stored to %ptr.
define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(double* %A, double** %ptr) {
;CHECK-LABEL: test_v2f64_post_imm_ld1x2:
;CHECK: ld1.2d { v0, v1 }, [x0], #32
  %pair = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A)
  %next = getelementptr double* %A, i32 4
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double> } %pair
}
1588
; Register post-index case: after the ld1x2 load of two <2 x double> vectors,
; the address %A + %inc x double is stored to %ptr.
define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f64_post_reg_ld1x2:
;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A)
  %next = getelementptr double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double> } %pair
}
1597
1598declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double*)
1599
1600
; Immediate post-index case: after the ld1x2 load of two <1 x double> vectors,
; the address %A + 2 x double (16 bytes) is stored to %ptr.
define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(double* %A, double** %ptr) {
;CHECK-LABEL: test_v1f64_post_imm_ld1x2:
;CHECK: ld1.1d { v0, v1 }, [x0], #16
  %pair = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A)
  %next = getelementptr double* %A, i32 2
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double> } %pair
}
1609
; Register post-index case: after the ld1x2 load of two <1 x double> vectors,
; the address %A + %inc x double is stored to %ptr.
define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1f64_post_reg_ld1x2:
;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A)
  %next = getelementptr double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double> } %pair
}
1618
1619declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double*)
1620
1621
; Immediate post-index case: after the ld1x3 load of three <16 x i8> vectors,
; the address %A + 48 x i8 (48 bytes) is stored to %ptr.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld1x3:
;CHECK: ld1.16b { v0, v1, v2 }, [x0], #48
  %triple = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i32 48
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %triple
}
1630
; Register post-index case: after the ld1x3 load of three <16 x i8> vectors,
; the address %A + %inc x i8 is stored to %ptr.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v16i8_post_reg_ld1x3:
;CHECK: ld1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %triple
}
1639
1640declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8*)
1641
1642
; Immediate post-index case: after the ld1x3 load of three <8 x i8> vectors,
; the address %A + 24 x i8 (24 bytes) is stored to %ptr.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v8i8_post_imm_ld1x3:
;CHECK: ld1.8b { v0, v1, v2 }, [x0], #24
  %triple = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i32 24
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %triple
}
1651
; Register post-index case: after the ld1x3 load of three <8 x i8> vectors,
; the address %A + %inc x i8 is stored to %ptr.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i8_post_reg_ld1x3:
;CHECK: ld1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %triple
}
1660
1661declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8*)
1662
1663
; Immediate post-index case: after the ld1x3 load of three <8 x i16> vectors,
; the address %A + 24 x i16 (48 bytes) is stored to %ptr.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v8i16_post_imm_ld1x3:
;CHECK: ld1.8h { v0, v1, v2 }, [x0], #48
  %triple = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i32 24
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %triple
}
1672
; Register post-index case: after the ld1x3 load of three <8 x i16> vectors,
; the address %A + %inc x i16 is stored to %ptr.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i16_post_reg_ld1x3:
;CHECK: ld1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %triple
}
1681
1682declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16*)
1683
1684
; Immediate post-index case: after the ld1x3 load of three <4 x i16> vectors,
; the address %A + 12 x i16 (24 bytes) is stored to %ptr.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v4i16_post_imm_ld1x3:
;CHECK: ld1.4h { v0, v1, v2 }, [x0], #24
  %triple = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i32 12
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %triple
}
1693
; Register post-index case: after the ld1x3 load of three <4 x i16> vectors,
; the address %A + %inc x i16 is stored to %ptr.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i16_post_reg_ld1x3:
;CHECK: ld1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %triple
}
1702
1703declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16*)
1704
1705
; Immediate post-index case: after the ld1x3 load of three <4 x i32> vectors,
; the address %A + 12 x i32 (48 bytes) is stored to %ptr.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v4i32_post_imm_ld1x3:
;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48
  %triple = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i32 12
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %triple
}
1714
; Register post-index case: after the ld1x3 load of three <4 x i32> vectors,
; the address %A + %inc x i32 is stored to %ptr.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i32_post_reg_ld1x3:
;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %triple
}
1723
1724declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32*)
1725
1726
; Immediate post-index case: after the ld1x3 load of three <2 x i32> vectors,
; the address %A + 6 x i32 (24 bytes) is stored to %ptr.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v2i32_post_imm_ld1x3:
;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24
  %triple = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i32 6
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %triple
}
1735
; Register post-index case: after the ld1x3 load of three <2 x i32> vectors,
; the address %A + %inc x i32 is stored to %ptr.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i32_post_reg_ld1x3:
;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %triple
}
1744
1745declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32*)
1746
1747
; Immediate post-index case: after the ld1x3 load of three <2 x i64> vectors,
; the address %A + 6 x i64 (48 bytes) is stored to %ptr.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v2i64_post_imm_ld1x3:
;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48
  %triple = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i32 6
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %triple
}
1756
; Register post-index case: after the ld1x3 load of three <2 x i64> vectors,
; the address %A + %inc x i64 is stored to %ptr.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i64_post_reg_ld1x3:
;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %triple
}
1765
1766declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64*)
1767
1768
; Immediate post-index case: after the ld1x3 load of three <1 x i64> vectors,
; the address %A + 3 x i64 (24 bytes) is stored to %ptr.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x3(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v1i64_post_imm_ld1x3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
  %triple = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i32 3
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %triple
}
1777
; Register post-index case: after the ld1x3 load of three <1 x i64> vectors,
; the address %A + %inc x i64 is stored to %ptr.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1i64_post_reg_ld1x3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %triple
}
1786
1787declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64*)
1788
1789
; Immediate post-index case: after the ld1x3 load of three <4 x float> vectors,
; the address %A + 12 x float (48 bytes) is stored to %ptr.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(float* %A, float** %ptr) {
;CHECK-LABEL: test_v4f32_post_imm_ld1x3:
;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48
  %triple = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A)
  %next = getelementptr float* %A, i32 12
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %triple
}
1798
; Register post-index case: after the ld1x3 load of three <4 x float> vectors,
; the address %A + %inc x float is stored to %ptr.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4f32_post_reg_ld1x3:
;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A)
  %next = getelementptr float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %triple
}
1807
1808declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float*)
1809
1810
; Immediate post-index case: after the ld1x3 load of three <2 x float> vectors,
; the address %A + 6 x float (24 bytes) is stored to %ptr.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(float* %A, float** %ptr) {
;CHECK-LABEL: test_v2f32_post_imm_ld1x3:
;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24
  %triple = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A)
  %next = getelementptr float* %A, i32 6
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %triple
}
1819
; Register post-index case: after the ld1x3 load of three <2 x float> vectors,
; the address %A + %inc x float is stored to %ptr.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f32_post_reg_ld1x3:
;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A)
  %next = getelementptr float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %triple
}
1828
1829declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float*)
1830
1831
; Immediate post-index case: after the ld1x3 load of three <2 x double> vectors,
; the address %A + 6 x double (48 bytes) is stored to %ptr.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x3(double* %A, double** %ptr) {
;CHECK-LABEL: test_v2f64_post_imm_ld1x3:
;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48
  %triple = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A)
  %next = getelementptr double* %A, i32 6
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %triple
}
1840
; Register post-index case: after the ld1x3 load of three <2 x double> vectors,
; the address %A + %inc x double is stored to %ptr.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f64_post_reg_ld1x3:
;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A)
  %next = getelementptr double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %triple
}
1849
1850declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double*)
1851
1852
; Immediate post-index case: after the ld1x3 load of three <1 x double> vectors,
; the address %A + 3 x double (24 bytes) is stored to %ptr.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(double* %A, double** %ptr) {
;CHECK-LABEL: test_v1f64_post_imm_ld1x3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
  %triple = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A)
  %next = getelementptr double* %A, i32 3
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %triple
}
1861
; Register post-index case: after the ld1x3 load of three <1 x double> vectors,
; the address %A + %inc x double is stored to %ptr.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1f64_post_reg_ld1x3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A)
  %next = getelementptr double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %triple
}
1870
1871declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double*)
1872
1873
; Immediate post-index case: after the ld1x4 load of four <16 x i8> vectors,
; the address %A + 64 x i8 (64 bytes) is stored to %ptr.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld1x4:
;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], #64
  %quad = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i32 64
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %quad
}
1882
; Register post-index case: after the ld1x4 load of four <16 x i8> vectors,
; the address %A + %inc x i8 is stored to %ptr.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v16i8_post_reg_ld1x4:
;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %quad
}
1891
1892declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8*)
1893
1894
; Immediate post-index case: after the ld1x4 load of four <8 x i8> vectors,
; the address %A + 32 x i8 (32 bytes) is stored to %ptr.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v8i8_post_imm_ld1x4:
;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], #32
  %quad = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i32 32
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %quad
}
1903
; Register post-index case: after the ld1x4 load of four <8 x i8> vectors,
; the address %A + %inc x i8 is stored to %ptr.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i8_post_reg_ld1x4:
;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %quad
}
1912
1913declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8*)
1914
1915
; Immediate post-index case: after the ld1x4 load of four <8 x i16> vectors,
; the address %A + 32 x i16 (64 bytes) is stored to %ptr.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v8i16_post_imm_ld1x4:
;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], #64
  %quad = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i32 32
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %quad
}
1924
; Register post-index case: after the ld1x4 load of four <8 x i16> vectors,
; the address %A + %inc x i16 is stored to %ptr.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i16_post_reg_ld1x4:
;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %quad
}
1933
1934declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16*)
1935
1936
; Immediate post-index case: after the ld1x4 load of four <4 x i16> vectors,
; the address %A + 16 x i16 (32 bytes) is stored to %ptr.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v4i16_post_imm_ld1x4:
;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], #32
  %quad = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i32 16
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %quad
}
1945
; Register post-index case: after the ld1x4 load of four <4 x i16> vectors,
; the address %A + %inc x i16 is stored to %ptr.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i16_post_reg_ld1x4:
;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %quad
}
1954
1955declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16*)
1956
1957
; Immediate post-index case: after the ld1x4 load of four <4 x i32> vectors,
; the address %A + 16 x i32 (64 bytes) is stored to %ptr.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v4i32_post_imm_ld1x4:
;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64
  %quad = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i32 16
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %quad
}
1966
; Register post-index case: after the ld1x4 load of four <4 x i32> vectors,
; the address %A + %inc x i32 is stored to %ptr.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i32_post_reg_ld1x4:
;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %quad
}
1975
1976declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32*)
1977
1978
; Immediate post-index case: after the ld1x4 load of four <2 x i32> vectors,
; the address %A + 8 x i32 (32 bytes) is stored to %ptr.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v2i32_post_imm_ld1x4:
;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32
  %quad = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i32 8
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %quad
}
1987
; Register post-index case: after the ld1x4 load of four <2 x i32> vectors,
; the address %A + %inc x i32 is stored to %ptr.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i32_post_reg_ld1x4:
;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %quad
}
1996
1997declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32*)
1998
1999
; Immediate post-index case: after the ld1x4 load of four <2 x i64> vectors,
; the address %A + 8 x i64 (64 bytes) is stored to %ptr.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v2i64_post_imm_ld1x4:
;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64
  %quad = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i32 8
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %quad
}
2008
; Register post-index case: after the ld1x4 load of four <2 x i64> vectors,
; the address %A + %inc x i64 is stored to %ptr.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i64_post_reg_ld1x4:
;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %quad
}
2017
2018declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64*)
2019
2020
; Immediate post-index case: after the ld1x4 load of four <1 x i64> vectors,
; the address %A + 4 x i64 (32 bytes) is stored to %ptr.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v1i64_post_imm_ld1x4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
  %quad = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i32 4
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %quad
}
2029
; Register post-index case: after the ld1x4 load of four <1 x i64> vectors,
; the address %A + %inc x i64 is stored to %ptr.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1i64_post_reg_ld1x4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %quad
}
2038
2039declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64*)
2040
2041
; Immediate post-index case: after the ld1x4 load of four <4 x float> vectors,
; the address %A + 16 x float (64 bytes) is stored to %ptr.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x4(float* %A, float** %ptr) {
;CHECK-LABEL: test_v4f32_post_imm_ld1x4:
;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64
  %quad = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A)
  %next = getelementptr float* %A, i32 16
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %quad
}
2050
; Register post-index case: after the ld1x4 load of four <4 x float> vectors,
; the address %A + %inc x float is stored to %ptr.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4f32_post_reg_ld1x4:
;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A)
  %next = getelementptr float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %quad
}
2059
2060declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float*)
2061
2062
; Immediate post-index case: after the ld1x4 load of four <2 x float> vectors,
; the address %A + 8 x float (32 bytes) is stored to %ptr.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x4(float* %A, float** %ptr) {
;CHECK-LABEL: test_v2f32_post_imm_ld1x4:
;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32
  %quad = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A)
  %next = getelementptr float* %A, i32 8
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %quad
}
2071
; Register post-index case: after the ld1x4 load of four <2 x float> vectors,
; the address %A + %inc x float is stored to %ptr.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f32_post_reg_ld1x4:
;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A)
  %next = getelementptr float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %quad
}
2080
2081declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float*)
2082
2083
; Immediate post-index case: after the ld1x4 load of four <2 x double> vectors,
; the address %A + 8 x double (64 bytes) is stored to %ptr.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x4(double* %A, double** %ptr) {
;CHECK-LABEL: test_v2f64_post_imm_ld1x4:
;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64
  %quad = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A)
  %next = getelementptr double* %A, i32 8
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %quad
}
2092
; Register post-index case: after the ld1x4 load of four <2 x double> vectors,
; the address %A + %inc x double is stored to %ptr.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f64_post_reg_ld1x4:
;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A)
  %next = getelementptr double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %quad
}
2101
2102declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double*)
2103
2104
; Immediate post-index case: after the ld1x4 load of four <1 x double> vectors,
; the address %A + 4 x double (32 bytes) is stored to %ptr.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x4(double* %A, double** %ptr) {
;CHECK-LABEL: test_v1f64_post_imm_ld1x4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
  %quad = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A)
  %next = getelementptr double* %A, i32 4
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %quad
}
2113
; Register post-index case: after the ld1x4 load of four <1 x double> vectors,
; the address %A + %inc x double is stored to %ptr.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1f64_post_reg_ld1x4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A)
  %next = getelementptr double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %quad
}
2122
2123declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double*)
2124
2125
; Immediate post-index case: after the ld2r call producing two <16 x i8>
; vectors, the address %A + 2 x i8 (2 bytes) is stored to %ptr.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind {
;CHECK-LABEL: test_v16i8_post_imm_ld2r:
;CHECK: ld2r.16b { v0, v1 }, [x0], #2
  %dup = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i32 2
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8> } %dup
}
2134
; Register post-index case: after the ld2r call producing two <16 x i8>
; vectors, the address %A + %inc x i8 is stored to %ptr.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v16i8_post_reg_ld2r:
;CHECK: ld2r.16b { v0, v1 }, [x0], x{{[0-9]+}}
  %dup = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8> } %dup
}
2143
2144declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly
2145
2146
; Immediate post-index case: after the ld2r call producing two <8 x i8>
; vectors, the address %A + 2 x i8 (2 bytes) is stored to %ptr.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind {
;CHECK-LABEL: test_v8i8_post_imm_ld2r:
;CHECK: ld2r.8b { v0, v1 }, [x0], #2
  %dup = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i32 2
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8> } %dup
}
2155
; Register post-index case: after the ld2r call producing two <8 x i8>
; vectors, the address %A + %inc x i8 is stored to %ptr.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v8i8_post_reg_ld2r:
;CHECK: ld2r.8b { v0, v1 }, [x0], x{{[0-9]+}}
  %dup = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8> } %dup
}
2164
2165declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly
2166
2167
; Immediate post-index case: after the ld2r call producing two <8 x i16>
; vectors, the address %A + 2 x i16 (4 bytes) is stored to %ptr.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind {
;CHECK-LABEL: test_v8i16_post_imm_ld2r:
;CHECK: ld2r.8h { v0, v1 }, [x0], #4
  %dup = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i32 2
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16> } %dup
}
2176
; Register post-index case: after the ld2r call producing two <8 x i16>
; vectors, the address %A + %inc x i16 is stored to %ptr.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v8i16_post_reg_ld2r:
;CHECK: ld2r.8h { v0, v1 }, [x0], x{{[0-9]+}}
  %dup = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16> } %dup
}
2185
2186declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly
2187
2188
; ld2r post-increment, immediate form: %A is advanced by 2 x i16 (4 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #4.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind {
; CHECK-LABEL: test_v4i16_post_imm_ld2r:
; CHECK: ld2r.4h { v0, v1 }, [x0], #4
  %pair = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i32 2
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16> } %pair
}
2197
; ld2r post-increment, register form: %A is advanced by %inc i16 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i16_post_reg_ld2r:
; CHECK: ld2r.4h { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16> } %pair
}
2206
2207declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly
2208
2209
; ld2r post-increment, immediate form: %A is advanced by 2 x i32 (8 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #8.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind {
; CHECK-LABEL: test_v4i32_post_imm_ld2r:
; CHECK: ld2r.4s { v0, v1 }, [x0], #8
  %pair = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i32 2
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32> } %pair
}
2218
; ld2r post-increment, register form: %A is advanced by %inc i32 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i32_post_reg_ld2r:
; CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32> } %pair
}
2227
2228declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly
2229
; ld2r post-increment, immediate form: %A is advanced by 2 x i32 (8 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #8.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind {
; CHECK-LABEL: test_v2i32_post_imm_ld2r:
; CHECK: ld2r.2s { v0, v1 }, [x0], #8
  %pair = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i32 2
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32> } %pair
}
2238
; ld2r post-increment, register form: %A is advanced by %inc i32 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i32_post_reg_ld2r:
; CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32> } %pair
}
2247
2248declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly
2249
2250
; ld2r post-increment, immediate form: %A is advanced by 2 x i64 (16 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #16.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind {
; CHECK-LABEL: test_v2i64_post_imm_ld2r:
; CHECK: ld2r.2d { v0, v1 }, [x0], #16
  %pair = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i32 2
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64> } %pair
}
2259
; ld2r post-increment, register form: %A is advanced by %inc i64 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i64_post_reg_ld2r:
; CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64> } %pair
}
2268
2269declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly
2270
; ld2r post-increment, immediate form: %A is advanced by 2 x i64 (16 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #16.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind {
; CHECK-LABEL: test_v1i64_post_imm_ld2r:
; CHECK: ld2r.1d { v0, v1 }, [x0], #16
  %pair = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i32 2
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64> } %pair
}
2279
; ld2r post-increment, register form: %A is advanced by %inc i64 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v1i64_post_reg_ld2r:
; CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64> } %pair
}
2288
2289declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly
2290
2291
; ld2r post-increment, immediate form: %A is advanced by 2 x float (8 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #8.
define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(float* %A, float** %ptr) nounwind {
; CHECK-LABEL: test_v4f32_post_imm_ld2r:
; CHECK: ld2r.4s { v0, v1 }, [x0], #8
  %pair = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A)
  %next = getelementptr float* %A, i32 2
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float> } %pair
}
2300
; ld2r post-increment, register form: %A is advanced by %inc float elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v4f32_post_reg_ld2r:
; CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A)
  %next = getelementptr float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float> } %pair
}
2309
2310declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float*) nounwind readonly
2311
; ld2r post-increment, immediate form: %A is advanced by 2 x float (8 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #8.
define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(float* %A, float** %ptr) nounwind {
; CHECK-LABEL: test_v2f32_post_imm_ld2r:
; CHECK: ld2r.2s { v0, v1 }, [x0], #8
  %pair = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A)
  %next = getelementptr float* %A, i32 2
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float> } %pair
}
2320
; ld2r post-increment, register form: %A is advanced by %inc float elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f32_post_reg_ld2r:
; CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A)
  %next = getelementptr float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float> } %pair
}
2329
2330declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float*) nounwind readonly
2331
2332
; ld2r post-increment, immediate form: %A is advanced by 2 x double (16 bytes) and
; the new address is stored to %ptr; the CHECK line expects post-index #16.
define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(double* %A, double** %ptr) nounwind {
; CHECK-LABEL: test_v2f64_post_imm_ld2r:
; CHECK: ld2r.2d { v0, v1 }, [x0], #16
  %pair = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A)
  %next = getelementptr double* %A, i32 2
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double> } %pair
}
2341
; ld2r post-increment, register form: %A is advanced by %inc double elements and
; the new address is stored to %ptr; the CHECK line expects a register post-index.
define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f64_post_reg_ld2r:
; CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A)
  %next = getelementptr double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double> } %pair
}
2350
2351declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double*) nounwind readonly
2352
; ld2r post-increment, immediate form: %A is advanced by 2 x double (16 bytes) and
; the new address is stored to %ptr; the CHECK line expects post-index #16.
define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(double* %A, double** %ptr) nounwind {
; CHECK-LABEL: test_v1f64_post_imm_ld2r:
; CHECK: ld2r.1d { v0, v1 }, [x0], #16
  %pair = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A)
  %next = getelementptr double* %A, i32 2
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double> } %pair
}
2361
; ld2r post-increment, register form: %A is advanced by %inc double elements and
; the new address is stored to %ptr; the CHECK line expects a register post-index.
define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v1f64_post_reg_ld2r:
; CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}}
  %pair = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A)
  %next = getelementptr double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double> } %pair
}
2370
2371declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double*) nounwind readonly
2372
2373
; ld3r post-increment, immediate form: %A is advanced by 3 x i8 (3 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #3.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind {
; CHECK-LABEL: test_v16i8_post_imm_ld3r:
; CHECK: ld3r.16b { v0, v1, v2 }, [x0], #3
  %triple = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i32 3
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %triple
}
2382
; ld3r post-increment, register form: %A is advanced by %inc i8 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v16i8_post_reg_ld3r:
; CHECK: ld3r.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %triple
}
2391
2392declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly
2393
2394
; ld3r post-increment, immediate form: %A is advanced by 3 x i8 (3 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #3.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind {
; CHECK-LABEL: test_v8i8_post_imm_ld3r:
; CHECK: ld3r.8b { v0, v1, v2 }, [x0], #3
  %triple = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i32 3
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %triple
}
2403
; ld3r post-increment, register form: %A is advanced by %inc i8 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i8_post_reg_ld3r:
; CHECK: ld3r.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %triple
}
2412
2413declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly
2414
2415
; ld3r post-increment, immediate form: %A is advanced by 3 x i16 (6 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #6.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind {
; CHECK-LABEL: test_v8i16_post_imm_ld3r:
; CHECK: ld3r.8h { v0, v1, v2 }, [x0], #6
  %triple = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i32 3
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %triple
}
2424
; ld3r post-increment, register form: %A is advanced by %inc i16 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i16_post_reg_ld3r:
; CHECK: ld3r.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %triple
}
2433
2434declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly
2435
2436
; ld3r post-increment, immediate form: %A is advanced by 3 x i16 (6 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #6.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind {
; CHECK-LABEL: test_v4i16_post_imm_ld3r:
; CHECK: ld3r.4h { v0, v1, v2 }, [x0], #6
  %triple = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i32 3
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %triple
}
2445
; ld3r post-increment, register form: %A is advanced by %inc i16 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i16_post_reg_ld3r:
; CHECK: ld3r.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %triple
}
2454
2455declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly
2456
2457
; ld3r post-increment, immediate form: %A is advanced by 3 x i32 (12 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #12.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind {
; CHECK-LABEL: test_v4i32_post_imm_ld3r:
; CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12
  %triple = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i32 3
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %triple
}
2466
; ld3r post-increment, register form: %A is advanced by %inc i32 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i32_post_reg_ld3r:
; CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %triple
}
2475
2476declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly
2477
; ld3r post-increment, immediate form: %A is advanced by 3 x i32 (12 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #12.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind {
; CHECK-LABEL: test_v2i32_post_imm_ld3r:
; CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12
  %triple = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i32 3
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %triple
}
2486
; ld3r post-increment, register form: %A is advanced by %inc i32 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i32_post_reg_ld3r:
; CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %triple
}
2495
2496declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly
2497
2498
; ld3r post-increment, immediate form: %A is advanced by 3 x i64 (24 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #24.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind {
; CHECK-LABEL: test_v2i64_post_imm_ld3r:
; CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24
  %triple = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i32 3
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %triple
}
2507
; ld3r post-increment, register form: %A is advanced by %inc i64 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i64_post_reg_ld3r:
; CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %triple
}
2516
2517declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly
2518
; ld3r post-increment, immediate form: %A is advanced by 3 x i64 (24 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #24.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind {
; CHECK-LABEL: test_v1i64_post_imm_ld3r:
; CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24
  %triple = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i32 3
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %triple
}
2527
; ld3r post-increment, register form: %A is advanced by %inc i64 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v1i64_post_reg_ld3r:
; CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %triple
}
2536
2537declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly
2538
2539
; ld3r post-increment, immediate form: %A is advanced by 3 x float (12 bytes) and
; the new address is stored to %ptr; the CHECK line expects post-index #12.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(float* %A, float** %ptr) nounwind {
; CHECK-LABEL: test_v4f32_post_imm_ld3r:
; CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12
  %triple = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A)
  %next = getelementptr float* %A, i32 3
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %triple
}
2548
; ld3r post-increment, register form: %A is advanced by %inc float elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v4f32_post_reg_ld3r:
; CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A)
  %next = getelementptr float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %triple
}
2557
2558declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float*) nounwind readonly
2559
; ld3r post-increment, immediate form: %A is advanced by 3 x float (12 bytes) and
; the new address is stored to %ptr; the CHECK line expects post-index #12.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(float* %A, float** %ptr) nounwind {
; CHECK-LABEL: test_v2f32_post_imm_ld3r:
; CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12
  %triple = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A)
  %next = getelementptr float* %A, i32 3
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %triple
}
2568
; ld3r post-increment, register form: %A is advanced by %inc float elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f32_post_reg_ld3r:
; CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A)
  %next = getelementptr float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %triple
}
2577
2578declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float*) nounwind readonly
2579
2580
; ld3r post-increment, immediate form: %A is advanced by 3 x double (24 bytes) and
; the new address is stored to %ptr; the CHECK line expects post-index #24.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(double* %A, double** %ptr) nounwind {
; CHECK-LABEL: test_v2f64_post_imm_ld3r:
; CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24
  %triple = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A)
  %next = getelementptr double* %A, i32 3
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %triple
}
2589
; ld3r post-increment, register form: %A is advanced by %inc double elements and
; the new address is stored to %ptr; the CHECK line expects a register post-index.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f64_post_reg_ld3r:
; CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A)
  %next = getelementptr double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %triple
}
2598
2599declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double*) nounwind readonly
2600
; ld3r post-increment, immediate form: %A is advanced by 3 x double (24 bytes) and
; the new address is stored to %ptr; the CHECK line expects post-index #24.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(double* %A, double** %ptr) nounwind {
; CHECK-LABEL: test_v1f64_post_imm_ld3r:
; CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24
  %triple = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A)
  %next = getelementptr double* %A, i32 3
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %triple
}
2609
; ld3r post-increment, register form: %A is advanced by %inc double elements and
; the new address is stored to %ptr; the CHECK line expects a register post-index.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v1f64_post_reg_ld3r:
; CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %triple = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A)
  %next = getelementptr double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %triple
}
2618
2619declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double*) nounwind readonly
2620
2621
; ld4r post-increment, immediate form: %A is advanced by 4 x i8 (4 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #4.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind {
; CHECK-LABEL: test_v16i8_post_imm_ld4r:
; CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], #4
  %quad = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i32 4
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %quad
}
2630
; ld4r post-increment, register form: %A is advanced by %inc i8 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v16i8_post_reg_ld4r:
; CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %quad
}
2639
2640declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly
2641
2642
; ld4r post-increment, immediate form: %A is advanced by 4 x i8 (4 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #4.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind {
; CHECK-LABEL: test_v8i8_post_imm_ld4r:
; CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], #4
  %quad = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i32 4
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %quad
}
2651
; ld4r post-increment, register form: %A is advanced by %inc i8 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i8_post_reg_ld4r:
; CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %quad
}
2660
2661declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly
2662
2663
; ld4r post-increment, immediate form: %A is advanced by 4 x i16 (8 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #8.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind {
; CHECK-LABEL: test_v8i16_post_imm_ld4r:
; CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], #8
  %quad = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i32 4
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %quad
}
2672
; ld4r post-increment, register form: %A is advanced by %inc i16 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i16_post_reg_ld4r:
; CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %quad
}
2681
2682declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly
2683
2684
; ld4r post-increment, immediate form: %A is advanced by 4 x i16 (8 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #8.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind {
; CHECK-LABEL: test_v4i16_post_imm_ld4r:
; CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], #8
  %quad = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i32 4
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %quad
}
2693
; ld4r post-increment, register form: %A is advanced by %inc i16 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i16_post_reg_ld4r:
; CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %quad
}
2702
2703declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly
2704
2705
; ld4r post-increment, immediate form: %A is advanced by 4 x i32 (16 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #16.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind {
; CHECK-LABEL: test_v4i32_post_imm_ld4r:
; CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16
  %quad = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i32 4
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %quad
}
2714
; ld4r post-increment, register form: %A is advanced by %inc i32 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i32_post_reg_ld4r:
; CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %quad
}
2723
2724declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly
2725
; ld4r post-increment, immediate form: %A is advanced by 4 x i32 (16 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #16.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind {
; CHECK-LABEL: test_v2i32_post_imm_ld4r:
; CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16
  %quad = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i32 4
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %quad
}
2734
; ld4r post-increment, register form: %A is advanced by %inc i32 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i32_post_reg_ld4r:
; CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %quad
}
2743
2744declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly
2745
2746
; ld4r post-increment, immediate form: %A is advanced by 4 x i64 (32 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #32.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind {
; CHECK-LABEL: test_v2i64_post_imm_ld4r:
; CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32
  %quad = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i32 4
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %quad
}
2755
; ld4r post-increment, register form: %A is advanced by %inc i64 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i64_post_reg_ld4r:
; CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %quad
}
2764
2765declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly
2766
; ld4r post-increment, immediate form: %A is advanced by 4 x i64 (32 bytes) and the
; new address is stored to %ptr; the CHECK line expects post-index #32.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind {
; CHECK-LABEL: test_v1i64_post_imm_ld4r:
; CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32
  %quad = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i32 4
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %quad
}
2775
; ld4r post-increment, register form: %A is advanced by %inc i64 elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v1i64_post_reg_ld4r:
; CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %quad
}
2784
2785declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly
2786
2787
; ld4r post-increment, immediate form: %A is advanced by 4 x float (16 bytes) and
; the new address is stored to %ptr; the CHECK line expects post-index #16.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4r(float* %A, float** %ptr) nounwind {
; CHECK-LABEL: test_v4f32_post_imm_ld4r:
; CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16
  %quad = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A)
  %next = getelementptr float* %A, i32 4
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %quad
}
2796
; ld4r post-increment, register form: %A is advanced by %inc float elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4r(float* %A, float** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v4f32_post_reg_ld4r:
; CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A)
  %next = getelementptr float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %quad
}
2805
2806declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float*) nounwind readonly
2807
; ld4r post-increment, immediate form: %A is advanced by 4 x float (16 bytes) and
; the new address is stored to %ptr; the CHECK line expects post-index #16.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4r(float* %A, float** %ptr) nounwind {
; CHECK-LABEL: test_v2f32_post_imm_ld4r:
; CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16
  %quad = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A)
  %next = getelementptr float* %A, i32 4
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %quad
}
2816
; ld4r post-increment, register form: %A is advanced by %inc float elements and the
; new address is stored to %ptr; the CHECK line expects a register post-index.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4r(float* %A, float** %ptr, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f32_post_reg_ld4r:
; CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A)
  %next = getelementptr float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %quad
}
2825
2826declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float*) nounwind readonly
2827
2828
; ld4r post-increment, immediate form: %A is advanced by 4 x double (32 bytes) and
; the new address is stored to %ptr; the CHECK line expects post-index #32.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4r(double* %A, double** %ptr) nounwind {
; CHECK-LABEL: test_v2f64_post_imm_ld4r:
; CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32
  %quad = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A)
  %next = getelementptr double* %A, i32 4
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %quad
}
2837
; Calls the ld4r intrinsic (four <2 x double> results) on %A, then stores %A advanced by %inc doubles to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld4r.2d ..., [x0], xN.
2838define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind {
2839;CHECK-LABEL: test_v2f64_post_reg_ld4r:
2840;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2841 %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A)
2842 %tmp = getelementptr double* %A, i64 %inc
2843 store double* %tmp, double** %ptr
2844 ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
2845}
2846
2847declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double*) nounwind readonly
2848
; Calls the ld4r intrinsic (four <1 x double> results) on %A, then stores %A advanced by 4 doubles (32 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld4r.1d ..., [x0], #32.
2849define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4r(double* %A, double** %ptr) nounwind {
2850;CHECK-LABEL: test_v1f64_post_imm_ld4r:
2851;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32
2852 %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A)
2853 %tmp = getelementptr double* %A, i32 4
2854 store double* %tmp, double** %ptr
2855 ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
2856}
2857
; Calls the ld4r intrinsic (four <1 x double> results) on %A, then stores %A advanced by %inc doubles to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld4r.1d ..., [x0], xN.
2858define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind {
2859;CHECK-LABEL: test_v1f64_post_reg_ld4r:
2860;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2861 %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A)
2862 %tmp = getelementptr double* %A, i64 %inc
2863 store double* %tmp, double** %ptr
2864 ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
2865}
2866
2867declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double*) nounwind readonly
2868
2869
; Calls the ld2lane intrinsic (<16 x i8> vectors %B/%C, lane index 0) on %A, then stores %A advanced by 2 x i8 (2 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld2.b ..., [x0], #2.
2870define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
2871;CHECK-LABEL: test_v16i8_post_imm_ld2lane:
2872;CHECK: ld2.b { v0, v1 }[0], [x0], #2
2873 %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
2874 %tmp = getelementptr i8* %A, i32 2
2875 store i8* %tmp, i8** %ptr
2876 ret { <16 x i8>, <16 x i8> } %ld2
2877}
2878
; Calls the ld2lane intrinsic (<16 x i8> vectors %B/%C, lane index 0) on %A, then stores %A advanced by %inc x i8 to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld2.b ..., [x0], xN.
2879define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C) nounwind {
2880;CHECK-LABEL: test_v16i8_post_reg_ld2lane:
2881;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
2882 %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
2883 %tmp = getelementptr i8* %A, i64 %inc
2884 store i8* %tmp, i8** %ptr
2885 ret { <16 x i8>, <16 x i8> } %ld2
2886}
2887
2888declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
2889
2890
; Calls the ld2lane intrinsic (<8 x i8> vectors %B/%C, lane index 0) on %A, then stores %A advanced by 2 x i8 (2 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld2.b ..., [x0], #2.
2891define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
2892;CHECK-LABEL: test_v8i8_post_imm_ld2lane:
2893;CHECK: ld2.b { v0, v1 }[0], [x0], #2
2894 %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
2895 %tmp = getelementptr i8* %A, i32 2
2896 store i8* %tmp, i8** %ptr
2897 ret { <8 x i8>, <8 x i8> } %ld2
2898}
2899
; Calls the ld2lane intrinsic (<8 x i8> vectors %B/%C, lane index 0) on %A, then stores %A advanced by %inc x i8 to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld2.b ..., [x0], xN.
2900define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind {
2901;CHECK-LABEL: test_v8i8_post_reg_ld2lane:
2902;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
2903 %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
2904 %tmp = getelementptr i8* %A, i64 %inc
2905 store i8* %tmp, i8** %ptr
2906 ret { <8 x i8>, <8 x i8> } %ld2
2907}
2908
2909declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*) nounwind readonly
2910
2911
; Calls the ld2lane intrinsic (<8 x i16> vectors %B/%C, lane index 0) on %A, then stores %A advanced by 2 x i16 (4 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld2.h ..., [x0], #4.
2912define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
2913;CHECK-LABEL: test_v8i16_post_imm_ld2lane:
2914;CHECK: ld2.h { v0, v1 }[0], [x0], #4
2915 %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
2916 %tmp = getelementptr i16* %A, i32 2
2917 store i16* %tmp, i16** %ptr
2918 ret { <8 x i16>, <8 x i16> } %ld2
2919}
2920
; Calls the ld2lane intrinsic (<8 x i16> vectors %B/%C, lane index 0) on %A, then stores %A advanced by %inc x i16 to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld2.h ..., [x0], xN.
2921define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind {
2922;CHECK-LABEL: test_v8i16_post_reg_ld2lane:
2923;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
2924 %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
2925 %tmp = getelementptr i16* %A, i64 %inc
2926 store i16* %tmp, i16** %ptr
2927 ret { <8 x i16>, <8 x i16> } %ld2
2928}
2929
2930declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
2931
2932
; Calls the ld2lane intrinsic (<4 x i16> vectors %B/%C, lane index 0) on %A, then stores %A advanced by 2 x i16 (4 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld2.h ..., [x0], #4.
2933define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
2934;CHECK-LABEL: test_v4i16_post_imm_ld2lane:
2935;CHECK: ld2.h { v0, v1 }[0], [x0], #4
2936 %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
2937 %tmp = getelementptr i16* %A, i32 2
2938 store i16* %tmp, i16** %ptr
2939 ret { <4 x i16>, <4 x i16> } %ld2
2940}
2941
; Calls the ld2lane intrinsic (<4 x i16> vectors %B/%C, lane index 0) on %A, then stores %A advanced by %inc x i16 to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld2.h ..., [x0], xN.
2942define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind {
2943;CHECK-LABEL: test_v4i16_post_reg_ld2lane:
2944;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
2945 %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
2946 %tmp = getelementptr i16* %A, i64 %inc
2947 store i16* %tmp, i16** %ptr
2948 ret { <4 x i16>, <4 x i16> } %ld2
2949}
2950
2951declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*) nounwind readonly
2952
2953
; Calls the ld2lane intrinsic (<4 x i32> vectors %B/%C, lane index 0) on %A, then stores %A advanced by 2 x i32 (8 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld2.s ..., [x0], #8.
2954define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
2955;CHECK-LABEL: test_v4i32_post_imm_ld2lane:
2956;CHECK: ld2.s { v0, v1 }[0], [x0], #8
2957 %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
2958 %tmp = getelementptr i32* %A, i32 2
2959 store i32* %tmp, i32** %ptr
2960 ret { <4 x i32>, <4 x i32> } %ld2
2961}
2962
; Calls the ld2lane intrinsic (<4 x i32> vectors %B/%C, lane index 0) on %A, then stores %A advanced by %inc x i32 to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld2.s ..., [x0], xN.
2963define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind {
2964;CHECK-LABEL: test_v4i32_post_reg_ld2lane:
2965;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
2966 %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
2967 %tmp = getelementptr i32* %A, i64 %inc
2968 store i32* %tmp, i32** %ptr
2969 ret { <4 x i32>, <4 x i32> } %ld2
2970}
2971
2972declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
2973
2974
; Calls the ld2lane intrinsic (<2 x i32> vectors %B/%C, lane index 0) on %A, then stores %A advanced by 2 x i32 (8 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld2.s ..., [x0], #8.
2975define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
2976;CHECK-LABEL: test_v2i32_post_imm_ld2lane:
2977;CHECK: ld2.s { v0, v1 }[0], [x0], #8
2978 %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
2979 %tmp = getelementptr i32* %A, i32 2
2980 store i32* %tmp, i32** %ptr
2981 ret { <2 x i32>, <2 x i32> } %ld2
2982}
2983
; Calls the ld2lane intrinsic (<2 x i32> vectors %B/%C, lane index 0) on %A, then stores %A advanced by %inc x i32 to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld2.s ..., [x0], xN.
2984define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind {
2985;CHECK-LABEL: test_v2i32_post_reg_ld2lane:
2986;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
2987 %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
2988 %tmp = getelementptr i32* %A, i64 %inc
2989 store i32* %tmp, i32** %ptr
2990 ret { <2 x i32>, <2 x i32> } %ld2
2991}
2992
2993declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*) nounwind readonly
2994
2995
; Calls the ld2lane intrinsic (<2 x i64> vectors %B/%C, lane index 0) on %A, then stores %A advanced by 2 x i64 (16 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld2.d ..., [x0], #16.
2996define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
2997;CHECK-LABEL: test_v2i64_post_imm_ld2lane:
2998;CHECK: ld2.d { v0, v1 }[0], [x0], #16
2999 %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
3000 %tmp = getelementptr i64* %A, i32 2
3001 store i64* %tmp, i64** %ptr
3002 ret { <2 x i64>, <2 x i64> } %ld2
3003}
3004
; Calls the ld2lane intrinsic (<2 x i64> vectors %B/%C, lane index 0) on %A, then stores %A advanced by %inc x i64 to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld2.d ..., [x0], xN.
3005define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind {
3006;CHECK-LABEL: test_v2i64_post_reg_ld2lane:
3007;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
3008 %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
3009 %tmp = getelementptr i64* %A, i64 %inc
3010 store i64* %tmp, i64** %ptr
3011 ret { <2 x i64>, <2 x i64> } %ld2
3012}
3013
3014declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
3015
3016
; Calls the ld2lane intrinsic (<1 x i64> vectors %B/%C, lane index 0) on %A, then stores %A advanced by 2 x i64 (16 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld2.d ..., [x0], #16.
3017define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
3018;CHECK-LABEL: test_v1i64_post_imm_ld2lane:
3019;CHECK: ld2.d { v0, v1 }[0], [x0], #16
3020 %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
3021 %tmp = getelementptr i64* %A, i32 2
3022 store i64* %tmp, i64** %ptr
3023 ret { <1 x i64>, <1 x i64> } %ld2
3024}
3025
; Calls the ld2lane intrinsic (<1 x i64> vectors %B/%C, lane index 0) on %A, then stores %A advanced by %inc x i64 to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld2.d ..., [x0], xN.
3026define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind {
3027;CHECK-LABEL: test_v1i64_post_reg_ld2lane:
3028;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
3029 %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
3030 %tmp = getelementptr i64* %A, i64 %inc
3031 store i64* %tmp, i64** %ptr
3032 ret { <1 x i64>, <1 x i64> } %ld2
3033}
3034
3035declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) nounwind readonly
3036
3037
; Calls the ld2lane intrinsic (<4 x float> vectors %B/%C, lane index 0) on %A, then stores %A advanced by 2 floats (8 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld2.s ..., [x0], #8.
3038define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
3039;CHECK-LABEL: test_v4f32_post_imm_ld2lane:
3040;CHECK: ld2.s { v0, v1 }[0], [x0], #8
3041 %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
3042 %tmp = getelementptr float* %A, i32 2
3043 store float* %tmp, float** %ptr
3044 ret { <4 x float>, <4 x float> } %ld2
3045}
3046
; Calls the ld2lane intrinsic (<4 x float> vectors %B/%C, lane index 0) on %A, then stores %A advanced by %inc floats to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld2.s ..., [x0], xN.
3047define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind {
3048;CHECK-LABEL: test_v4f32_post_reg_ld2lane:
3049;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
3050 %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
3051 %tmp = getelementptr float* %A, i64 %inc
3052 store float* %tmp, float** %ptr
3053 ret { <4 x float>, <4 x float> } %ld2
3054}
3055
3056declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*) nounwind readonly
3057
3058
; Calls the ld2lane intrinsic (<2 x float> vectors %B/%C, lane index 0) on %A, then stores %A advanced by 2 floats (8 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld2.s ..., [x0], #8.
3059define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
3060;CHECK-LABEL: test_v2f32_post_imm_ld2lane:
3061;CHECK: ld2.s { v0, v1 }[0], [x0], #8
3062 %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
3063 %tmp = getelementptr float* %A, i32 2
3064 store float* %tmp, float** %ptr
3065 ret { <2 x float>, <2 x float> } %ld2
3066}
3067
; Calls the ld2lane intrinsic (<2 x float> vectors %B/%C, lane index 0) on %A, then stores %A advanced by %inc floats to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld2.s ..., [x0], xN.
3068define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind {
3069;CHECK-LABEL: test_v2f32_post_reg_ld2lane:
3070;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
3071 %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
3072 %tmp = getelementptr float* %A, i64 %inc
3073 store float* %tmp, float** %ptr
3074 ret { <2 x float>, <2 x float> } %ld2
3075}
3076
3077declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*) nounwind readonly
3078
3079
; Calls the ld2lane intrinsic (<2 x double> vectors %B/%C, lane index 0) on %A, then stores %A advanced by 2 doubles (16 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld2.d ..., [x0], #16.
3080define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
3081;CHECK-LABEL: test_v2f64_post_imm_ld2lane:
3082;CHECK: ld2.d { v0, v1 }[0], [x0], #16
3083 %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
3084 %tmp = getelementptr double* %A, i32 2
3085 store double* %tmp, double** %ptr
3086 ret { <2 x double>, <2 x double> } %ld2
3087}
3088
; Calls the ld2lane intrinsic (<2 x double> vectors %B/%C, lane index 0) on %A, then stores %A advanced by %inc doubles to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld2.d ..., [x0], xN.
3089define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind {
3090;CHECK-LABEL: test_v2f64_post_reg_ld2lane:
3091;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
3092 %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
3093 %tmp = getelementptr double* %A, i64 %inc
3094 store double* %tmp, double** %ptr
3095 ret { <2 x double>, <2 x double> } %ld2
3096}
3097
3098declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*) nounwind readonly
3099
3100
; Calls the ld2lane intrinsic (<1 x double> vectors %B/%C, lane index 0) on %A, then stores %A advanced by 2 doubles (16 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld2.d ..., [x0], #16.
3101define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
3102;CHECK-LABEL: test_v1f64_post_imm_ld2lane:
3103;CHECK: ld2.d { v0, v1 }[0], [x0], #16
3104 %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
3105 %tmp = getelementptr double* %A, i32 2
3106 store double* %tmp, double** %ptr
3107 ret { <1 x double>, <1 x double> } %ld2
3108}
3109
; Calls the ld2lane intrinsic (<1 x double> vectors %B/%C, lane index 0) on %A, then stores %A advanced by %inc doubles to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld2.d ..., [x0], xN.
3110define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind {
3111;CHECK-LABEL: test_v1f64_post_reg_ld2lane:
3112;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
3113 %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
3114 %tmp = getelementptr double* %A, i64 %inc
3115 store double* %tmp, double** %ptr
3116 ret { <1 x double>, <1 x double> } %ld2
3117}
3118
3119declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*) nounwind readonly
3120
3121
; Calls the ld3lane intrinsic (<16 x i8> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by 3 x i8 (3 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld3.b ..., [x0], #3.
3122define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
3123;CHECK-LABEL: test_v16i8_post_imm_ld3lane:
3124;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3
3125 %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
3126 %tmp = getelementptr i8* %A, i32 3
3127 store i8* %tmp, i8** %ptr
3128 ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
3129}
3130
; Calls the ld3lane intrinsic (<16 x i8> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by %inc x i8 to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld3.b ..., [x0], xN.
3131define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
3132;CHECK-LABEL: test_v16i8_post_reg_ld3lane:
3133;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3134 %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
3135 %tmp = getelementptr i8* %A, i64 %inc
3136 store i8* %tmp, i8** %ptr
3137 ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
3138}
3139
3140declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
3141
3142
; Calls the ld3lane intrinsic (<8 x i8> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by 3 x i8 (3 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld3.b ..., [x0], #3.
3143define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
3144;CHECK-LABEL: test_v8i8_post_imm_ld3lane:
3145;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3
3146 %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
3147 %tmp = getelementptr i8* %A, i32 3
3148 store i8* %tmp, i8** %ptr
3149 ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
3150}
3151
; Calls the ld3lane intrinsic (<8 x i8> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by %inc x i8 to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld3.b ..., [x0], xN.
3152define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
3153;CHECK-LABEL: test_v8i8_post_reg_ld3lane:
3154;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3155 %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
3156 %tmp = getelementptr i8* %A, i64 %inc
3157 store i8* %tmp, i8** %ptr
3158 ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
3159}
3160
3161declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly
3162
3163
; Calls the ld3lane intrinsic (<8 x i16> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by 3 x i16 (6 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld3.h ..., [x0], #6.
3164define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
3165;CHECK-LABEL: test_v8i16_post_imm_ld3lane:
3166;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6
3167 %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
3168 %tmp = getelementptr i16* %A, i32 3
3169 store i16* %tmp, i16** %ptr
3170 ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
3171}
3172
; Calls the ld3lane intrinsic (<8 x i16> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by %inc x i16 to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld3.h ..., [x0], xN.
3173define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
3174;CHECK-LABEL: test_v8i16_post_reg_ld3lane:
3175;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3176 %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
3177 %tmp = getelementptr i16* %A, i64 %inc
3178 store i16* %tmp, i16** %ptr
3179 ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
3180}
3181
3182declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
3183
3184
; Calls the ld3lane intrinsic (<4 x i16> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by 3 x i16 (6 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld3.h ..., [x0], #6.
3185define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
3186;CHECK-LABEL: test_v4i16_post_imm_ld3lane:
3187;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6
3188 %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
3189 %tmp = getelementptr i16* %A, i32 3
3190 store i16* %tmp, i16** %ptr
3191 ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
3192}
3193
; Calls the ld3lane intrinsic (<4 x i16> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by %inc x i16 to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld3.h ..., [x0], xN.
3194define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
3195;CHECK-LABEL: test_v4i16_post_reg_ld3lane:
3196;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3197 %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
3198 %tmp = getelementptr i16* %A, i64 %inc
3199 store i16* %tmp, i16** %ptr
3200 ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
3201}
3202
3203declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly
3204
3205
; Calls the ld3lane intrinsic (<4 x i32> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by 3 x i32 (12 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld3.s ..., [x0], #12.
3206define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
3207;CHECK-LABEL: test_v4i32_post_imm_ld3lane:
3208;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
3209 %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
3210 %tmp = getelementptr i32* %A, i32 3
3211 store i32* %tmp, i32** %ptr
3212 ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
3213}
3214
; Calls the ld3lane intrinsic (<4 x i32> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by %inc x i32 to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld3.s ..., [x0], xN.
3215define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
3216;CHECK-LABEL: test_v4i32_post_reg_ld3lane:
3217;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3218 %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
3219 %tmp = getelementptr i32* %A, i64 %inc
3220 store i32* %tmp, i32** %ptr
3221 ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
3222}
3223
3224declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
3225
3226
; Calls the ld3lane intrinsic (<2 x i32> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by 3 x i32 (12 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld3.s ..., [x0], #12.
3227define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
3228;CHECK-LABEL: test_v2i32_post_imm_ld3lane:
3229;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
3230 %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
3231 %tmp = getelementptr i32* %A, i32 3
3232 store i32* %tmp, i32** %ptr
3233 ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
3234}
3235
; Calls the ld3lane intrinsic (<2 x i32> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by %inc x i32 to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld3.s ..., [x0], xN.
3236define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
3237;CHECK-LABEL: test_v2i32_post_reg_ld3lane:
3238;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3239 %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
3240 %tmp = getelementptr i32* %A, i64 %inc
3241 store i32* %tmp, i32** %ptr
3242 ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
3243}
3244
3245declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly
3246
3247
; Calls the ld3lane intrinsic (<2 x i64> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by 3 x i64 (24 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld3.d ..., [x0], #24.
3248define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
3249;CHECK-LABEL: test_v2i64_post_imm_ld3lane:
3250;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
3251 %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
3252 %tmp = getelementptr i64* %A, i32 3
3253 store i64* %tmp, i64** %ptr
3254 ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
3255}
3256
; Calls the ld3lane intrinsic (<2 x i64> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by %inc x i64 to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld3.d ..., [x0], xN.
3257define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
3258;CHECK-LABEL: test_v2i64_post_reg_ld3lane:
3259;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3260 %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
3261 %tmp = getelementptr i64* %A, i64 %inc
3262 store i64* %tmp, i64** %ptr
3263 ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
3264}
3265
3266declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
3267
3268
; Calls the ld3lane intrinsic (<1 x i64> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by 3 x i64 (24 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld3.d ..., [x0], #24.
3269define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
3270;CHECK-LABEL: test_v1i64_post_imm_ld3lane:
3271;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
3272 %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
3273 %tmp = getelementptr i64* %A, i32 3
3274 store i64* %tmp, i64** %ptr
3275 ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
3276}
3277
; Calls the ld3lane intrinsic (<1 x i64> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by %inc x i64 to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld3.d ..., [x0], xN.
3278define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
3279;CHECK-LABEL: test_v1i64_post_reg_ld3lane:
3280;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3281 %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
3282 %tmp = getelementptr i64* %A, i64 %inc
3283 store i64* %tmp, i64** %ptr
3284 ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
3285}
3286
3287declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly
3288
3289
; Calls the ld3lane intrinsic (<4 x float> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by 3 floats (12 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld3.s ..., [x0], #12.
3290define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
3291;CHECK-LABEL: test_v4f32_post_imm_ld3lane:
3292;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
3293 %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
3294 %tmp = getelementptr float* %A, i32 3
3295 store float* %tmp, float** %ptr
3296 ret { <4 x float>, <4 x float>, <4 x float> } %ld3
3297}
3298
; Calls the ld3lane intrinsic (<4 x float> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by %inc floats to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld3.s ..., [x0], xN.
3299define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
3300;CHECK-LABEL: test_v4f32_post_reg_ld3lane:
3301;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3302 %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
3303 %tmp = getelementptr float* %A, i64 %inc
3304 store float* %tmp, float** %ptr
3305 ret { <4 x float>, <4 x float>, <4 x float> } %ld3
3306}
3307
3308declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly
3309
3310
; Calls the ld3lane intrinsic (<2 x float> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by 3 floats (12 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld3.s ..., [x0], #12.
3311define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
3312;CHECK-LABEL: test_v2f32_post_imm_ld3lane:
3313;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
3314 %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
3315 %tmp = getelementptr float* %A, i32 3
3316 store float* %tmp, float** %ptr
3317 ret { <2 x float>, <2 x float>, <2 x float> } %ld3
3318}
3319
; Calls the ld3lane intrinsic (<2 x float> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by %inc floats to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld3.s ..., [x0], xN.
3320define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
3321;CHECK-LABEL: test_v2f32_post_reg_ld3lane:
3322;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3323 %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
3324 %tmp = getelementptr float* %A, i64 %inc
3325 store float* %tmp, float** %ptr
3326 ret { <2 x float>, <2 x float>, <2 x float> } %ld3
3327}
3328
3329declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly
3330
3331
; Calls the ld3lane intrinsic (<2 x double> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by 3 doubles (24 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld3.d ..., [x0], #24.
3332define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
3333;CHECK-LABEL: test_v2f64_post_imm_ld3lane:
3334;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
3335 %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
3336 %tmp = getelementptr double* %A, i32 3
3337 store double* %tmp, double** %ptr
3338 ret { <2 x double>, <2 x double>, <2 x double> } %ld3
3339}
3340
; Calls the ld3lane intrinsic (<2 x double> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by %inc doubles to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld3.d ..., [x0], xN.
3341define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
3342;CHECK-LABEL: test_v2f64_post_reg_ld3lane:
3343;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3344 %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
3345 %tmp = getelementptr double* %A, i64 %inc
3346 store double* %tmp, double** %ptr
3347 ret { <2 x double>, <2 x double>, <2 x double> } %ld3
3348}
3349
3350declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly
3351
3352
; Calls the ld3lane intrinsic (<1 x double> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by 3 doubles (24 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld3.d ..., [x0], #24.
3353define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
3354;CHECK-LABEL: test_v1f64_post_imm_ld3lane:
3355;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
3356 %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
3357 %tmp = getelementptr double* %A, i32 3
3358 store double* %tmp, double** %ptr
3359 ret { <1 x double>, <1 x double>, <1 x double> } %ld3
3360}
3361
; Calls the ld3lane intrinsic (<1 x double> vectors %B/%C/%D, lane index 0) on %A, then stores %A advanced by %inc doubles to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld3.d ..., [x0], xN.
3362define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
3363;CHECK-LABEL: test_v1f64_post_reg_ld3lane:
3364;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3365 %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
3366 %tmp = getelementptr double* %A, i64 %inc
3367 store double* %tmp, double** %ptr
3368 ret { <1 x double>, <1 x double>, <1 x double> } %ld3
3369}
3370
3371declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly
3372
3373
; Calls the ld4lane intrinsic (<16 x i8> vectors %B/%C/%D/%E, lane index 0) on %A, then stores %A advanced by 4 x i8 (4 bytes) to %ptr.
; The expected assembly folds the increment into the load as an immediate: ld4.b ..., [x0], #4.
3374define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
3375;CHECK-LABEL: test_v16i8_post_imm_ld4lane:
3376;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
3377 %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
3378 %tmp = getelementptr i8* %A, i32 4
3379 store i8* %tmp, i8** %ptr
3380 ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
3381}
3382
; Calls the ld4lane intrinsic (<16 x i8> vectors %B/%C/%D/%E, lane index 0) on %A, then stores %A advanced by %inc x i8 to %ptr.
; The expected assembly folds the increment into the load as a register operand: ld4.b ..., [x0], xN.
3383define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
3384;CHECK-LABEL: test_v16i8_post_reg_ld4lane:
3385;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
3386 %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
3387 %tmp = getelementptr i8* %A, i64 %inc
3388 store i8* %tmp, i8** %ptr
3389 ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
3390}
3391
3392declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
3393
3394
; Lane-0 ld4 into <8 x i8> vectors followed by a constant bump of 4 x i8
; (4 bytes) stored through %ptr; expects the #4 post-index form.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
;CHECK-LABEL: test_v8i8_post_imm_ld4lane:
;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
  %vecs = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
  %next = getelementptr i8* %A, i32 4
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vecs
}
3403
; Lane-0 ld4 into <8 x i8> vectors with a run-time bump of %inc x i8
; stored through %ptr; expects the register post-index form.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
;CHECK-LABEL: test_v8i8_post_reg_ld4lane:
;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %vecs = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vecs
}
3412
3413declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly
3414
3415
; Lane-0 ld4 into <8 x i16> vectors followed by a constant bump of 4 x i16
; (8 bytes) stored through %ptr; expects the #8 post-index form.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
;CHECK-LABEL: test_v8i16_post_imm_ld4lane:
;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
  %vecs = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
  %next = getelementptr i16* %A, i32 4
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vecs
}
3424
; Lane-0 ld4 into <8 x i16> vectors with a run-time bump of %inc x i16
; stored through %ptr; expects the register post-index form.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
;CHECK-LABEL: test_v8i16_post_reg_ld4lane:
;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %vecs = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vecs
}
3433
3434declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
3435
3436
; Lane-0 ld4 into <4 x i16> vectors followed by a constant bump of 4 x i16
; (8 bytes) stored through %ptr; expects the #8 post-index form.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
;CHECK-LABEL: test_v4i16_post_imm_ld4lane:
;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
  %vecs = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
  %next = getelementptr i16* %A, i32 4
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vecs
}
3445
; Lane-0 ld4 into <4 x i16> vectors with a run-time bump of %inc x i16
; stored through %ptr; expects the register post-index form.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
;CHECK-LABEL: test_v4i16_post_reg_ld4lane:
;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %vecs = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vecs
}
3454
3455declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly
3456
3457
; Lane-0 ld4 into <4 x i32> vectors followed by a constant bump of 4 x i32
; (16 bytes) stored through %ptr; expects the #16 post-index form.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
;CHECK-LABEL: test_v4i32_post_imm_ld4lane:
;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
  %vecs = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
  %next = getelementptr i32* %A, i32 4
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vecs
}
3466
; Lane-0 ld4 into <4 x i32> vectors with a run-time bump of %inc x i32
; stored through %ptr; expects the register post-index form.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
;CHECK-LABEL: test_v4i32_post_reg_ld4lane:
;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %vecs = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vecs
}
3475
3476declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
3477
3478
; Lane-0 ld4 into <2 x i32> vectors followed by a constant bump of 4 x i32
; (16 bytes) stored through %ptr; expects the #16 post-index form.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
;CHECK-LABEL: test_v2i32_post_imm_ld4lane:
;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
  %vecs = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
  %next = getelementptr i32* %A, i32 4
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vecs
}
3487
; Lane-0 ld4 into <2 x i32> vectors with a run-time bump of %inc x i32
; stored through %ptr; expects the register post-index form.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
;CHECK-LABEL: test_v2i32_post_reg_ld4lane:
;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %vecs = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vecs
}
3496
3497declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly
3498
3499
; Lane-0 ld4 into <2 x i64> vectors followed by a constant bump of 4 x i64
; (32 bytes) stored through %ptr; expects the #32 post-index form.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
;CHECK-LABEL: test_v2i64_post_imm_ld4lane:
;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
  %vecs = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
  %next = getelementptr i64* %A, i32 4
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vecs
}
3508
; Lane-0 ld4 into <2 x i64> vectors with a run-time bump of %inc x i64
; stored through %ptr; expects the register post-index form.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
;CHECK-LABEL: test_v2i64_post_reg_ld4lane:
;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %vecs = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vecs
}
3517
3518declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
3519
3520
; Lane-0 ld4 into <1 x i64> vectors followed by a constant bump of 4 x i64
; (32 bytes) stored through %ptr; expects the #32 post-index form.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
;CHECK-LABEL: test_v1i64_post_imm_ld4lane:
;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
  %vecs = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
  %next = getelementptr i64* %A, i32 4
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vecs
}
3529
; Lane-0 ld4 into <1 x i64> vectors with a run-time bump of %inc x i64
; stored through %ptr; expects the register post-index form.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
;CHECK-LABEL: test_v1i64_post_reg_ld4lane:
;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %vecs = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vecs
}
3538
3539declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly
3540
3541
; Lane-0 ld4 into <4 x float> vectors followed by a constant bump of
; 4 x float (16 bytes) stored through %ptr; expects the #16 post-index form.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
;CHECK-LABEL: test_v4f32_post_imm_ld4lane:
;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
  %vecs = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
  %next = getelementptr float* %A, i32 4
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vecs
}
3550
; Lane-0 ld4 into <4 x float> vectors with a run-time bump of %inc x float
; stored through %ptr; expects the register post-index form.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
;CHECK-LABEL: test_v4f32_post_reg_ld4lane:
;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %vecs = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
  %next = getelementptr float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vecs
}
3559
3560declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly
3561
3562
; Lane-0 ld4 into <2 x float> vectors followed by a constant bump of
; 4 x float (16 bytes) stored through %ptr; expects the #16 post-index form.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
;CHECK-LABEL: test_v2f32_post_imm_ld4lane:
;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
  %vecs = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
  %next = getelementptr float* %A, i32 4
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vecs
}
3571
; Lane-0 ld4 into <2 x float> vectors with a run-time bump of %inc x float
; stored through %ptr; expects the register post-index form.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
;CHECK-LABEL: test_v2f32_post_reg_ld4lane:
;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %vecs = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
  %next = getelementptr float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vecs
}
3580
3581declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly
3582
3583
; Lane-0 ld4 into <2 x double> vectors followed by a constant bump of
; 4 x double (32 bytes) stored through %ptr; expects the #32 post-index form.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
;CHECK-LABEL: test_v2f64_post_imm_ld4lane:
;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
  %vecs = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
  %next = getelementptr double* %A, i32 4
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vecs
}
3592
; Lane-0 ld4 into <2 x double> vectors with a run-time bump of
; %inc x double stored through %ptr; expects the register post-index form.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
;CHECK-LABEL: test_v2f64_post_reg_ld4lane:
;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %vecs = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
  %next = getelementptr double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vecs
}
3601
3602declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly
3603
3604
; Lane-0 ld4 into <1 x double> vectors followed by a constant bump of
; 4 x double (32 bytes) stored through %ptr; expects the #32 post-index form.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
;CHECK-LABEL: test_v1f64_post_imm_ld4lane:
;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
  %vecs = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
  %next = getelementptr double* %A, i32 4
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vecs
}
3613
; Lane-0 ld4 into <1 x double> vectors with a run-time bump of
; %inc x double stored through %ptr; expects the register post-index form.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
;CHECK-LABEL: test_v1f64_post_reg_ld4lane:
;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %vecs = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
  %next = getelementptr double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vecs
}
3622
3623declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly
3624
3625
; st2 of two <16 x i8> vectors; the function returns %A advanced by
; 32 x i8 (32 bytes), expected to fold into the #32 post-index store.
; %ptr is not used by the body.
define i8* @test_v16i8_post_imm_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
;CHECK-LABEL: test_v16i8_post_imm_st2:
;CHECK: st2.16b { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
  %next = getelementptr i8* %A, i32 32
  ret i8* %next
}
3633
; st2 of two <16 x i8> vectors; returns %A advanced by a run-time
; %inc x i8, expected to use the register post-index store.
; %ptr is not used by the body.
define i8* @test_v16i8_post_reg_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v16i8_post_reg_st2:
;CHECK: st2.16b { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  ret i8* %next
}
3641
3642declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*)
3643
3644
; st2 of two <8 x i8> vectors; returns %A advanced by 16 x i8 (16 bytes),
; expected to fold into the #16 post-index store. %ptr is not used.
define i8* @test_v8i8_post_imm_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
;CHECK-LABEL: test_v8i8_post_imm_st2:
;CHECK: st2.8b { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
  %next = getelementptr i8* %A, i32 16
  ret i8* %next
}
3652
; st2 of two <8 x i8> vectors; returns %A advanced by a run-time %inc x i8,
; expected to use the register post-index store. %ptr is not used.
define i8* @test_v8i8_post_reg_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v8i8_post_reg_st2:
;CHECK: st2.8b { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  ret i8* %next
}
3660
3661declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)
3662
3663
; st2 of two <8 x i16> vectors; returns %A advanced by 16 x i16 (32 bytes),
; expected to fold into the #32 post-index store. %ptr is not used.
define i16* @test_v8i16_post_imm_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
;CHECK-LABEL: test_v8i16_post_imm_st2:
;CHECK: st2.8h { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
  %next = getelementptr i16* %A, i32 16
  ret i16* %next
}
3671
; st2 of two <8 x i16> vectors; returns %A advanced by a run-time
; %inc x i16, expected to use the register post-index store.
; %ptr is not used by the body.
define i16* @test_v8i16_post_reg_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v8i16_post_reg_st2:
;CHECK: st2.8h { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  ret i16* %next
}
3679
3680declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*)
3681
3682
; st2 of two <4 x i16> vectors; returns %A advanced by 8 x i16 (16 bytes),
; expected to fold into the #16 post-index store. %ptr is not used.
define i16* @test_v4i16_post_imm_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
;CHECK-LABEL: test_v4i16_post_imm_st2:
;CHECK: st2.4h { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
  %next = getelementptr i16* %A, i32 8
  ret i16* %next
}
3690
; st2 of two <4 x i16> vectors; returns %A advanced by a run-time
; %inc x i16, expected to use the register post-index store.
; %ptr is not used by the body.
define i16* @test_v4i16_post_reg_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v4i16_post_reg_st2:
;CHECK: st2.4h { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  ret i16* %next
}
3698
3699declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*)
3700
3701
; st2 of two <4 x i32> vectors; returns %A advanced by 8 x i32 (32 bytes),
; expected to fold into the #32 post-index store. %ptr is not used.
define i32* @test_v4i32_post_imm_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
;CHECK-LABEL: test_v4i32_post_imm_st2:
;CHECK: st2.4s { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
  %next = getelementptr i32* %A, i32 8
  ret i32* %next
}
3709
; st2 of two <4 x i32> vectors; returns %A advanced by a run-time
; %inc x i32, expected to use the register post-index store.
; %ptr is not used by the body.
define i32* @test_v4i32_post_reg_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v4i32_post_reg_st2:
;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  ret i32* %next
}
3717
3718declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*)
3719
3720
; st2 of two <2 x i32> vectors; returns %A advanced by 4 x i32 (16 bytes),
; expected to fold into the #16 post-index store. %ptr is not used.
define i32* @test_v2i32_post_imm_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
;CHECK-LABEL: test_v2i32_post_imm_st2:
;CHECK: st2.2s { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
  %next = getelementptr i32* %A, i32 4
  ret i32* %next
}
3728
; st2 of two <2 x i32> vectors; returns %A advanced by a run-time
; %inc x i32, expected to use the register post-index store.
; %ptr is not used by the body.
define i32* @test_v2i32_post_reg_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v2i32_post_reg_st2:
;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  ret i32* %next
}
3736
3737declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*)
3738
3739
; st2 of two <2 x i64> vectors; returns %A advanced by 4 x i64 (32 bytes),
; expected to fold into the #32 post-index store. %ptr is not used.
define i64* @test_v2i64_post_imm_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
;CHECK-LABEL: test_v2i64_post_imm_st2:
;CHECK: st2.2d { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
  %next = getelementptr i64* %A, i64 4
  ret i64* %next
}
3747
; st2 of two <2 x i64> vectors; returns %A advanced by a run-time
; %inc x i64, expected to use the register post-index store.
; %ptr is not used by the body.
define i64* @test_v2i64_post_reg_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v2i64_post_reg_st2:
;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  ret i64* %next
}
3755
3756declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*)
3757
3758
; st2 intrinsic on two <1 x i64> vectors; the expected instruction is the
; two-register st1.1d. Returns %A advanced by 2 x i64 (16 bytes), expected
; to fold into the #16 post-index form. %ptr is not used.
define i64* @test_v1i64_post_imm_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
;CHECK-LABEL: test_v1i64_post_imm_st2:
;CHECK: st1.1d { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
  %next = getelementptr i64* %A, i64 2
  ret i64* %next
}
3766
; st2 intrinsic on two <1 x i64> vectors; the expected instruction is the
; two-register st1.1d with register post-index. Returns %A advanced by a
; run-time %inc x i64. %ptr is not used.
define i64* @test_v1i64_post_reg_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v1i64_post_reg_st2:
;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  ret i64* %next
}
3774
3775declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*)
3776
3777
; st2 of two <4 x float> vectors; returns %A advanced by 8 x float
; (32 bytes), expected to fold into the #32 post-index store.
; %ptr is not used by the body.
define float* @test_v4f32_post_imm_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
;CHECK-LABEL: test_v4f32_post_imm_st2:
;CHECK: st2.4s { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
  %next = getelementptr float* %A, i32 8
  ret float* %next
}
3785
; st2 of two <4 x float> vectors; returns %A advanced by a run-time
; %inc x float, expected to use the register post-index store.
; %ptr is not used by the body.
define float* @test_v4f32_post_reg_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v4f32_post_reg_st2:
;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
  %next = getelementptr float* %A, i64 %inc
  ret float* %next
}
3793
3794declare void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float>, <4 x float>, float*)
3795
3796
; st2 of two <2 x float> vectors; returns %A advanced by 4 x float
; (16 bytes), expected to fold into the #16 post-index store.
; %ptr is not used by the body.
define float* @test_v2f32_post_imm_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
;CHECK-LABEL: test_v2f32_post_imm_st2:
;CHECK: st2.2s { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
  %next = getelementptr float* %A, i32 4
  ret float* %next
}
3804
; st2 of two <2 x float> vectors; returns %A advanced by a run-time
; %inc x float, expected to use the register post-index store.
; %ptr is not used by the body.
define float* @test_v2f32_post_reg_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v2f32_post_reg_st2:
;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
  %next = getelementptr float* %A, i64 %inc
  ret float* %next
}
3812
3813declare void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float>, <2 x float>, float*)
3814
3815
; st2 of two <2 x double> vectors; returns %A advanced by 4 x double
; (32 bytes), expected to fold into the #32 post-index store.
; %ptr is not used by the body.
define double* @test_v2f64_post_imm_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
;CHECK-LABEL: test_v2f64_post_imm_st2:
;CHECK: st2.2d { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
  %next = getelementptr double* %A, i64 4
  ret double* %next
}
3823
; st2 of two <2 x double> vectors; returns %A advanced by a run-time
; %inc x double, expected to use the register post-index store.
; %ptr is not used by the body.
define double* @test_v2f64_post_reg_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v2f64_post_reg_st2:
;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
  %next = getelementptr double* %A, i64 %inc
  ret double* %next
}
3831
3832declare void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double>, <2 x double>, double*)
3833
3834
; st2 intrinsic on two <1 x double> vectors; the expected instruction is
; the two-register st1.1d. Returns %A advanced by 2 x double (16 bytes),
; expected to fold into the #16 post-index form. %ptr is not used.
define double* @test_v1f64_post_imm_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
;CHECK-LABEL: test_v1f64_post_imm_st2:
;CHECK: st1.1d { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
  %next = getelementptr double* %A, i64 2
  ret double* %next
}
3842
; st2 intrinsic on two <1 x double> vectors; the expected instruction is
; the two-register st1.1d with register post-index. Returns %A advanced by
; a run-time %inc x double. %ptr is not used.
define double* @test_v1f64_post_reg_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v1f64_post_reg_st2:
;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
  %next = getelementptr double* %A, i64 %inc
  ret double* %next
}
3850
3851declare void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double>, <1 x double>, double*)
3852
3853
; st3 of three <16 x i8> vectors; returns %A advanced by 48 x i8
; (48 bytes), expected to fold into the #48 post-index store.
; %ptr is not used by the body.
define i8* @test_v16i8_post_imm_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
;CHECK-LABEL: test_v16i8_post_imm_st3:
;CHECK: st3.16b { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
  %next = getelementptr i8* %A, i32 48
  ret i8* %next
}
3861
; st3 of three <16 x i8> vectors; returns %A advanced by a run-time
; %inc x i8, expected to use the register post-index store.
; %ptr is not used by the body.
define i8* @test_v16i8_post_reg_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v16i8_post_reg_st3:
;CHECK: st3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  ret i8* %next
}
3869
3870declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*)
3871
3872
; st3 of three <8 x i8> vectors; returns %A advanced by 24 x i8 (24 bytes),
; expected to fold into the #24 post-index store. %ptr is not used.
define i8* @test_v8i8_post_imm_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
;CHECK-LABEL: test_v8i8_post_imm_st3:
;CHECK: st3.8b { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
  %next = getelementptr i8* %A, i32 24
  ret i8* %next
}
3880
; st3 of three <8 x i8> vectors; returns %A advanced by a run-time
; %inc x i8, expected to use the register post-index store.
; %ptr is not used by the body.
define i8* @test_v8i8_post_reg_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v8i8_post_reg_st3:
;CHECK: st3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  ret i8* %next
}
3888
3889declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*)
3890
3891
; st3 of three <8 x i16> vectors; returns %A advanced by 24 x i16
; (48 bytes), expected to fold into the #48 post-index store.
; %ptr is not used by the body.
define i16* @test_v8i16_post_imm_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
;CHECK-LABEL: test_v8i16_post_imm_st3:
;CHECK: st3.8h { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
  %next = getelementptr i16* %A, i32 24
  ret i16* %next
}
3899
; st3 of three <8 x i16> vectors; returns %A advanced by a run-time
; %inc x i16, expected to use the register post-index store.
; %ptr is not used by the body.
define i16* @test_v8i16_post_reg_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v8i16_post_reg_st3:
;CHECK: st3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  ret i16* %next
}
3907
3908declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*)
3909
3910
; st3 of three <4 x i16> vectors; returns %A advanced by 12 x i16
; (24 bytes), expected to fold into the #24 post-index store.
; %ptr is not used by the body.
define i16* @test_v4i16_post_imm_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
;CHECK-LABEL: test_v4i16_post_imm_st3:
;CHECK: st3.4h { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
  %next = getelementptr i16* %A, i32 12
  ret i16* %next
}
3918
; st3 of three <4 x i16> vectors; returns %A advanced by a run-time
; %inc x i16, expected to use the register post-index store.
; %ptr is not used by the body.
define i16* @test_v4i16_post_reg_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v4i16_post_reg_st3:
;CHECK: st3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  ret i16* %next
}
3926
3927declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*)
3928
3929
; st3 of three <4 x i32> vectors; returns %A advanced by 12 x i32
; (48 bytes), expected to fold into the #48 post-index store.
; %ptr is not used by the body.
define i32* @test_v4i32_post_imm_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
;CHECK-LABEL: test_v4i32_post_imm_st3:
;CHECK: st3.4s { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
  %next = getelementptr i32* %A, i32 12
  ret i32* %next
}
3937
; st3 of three <4 x i32> vectors; returns %A advanced by a run-time
; %inc x i32, expected to use the register post-index store.
; %ptr is not used by the body.
define i32* @test_v4i32_post_reg_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v4i32_post_reg_st3:
;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  ret i32* %next
}
3945
3946declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*)
3947
3948
; st3 of three <2 x i32> vectors; returns %A advanced by 6 x i32
; (24 bytes), expected to fold into the #24 post-index store.
; %ptr is not used by the body.
define i32* @test_v2i32_post_imm_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
;CHECK-LABEL: test_v2i32_post_imm_st3:
;CHECK: st3.2s { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
  %next = getelementptr i32* %A, i32 6
  ret i32* %next
}
3956
; st3 of three <2 x i32> vectors; returns %A advanced by a run-time
; %inc x i32, expected to use the register post-index store.
; %ptr is not used by the body.
define i32* @test_v2i32_post_reg_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v2i32_post_reg_st3:
;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  ret i32* %next
}
3964
3965declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*)
3966
3967
; st3 of three <2 x i64> vectors; returns %A advanced by 6 x i64
; (48 bytes), expected to fold into the #48 post-index store.
; %ptr is not used by the body.
define i64* @test_v2i64_post_imm_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
;CHECK-LABEL: test_v2i64_post_imm_st3:
;CHECK: st3.2d { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
  %next = getelementptr i64* %A, i64 6
  ret i64* %next
}
3975
; st3 of three <2 x i64> vectors; returns %A advanced by a run-time
; %inc x i64, expected to use the register post-index store.
; %ptr is not used by the body.
define i64* @test_v2i64_post_reg_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v2i64_post_reg_st3:
;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  ret i64* %next
}
3983
3984declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*)
3985
3986
; st3 intrinsic on three <1 x i64> vectors; the expected instruction is the
; three-register st1.1d. Returns %A advanced by 3 x i64 (24 bytes),
; expected to fold into the #24 post-index form. %ptr is not used.
define i64* @test_v1i64_post_imm_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
;CHECK-LABEL: test_v1i64_post_imm_st3:
;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
  %next = getelementptr i64* %A, i64 3
  ret i64* %next
}
3994
3995define i64* @test_v1i64_post_reg_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
3996;CHECK-LABEL: test_v1i64_post_reg_st3:
3997;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
3998 call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
3999 %tmp = getelementptr i64* %A, i64 %inc
4000 ret i64* %tmp
4001}
4002
4003declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*)
4004
4005
; st3 of three <4 x float> vectors, post-indexed by an immediate: %A advances by
; 12 x float = 48 bytes, the total size stored. %ptr is unused.
define float* @test_v4f32_post_imm_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
;CHECK-LABEL: test_v4f32_post_imm_st3:
;CHECK: st3.4s { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
  %tmp = getelementptr float* %A, i32 12
  ret float* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc float elements.
define float* @test_v4f32_post_reg_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v4f32_post_reg_st3:
;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
  %tmp = getelementptr float* %A, i64 %inc
  ret float* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*)
4023
4024
; st3 of three <2 x float> vectors, post-indexed by an immediate: %A advances by
; 6 x float = 24 bytes, the total size stored. %ptr is unused.
define float* @test_v2f32_post_imm_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
;CHECK-LABEL: test_v2f32_post_imm_st3:
;CHECK: st3.2s { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
  %tmp = getelementptr float* %A, i32 6
  ret float* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc float elements.
define float* @test_v2f32_post_reg_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v2f32_post_reg_st3:
;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
  %tmp = getelementptr float* %A, i64 %inc
  ret float* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*)
4042
4043
; st3 of three <2 x double> vectors, post-indexed by an immediate: %A advances by
; 6 x double = 48 bytes, the total size stored. %ptr is unused.
define double* @test_v2f64_post_imm_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
;CHECK-LABEL: test_v2f64_post_imm_st3:
;CHECK: st3.2d { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
  %tmp = getelementptr double* %A, i64 6
  ret double* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc double elements.
define double* @test_v2f64_post_reg_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v2f64_post_reg_st3:
;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
  %tmp = getelementptr double* %A, i64 %inc
  ret double* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*)
4061
4062
; st3 of three <1 x double> vectors, post-indexed by an immediate: %A advances by
; 3 x double = 24 bytes, the total size stored. The expected instruction is the
; three-register st1.1d form. %ptr is unused.
define double* @test_v1f64_post_imm_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
;CHECK-LABEL: test_v1f64_post_imm_st3:
;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
  %tmp = getelementptr double* %A, i64 3
  ret double* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc double elements.
define double* @test_v1f64_post_reg_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v1f64_post_reg_st3:
;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
  %tmp = getelementptr double* %A, i64 %inc
  ret double* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*)
4080
4081
; st4 of four <16 x i8> vectors, post-indexed by an immediate: %A advances by
; 64 x i8 = 64 bytes, the total size stored. %ptr is unused.
define i8* @test_v16i8_post_imm_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
;CHECK-LABEL: test_v16i8_post_imm_st4:
;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
  %tmp = getelementptr i8* %A, i32 64
  ret i8* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i8 elements.
define i8* @test_v16i8_post_reg_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
;CHECK-LABEL: test_v16i8_post_reg_st4:
;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
  %tmp = getelementptr i8* %A, i64 %inc
  ret i8* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*)
4099
4100
; st4 of four <8 x i8> vectors, post-indexed by an immediate: %A advances by
; 32 x i8 = 32 bytes, the total size stored. %ptr is unused.
define i8* @test_v8i8_post_imm_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
;CHECK-LABEL: test_v8i8_post_imm_st4:
;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
  %tmp = getelementptr i8* %A, i32 32
  ret i8* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i8 elements.
define i8* @test_v8i8_post_reg_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
;CHECK-LABEL: test_v8i8_post_reg_st4:
;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
  %tmp = getelementptr i8* %A, i64 %inc
  ret i8* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*)
4118
4119
; st4 of four <8 x i16> vectors, post-indexed by an immediate: %A advances by
; 32 x i16 = 64 bytes, the total size stored. %ptr is unused.
define i16* @test_v8i16_post_imm_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
;CHECK-LABEL: test_v8i16_post_imm_st4:
;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
  %tmp = getelementptr i16* %A, i32 32
  ret i16* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i16 elements.
define i16* @test_v8i16_post_reg_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
;CHECK-LABEL: test_v8i16_post_reg_st4:
;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
  %tmp = getelementptr i16* %A, i64 %inc
  ret i16* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*)
4137
4138
; st4 of four <4 x i16> vectors, post-indexed by an immediate: %A advances by
; 16 x i16 = 32 bytes, the total size stored. %ptr is unused.
define i16* @test_v4i16_post_imm_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
;CHECK-LABEL: test_v4i16_post_imm_st4:
;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
  %tmp = getelementptr i16* %A, i32 16
  ret i16* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i16 elements.
define i16* @test_v4i16_post_reg_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
;CHECK-LABEL: test_v4i16_post_reg_st4:
;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
  %tmp = getelementptr i16* %A, i64 %inc
  ret i16* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*)
4156
4157
; st4 of four <4 x i32> vectors, post-indexed by an immediate: %A advances by
; 16 x i32 = 64 bytes, the total size stored. %ptr is unused.
define i32* @test_v4i32_post_imm_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
;CHECK-LABEL: test_v4i32_post_imm_st4:
;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
  %tmp = getelementptr i32* %A, i32 16
  ret i32* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i32 elements.
define i32* @test_v4i32_post_reg_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
;CHECK-LABEL: test_v4i32_post_reg_st4:
;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
  %tmp = getelementptr i32* %A, i64 %inc
  ret i32* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*)
4175
4176
; st4 of four <2 x i32> vectors, post-indexed by an immediate: %A advances by
; 8 x i32 = 32 bytes, the total size stored. %ptr is unused.
define i32* @test_v2i32_post_imm_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
;CHECK-LABEL: test_v2i32_post_imm_st4:
;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
  %tmp = getelementptr i32* %A, i32 8
  ret i32* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i32 elements.
define i32* @test_v2i32_post_reg_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
;CHECK-LABEL: test_v2i32_post_reg_st4:
;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
  %tmp = getelementptr i32* %A, i64 %inc
  ret i32* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*)
4194
4195
; st4 of four <2 x i64> vectors, post-indexed by an immediate: %A advances by
; 8 x i64 = 64 bytes, the total size stored. %ptr is unused.
define i64* @test_v2i64_post_imm_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
;CHECK-LABEL: test_v2i64_post_imm_st4:
;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
  %tmp = getelementptr i64* %A, i64 8
  ret i64* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i64 elements.
define i64* @test_v2i64_post_reg_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
;CHECK-LABEL: test_v2i64_post_reg_st4:
;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
  %tmp = getelementptr i64* %A, i64 %inc
  ret i64* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*)
4213
4214
; st4 of four <1 x i64> vectors, post-indexed by an immediate: %A advances by
; 4 x i64 = 32 bytes, the total size stored. The expected instruction is the
; four-register st1.1d form. %ptr is unused.
define i64* @test_v1i64_post_imm_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
;CHECK-LABEL: test_v1i64_post_imm_st4:
;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
  %tmp = getelementptr i64* %A, i64 4
  ret i64* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i64 elements.
define i64* @test_v1i64_post_reg_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
;CHECK-LABEL: test_v1i64_post_reg_st4:
;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
  %tmp = getelementptr i64* %A, i64 %inc
  ret i64* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*)
4232
4233
; st4 of four <4 x float> vectors, post-indexed by an immediate: %A advances by
; 16 x float = 64 bytes, the total size stored. %ptr is unused.
define float* @test_v4f32_post_imm_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
;CHECK-LABEL: test_v4f32_post_imm_st4:
;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
  %tmp = getelementptr float* %A, i32 16
  ret float* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc float elements.
define float* @test_v4f32_post_reg_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
;CHECK-LABEL: test_v4f32_post_reg_st4:
;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
  %tmp = getelementptr float* %A, i64 %inc
  ret float* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*)
4251
4252
; st4 of four <2 x float> vectors, post-indexed by an immediate: %A advances by
; 8 x float = 32 bytes, the total size stored. %ptr is unused.
define float* @test_v2f32_post_imm_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
;CHECK-LABEL: test_v2f32_post_imm_st4:
;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
  %tmp = getelementptr float* %A, i32 8
  ret float* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc float elements.
define float* @test_v2f32_post_reg_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
;CHECK-LABEL: test_v2f32_post_reg_st4:
;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
  %tmp = getelementptr float* %A, i64 %inc
  ret float* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*)
4270
4271
; st4 of four <2 x double> vectors, post-indexed by an immediate: %A advances by
; 8 x double = 64 bytes, the total size stored. %ptr is unused.
define double* @test_v2f64_post_imm_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
;CHECK-LABEL: test_v2f64_post_imm_st4:
;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
  %tmp = getelementptr double* %A, i64 8
  ret double* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc double elements.
define double* @test_v2f64_post_reg_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
;CHECK-LABEL: test_v2f64_post_reg_st4:
;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
  %tmp = getelementptr double* %A, i64 %inc
  ret double* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*)
4289
4290
; st4 of four <1 x double> vectors, post-indexed by an immediate: %A advances by
; 4 x double = 32 bytes, the total size stored. The expected instruction is the
; four-register st1.1d form. %ptr is unused.
define double* @test_v1f64_post_imm_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
;CHECK-LABEL: test_v1f64_post_imm_st4:
;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
  %tmp = getelementptr double* %A, i64 4
  ret double* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc double elements.
define double* @test_v1f64_post_reg_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
;CHECK-LABEL: test_v1f64_post_reg_st4:
;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
  %tmp = getelementptr double* %A, i64 %inc
  ret double* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*)
4308
4309
; st1x2 of two <16 x i8> vectors, post-indexed by an immediate: %A advances by
; 32 x i8 = 32 bytes, the total size stored. %ptr is unused.
define i8* @test_v16i8_post_imm_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
;CHECK-LABEL: test_v16i8_post_imm_st1x2:
;CHECK: st1.16b { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
  %tmp = getelementptr i8* %A, i32 32
  ret i8* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i8 elements.
define i8* @test_v16i8_post_reg_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v16i8_post_reg_st1x2:
;CHECK: st1.16b { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
  %tmp = getelementptr i8* %A, i64 %inc
  ret i8* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*)
4327
4328
; st1x2 of two <8 x i8> vectors, post-indexed by an immediate: %A advances by
; 16 x i8 = 16 bytes, the total size stored. %ptr is unused.
define i8* @test_v8i8_post_imm_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
;CHECK-LABEL: test_v8i8_post_imm_st1x2:
;CHECK: st1.8b { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
  %tmp = getelementptr i8* %A, i32 16
  ret i8* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i8 elements.
define i8* @test_v8i8_post_reg_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v8i8_post_reg_st1x2:
;CHECK: st1.8b { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
  %tmp = getelementptr i8* %A, i64 %inc
  ret i8* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)
4346
4347
; st1x2 of two <8 x i16> vectors, post-indexed by an immediate: %A advances by
; 16 x i16 = 32 bytes, the total size stored. %ptr is unused.
define i16* @test_v8i16_post_imm_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
;CHECK-LABEL: test_v8i16_post_imm_st1x2:
;CHECK: st1.8h { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
  %tmp = getelementptr i16* %A, i32 16
  ret i16* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i16 elements.
define i16* @test_v8i16_post_reg_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v8i16_post_reg_st1x2:
;CHECK: st1.8h { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
  %tmp = getelementptr i16* %A, i64 %inc
  ret i16* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*)
4365
4366
; st1x2 of two <4 x i16> vectors, post-indexed by an immediate: %A advances by
; 8 x i16 = 16 bytes, the total size stored. %ptr is unused.
define i16* @test_v4i16_post_imm_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
;CHECK-LABEL: test_v4i16_post_imm_st1x2:
;CHECK: st1.4h { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
  %tmp = getelementptr i16* %A, i32 8
  ret i16* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i16 elements.
define i16* @test_v4i16_post_reg_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v4i16_post_reg_st1x2:
;CHECK: st1.4h { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
  %tmp = getelementptr i16* %A, i64 %inc
  ret i16* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*)
4384
4385
; st1x2 of two <4 x i32> vectors, post-indexed by an immediate: %A advances by
; 8 x i32 = 32 bytes, the total size stored. %ptr is unused.
define i32* @test_v4i32_post_imm_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
;CHECK-LABEL: test_v4i32_post_imm_st1x2:
;CHECK: st1.4s { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
  %tmp = getelementptr i32* %A, i32 8
  ret i32* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i32 elements.
define i32* @test_v4i32_post_reg_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v4i32_post_reg_st1x2:
;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
  %tmp = getelementptr i32* %A, i64 %inc
  ret i32* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*)
4403
4404
; st1x2 of two <2 x i32> vectors, post-indexed by an immediate: %A advances by
; 4 x i32 = 16 bytes, the total size stored. %ptr is unused.
define i32* @test_v2i32_post_imm_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
;CHECK-LABEL: test_v2i32_post_imm_st1x2:
;CHECK: st1.2s { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
  %tmp = getelementptr i32* %A, i32 4
  ret i32* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i32 elements.
define i32* @test_v2i32_post_reg_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v2i32_post_reg_st1x2:
;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
  %tmp = getelementptr i32* %A, i64 %inc
  ret i32* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*)
4422
4423
; st1x2 of two <2 x i64> vectors, post-indexed by an immediate: %A advances by
; 4 x i64 = 32 bytes, the total size stored. %ptr is unused.
define i64* @test_v2i64_post_imm_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
;CHECK-LABEL: test_v2i64_post_imm_st1x2:
;CHECK: st1.2d { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
  %tmp = getelementptr i64* %A, i64 4
  ret i64* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i64 elements.
define i64* @test_v2i64_post_reg_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v2i64_post_reg_st1x2:
;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
  %tmp = getelementptr i64* %A, i64 %inc
  ret i64* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*)
4441
4442
; st1x2 of two <1 x i64> vectors, post-indexed by an immediate: %A advances by
; 2 x i64 = 16 bytes, the total size stored. %ptr is unused.
define i64* @test_v1i64_post_imm_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
;CHECK-LABEL: test_v1i64_post_imm_st1x2:
;CHECK: st1.1d { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
  %tmp = getelementptr i64* %A, i64 2
  ret i64* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i64 elements.
define i64* @test_v1i64_post_reg_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v1i64_post_reg_st1x2:
;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
  %tmp = getelementptr i64* %A, i64 %inc
  ret i64* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*)
4460
4461
; st1x2 of two <4 x float> vectors, post-indexed by an immediate: %A advances by
; 8 x float = 32 bytes, the total size stored. %ptr is unused.
define float* @test_v4f32_post_imm_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
;CHECK-LABEL: test_v4f32_post_imm_st1x2:
;CHECK: st1.4s { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
  %tmp = getelementptr float* %A, i32 8
  ret float* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc float elements.
define float* @test_v4f32_post_reg_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v4f32_post_reg_st1x2:
;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
  %tmp = getelementptr float* %A, i64 %inc
  ret float* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*)
4479
4480
; st1x2 of two <2 x float> vectors, post-indexed by an immediate: %A advances by
; 4 x float = 16 bytes, the total size stored. %ptr is unused.
define float* @test_v2f32_post_imm_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
;CHECK-LABEL: test_v2f32_post_imm_st1x2:
;CHECK: st1.2s { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
  %tmp = getelementptr float* %A, i32 4
  ret float* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc float elements.
define float* @test_v2f32_post_reg_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v2f32_post_reg_st1x2:
;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
  %tmp = getelementptr float* %A, i64 %inc
  ret float* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*)
4498
4499
; st1x2 of two <2 x double> vectors, post-indexed by an immediate: %A advances by
; 4 x double = 32 bytes, the total size stored. %ptr is unused.
define double* @test_v2f64_post_imm_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
;CHECK-LABEL: test_v2f64_post_imm_st1x2:
;CHECK: st1.2d { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
  %tmp = getelementptr double* %A, i64 4
  ret double* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc double elements.
define double* @test_v2f64_post_reg_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v2f64_post_reg_st1x2:
;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
  %tmp = getelementptr double* %A, i64 %inc
  ret double* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*)
4517
4518
; st1x2 of two <1 x double> vectors, post-indexed by an immediate: %A advances by
; 2 x double = 16 bytes, the total size stored. %ptr is unused.
define double* @test_v1f64_post_imm_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
;CHECK-LABEL: test_v1f64_post_imm_st1x2:
;CHECK: st1.1d { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
  %tmp = getelementptr double* %A, i64 2
  ret double* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc double elements.
define double* @test_v1f64_post_reg_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
;CHECK-LABEL: test_v1f64_post_reg_st1x2:
;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
  %tmp = getelementptr double* %A, i64 %inc
  ret double* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*)
4536
4537
; st1x3 of three <16 x i8> vectors, post-indexed by an immediate: %A advances by
; 48 x i8 = 48 bytes, the total size stored. %ptr is unused.
define i8* @test_v16i8_post_imm_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
;CHECK-LABEL: test_v16i8_post_imm_st1x3:
;CHECK: st1.16b { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
  %tmp = getelementptr i8* %A, i32 48
  ret i8* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i8 elements.
define i8* @test_v16i8_post_reg_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v16i8_post_reg_st1x3:
;CHECK: st1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
  %tmp = getelementptr i8* %A, i64 %inc
  ret i8* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*)
4555
4556
; st1x3 of three <8 x i8> vectors, post-indexed by an immediate: %A advances by
; 24 x i8 = 24 bytes, the total size stored. %ptr is unused.
define i8* @test_v8i8_post_imm_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
;CHECK-LABEL: test_v8i8_post_imm_st1x3:
;CHECK: st1.8b { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
  %tmp = getelementptr i8* %A, i32 24
  ret i8* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i8 elements.
define i8* @test_v8i8_post_reg_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v8i8_post_reg_st1x3:
;CHECK: st1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
  %tmp = getelementptr i8* %A, i64 %inc
  ret i8* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*)
4574
4575
; st1x3 of three <8 x i16> vectors, post-indexed by an immediate: %A advances by
; 24 x i16 = 48 bytes, the total size stored. %ptr is unused.
define i16* @test_v8i16_post_imm_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
;CHECK-LABEL: test_v8i16_post_imm_st1x3:
;CHECK: st1.8h { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
  %tmp = getelementptr i16* %A, i32 24
  ret i16* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i16 elements.
define i16* @test_v8i16_post_reg_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v8i16_post_reg_st1x3:
;CHECK: st1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
  %tmp = getelementptr i16* %A, i64 %inc
  ret i16* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*)
4593
4594
; st1x3 of three <4 x i16> vectors, post-indexed by an immediate: %A advances by
; 12 x i16 = 24 bytes, the total size stored. %ptr is unused.
define i16* @test_v4i16_post_imm_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
;CHECK-LABEL: test_v4i16_post_imm_st1x3:
;CHECK: st1.4h { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
  %tmp = getelementptr i16* %A, i32 12
  ret i16* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i16 elements.
define i16* @test_v4i16_post_reg_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v4i16_post_reg_st1x3:
;CHECK: st1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
  %tmp = getelementptr i16* %A, i64 %inc
  ret i16* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*)
4612
4613
; st1x3 of three <4 x i32> vectors, post-indexed by an immediate: %A advances by
; 12 x i32 = 48 bytes, the total size stored. %ptr is unused.
define i32* @test_v4i32_post_imm_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
;CHECK-LABEL: test_v4i32_post_imm_st1x3:
;CHECK: st1.4s { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
  %tmp = getelementptr i32* %A, i32 12
  ret i32* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i32 elements.
define i32* @test_v4i32_post_reg_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v4i32_post_reg_st1x3:
;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
  %tmp = getelementptr i32* %A, i64 %inc
  ret i32* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*)
4631
4632
; st1x3 of three <2 x i32> vectors, post-indexed by an immediate: %A advances by
; 6 x i32 = 24 bytes, the total size stored. %ptr is unused.
define i32* @test_v2i32_post_imm_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
;CHECK-LABEL: test_v2i32_post_imm_st1x3:
;CHECK: st1.2s { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
  %tmp = getelementptr i32* %A, i32 6
  ret i32* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i32 elements.
define i32* @test_v2i32_post_reg_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v2i32_post_reg_st1x3:
;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
  %tmp = getelementptr i32* %A, i64 %inc
  ret i32* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*)
4650
4651
; st1x3 of three <2 x i64> vectors, post-indexed by an immediate: %A advances by
; 6 x i64 = 48 bytes, the total size stored. %ptr is unused.
define i64* @test_v2i64_post_imm_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
;CHECK-LABEL: test_v2i64_post_imm_st1x3:
;CHECK: st1.2d { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
  %tmp = getelementptr i64* %A, i64 6
  ret i64* %tmp
}

; Same store, post-indexed by a register: %A advances by %inc i64 elements.
define i64* @test_v2i64_post_reg_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v2i64_post_reg_st1x3:
;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
  %tmp = getelementptr i64* %A, i64 %inc
  ret i64* %tmp
}

; Intrinsic exercised by the two tests above.
declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*)
4669
4670
; st1x3 of <1 x i64>, then %A + 3 elements (24 bytes): expect immediate post-index.
define i64* @test_v1i64_post_imm_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
; CHECK-LABEL: test_v1i64_post_imm_st1x3:
; CHECK: st1.1d { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
  %next = getelementptr i64* %A, i64 3
  ret i64* %next
}
4678
; st1x3 of <1 x i64>, then %A + %inc elements: expect register post-index.
define i64* @test_v1i64_post_reg_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v1i64_post_reg_st1x3:
; CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  ret i64* %next
}
4686
4687declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*)
4688
4689
; st1x3 of <4 x float>, then %A + 12 elements (48 bytes): expect immediate post-index.
define float* @test_v4f32_post_imm_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
; CHECK-LABEL: test_v4f32_post_imm_st1x3:
; CHECK: st1.4s { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
  %next = getelementptr float* %A, i32 12
  ret float* %next
}
4697
; st1x3 of <4 x float>, then %A + %inc elements: expect register post-index.
define float* @test_v4f32_post_reg_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v4f32_post_reg_st1x3:
; CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
  %next = getelementptr float* %A, i64 %inc
  ret float* %next
}
4705
4706declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*)
4707
4708
; st1x3 of <2 x float>, then %A + 6 elements (24 bytes): expect immediate post-index.
define float* @test_v2f32_post_imm_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
; CHECK-LABEL: test_v2f32_post_imm_st1x3:
; CHECK: st1.2s { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
  %next = getelementptr float* %A, i32 6
  ret float* %next
}
4716
; st1x3 of <2 x float>, then %A + %inc elements: expect register post-index.
define float* @test_v2f32_post_reg_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f32_post_reg_st1x3:
; CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
  %next = getelementptr float* %A, i64 %inc
  ret float* %next
}
4724
4725declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*)
4726
4727
; st1x3 of <2 x double>, then %A + 6 elements (48 bytes): expect immediate post-index.
define double* @test_v2f64_post_imm_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
; CHECK-LABEL: test_v2f64_post_imm_st1x3:
; CHECK: st1.2d { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
  %next = getelementptr double* %A, i64 6
  ret double* %next
}
4735
; st1x3 of <2 x double>, then %A + %inc elements: expect register post-index.
define double* @test_v2f64_post_reg_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f64_post_reg_st1x3:
; CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
  %next = getelementptr double* %A, i64 %inc
  ret double* %next
}
4743
4744declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*)
4745
4746
; st1x3 of <1 x double>, then %A + 3 elements (24 bytes): expect immediate post-index.
define double* @test_v1f64_post_imm_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
; CHECK-LABEL: test_v1f64_post_imm_st1x3:
; CHECK: st1.1d { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
  %next = getelementptr double* %A, i64 3
  ret double* %next
}
4754
; st1x3 of <1 x double>, then %A + %inc elements: expect register post-index.
define double* @test_v1f64_post_reg_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v1f64_post_reg_st1x3:
; CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
  %next = getelementptr double* %A, i64 %inc
  ret double* %next
}
4762
4763declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*)
4764
4765
; st1x4 of <16 x i8>, then %A + 64 elements (64 bytes): expect immediate post-index.
define i8* @test_v16i8_post_imm_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
; CHECK-LABEL: test_v16i8_post_imm_st1x4:
; CHECK: st1.16b { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
  %next = getelementptr i8* %A, i32 64
  ret i8* %next
}
4773
; st1x4 of <16 x i8>, then %A + %inc elements: expect register post-index.
define i8* @test_v16i8_post_reg_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v16i8_post_reg_st1x4:
; CHECK: st1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  ret i8* %next
}
4781
4782declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*)
4783
4784
; st1x4 of <8 x i8>, then %A + 32 elements (32 bytes): expect immediate post-index.
define i8* @test_v8i8_post_imm_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
; CHECK-LABEL: test_v8i8_post_imm_st1x4:
; CHECK: st1.8b { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
  %next = getelementptr i8* %A, i32 32
  ret i8* %next
}
4792
; st1x4 of <8 x i8>, then %A + %inc elements: expect register post-index.
define i8* @test_v8i8_post_reg_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i8_post_reg_st1x4:
; CHECK: st1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  ret i8* %next
}
4800
4801declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*)
4802
4803
; st1x4 of <8 x i16>, then %A + 32 elements (64 bytes): expect immediate post-index.
define i16* @test_v8i16_post_imm_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
; CHECK-LABEL: test_v8i16_post_imm_st1x4:
; CHECK: st1.8h { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
  %next = getelementptr i16* %A, i32 32
  ret i16* %next
}
4811
; st1x4 of <8 x i16>, then %A + %inc elements: expect register post-index.
define i16* @test_v8i16_post_reg_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i16_post_reg_st1x4:
; CHECK: st1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  ret i16* %next
}
4819
4820declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*)
4821
4822
; st1x4 of <4 x i16>, then %A + 16 elements (32 bytes): expect immediate post-index.
define i16* @test_v4i16_post_imm_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
; CHECK-LABEL: test_v4i16_post_imm_st1x4:
; CHECK: st1.4h { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
  %next = getelementptr i16* %A, i32 16
  ret i16* %next
}
4830
; st1x4 of <4 x i16>, then %A + %inc elements: expect register post-index.
define i16* @test_v4i16_post_reg_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i16_post_reg_st1x4:
; CHECK: st1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  ret i16* %next
}
4838
4839declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>,<4 x i16>, i16*)
4840
4841
; st1x4 of <4 x i32>, then %A + 16 elements (64 bytes): expect immediate post-index.
define i32* @test_v4i32_post_imm_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
; CHECK-LABEL: test_v4i32_post_imm_st1x4:
; CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
  %next = getelementptr i32* %A, i32 16
  ret i32* %next
}
4849
; st1x4 of <4 x i32>, then %A + %inc elements: expect register post-index.
define i32* @test_v4i32_post_reg_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i32_post_reg_st1x4:
; CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  ret i32* %next
}
4857
4858declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>,<4 x i32>, i32*)
4859
4860
; st1x4 of <2 x i32>, then %A + 8 elements (32 bytes): expect immediate post-index.
define i32* @test_v2i32_post_imm_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
; CHECK-LABEL: test_v2i32_post_imm_st1x4:
; CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
  %next = getelementptr i32* %A, i32 8
  ret i32* %next
}
4868
; st1x4 of <2 x i32>, then %A + %inc elements: expect register post-index.
define i32* @test_v2i32_post_reg_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i32_post_reg_st1x4:
; CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  ret i32* %next
}
4876
4877declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*)
4878
4879
; st1x4 of <2 x i64>, then %A + 8 elements (64 bytes): expect immediate post-index.
define i64* @test_v2i64_post_imm_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
; CHECK-LABEL: test_v2i64_post_imm_st1x4:
; CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
  %next = getelementptr i64* %A, i64 8
  ret i64* %next
}
4887
; st1x4 of <2 x i64>, then %A + %inc elements: expect register post-index.
define i64* @test_v2i64_post_reg_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i64_post_reg_st1x4:
; CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  ret i64* %next
}
4895
4896declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>,<2 x i64>, i64*)
4897
4898
; st1x4 of <1 x i64>, then %A + 4 elements (32 bytes): expect immediate post-index.
define i64* @test_v1i64_post_imm_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
; CHECK-LABEL: test_v1i64_post_imm_st1x4:
; CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
  %next = getelementptr i64* %A, i64 4
  ret i64* %next
}
4906
; st1x4 of <1 x i64>, then %A + %inc elements: expect register post-index.
define i64* @test_v1i64_post_reg_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v1i64_post_reg_st1x4:
; CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  ret i64* %next
}
4914
4915declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>,<1 x i64>, i64*)
4916
4917
; st1x4 of <4 x float>, then %A + 16 elements (64 bytes): expect immediate post-index.
define float* @test_v4f32_post_imm_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
; CHECK-LABEL: test_v4f32_post_imm_st1x4:
; CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
  %next = getelementptr float* %A, i32 16
  ret float* %next
}
4925
; st1x4 of <4 x float>, then %A + %inc elements: expect register post-index.
define float* @test_v4f32_post_reg_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v4f32_post_reg_st1x4:
; CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
  %next = getelementptr float* %A, i64 %inc
  ret float* %next
}
4933
4934declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*)
4935
4936
; st1x4 of <2 x float>, then %A + 8 elements (32 bytes): expect immediate post-index.
define float* @test_v2f32_post_imm_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
; CHECK-LABEL: test_v2f32_post_imm_st1x4:
; CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
  %next = getelementptr float* %A, i32 8
  ret float* %next
}
4944
; st1x4 of <2 x float>, then %A + %inc elements: expect register post-index.
define float* @test_v2f32_post_reg_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f32_post_reg_st1x4:
; CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
  %next = getelementptr float* %A, i64 %inc
  ret float* %next
}
4952
4953declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*)
4954
4955
; st1x4 of <2 x double>, then %A + 8 elements (64 bytes): expect immediate post-index.
define double* @test_v2f64_post_imm_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
; CHECK-LABEL: test_v2f64_post_imm_st1x4:
; CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
  %next = getelementptr double* %A, i64 8
  ret double* %next
}
4963
; st1x4 of <2 x double>, then %A + %inc elements: expect register post-index.
define double* @test_v2f64_post_reg_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f64_post_reg_st1x4:
; CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
  %next = getelementptr double* %A, i64 %inc
  ret double* %next
}
4971
4972declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>,<2 x double>, double*)
4973
4974
; st1x4 of <1 x double>, then %A + 4 elements (32 bytes): expect immediate post-index.
define double* @test_v1f64_post_imm_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
; CHECK-LABEL: test_v1f64_post_imm_st1x4:
; CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
  %next = getelementptr double* %A, i64 4
  ret double* %next
}
4982
; st1x4 of <1 x double>, then %A + %inc elements: expect register post-index.
define double* @test_v1f64_post_reg_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v1f64_post_reg_st1x4:
; CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
  %next = getelementptr double* %A, i64 %inc
  ret double* %next
}
4990
4991declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*)
4992
4993
; Calls st2lanelane and then returns %A + 2. This function carries no FileCheck
; directives, so nothing about the generated code is asserted.
; NOTE(review): presumably a does-not-crash test for a call that merely resembles
; a structured-store intrinsic -- confirm against the commit that added it.
define i8* @test_v16i8_post_imm_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) {
  call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A)
  %next = getelementptr i8* %A, i32 2
  ret i8* %next
}
4999
; Calls st2lanelane and then returns %A + %inc. This function carries no FileCheck
; directives, so nothing about the generated code is asserted.
; NOTE(review): presumably a does-not-crash test for a call that merely resembles
; a structured-store intrinsic -- confirm against the commit that added it.
define i8* @test_v16i8_post_reg_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) {
  call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  ret i8* %next
}
5005
5006declare void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i64, i8*) nounwind readnone
5007
5008
; st2lane (lane 0) of <16 x i8>, then %A + 2 elements (2 bytes): expect immediate post-index.
define i8* @test_v16i8_post_imm_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
; CHECK-LABEL: test_v16i8_post_imm_st2lane:
; CHECK: st2.b { v0, v1 }[0], [x0], #2
  call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
  %next = getelementptr i8* %A, i32 2
  ret i8* %next
}
5016
; st2lane (lane 0) of <16 x i8>, then %A + %inc elements: expect register post-index.
define i8* @test_v16i8_post_reg_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v16i8_post_reg_st2lane:
; CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  ret i8* %next
}
5024
5025declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*)
5026
5027
; st2lane (lane 0) of <8 x i8>, then %A + 2 elements (2 bytes): expect immediate post-index.
define i8* @test_v8i8_post_imm_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
; CHECK-LABEL: test_v8i8_post_imm_st2lane:
; CHECK: st2.b { v0, v1 }[0], [x0], #2
  call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
  %next = getelementptr i8* %A, i32 2
  ret i8* %next
}
5035
; st2lane (lane 0) of <8 x i8>, then %A + %inc elements: expect register post-index.
define i8* @test_v8i8_post_reg_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i8_post_reg_st2lane:
; CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  ret i8* %next
}
5043
5044declare void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*)
5045
5046
; st2lane (lane 0) of <8 x i16>, then %A + 2 elements (4 bytes): expect immediate post-index.
define i16* @test_v8i16_post_imm_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
; CHECK-LABEL: test_v8i16_post_imm_st2lane:
; CHECK: st2.h { v0, v1 }[0], [x0], #4
  call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
  %next = getelementptr i16* %A, i32 2
  ret i16* %next
}
5054
; st2lane (lane 0) of <8 x i16>, then %A + %inc elements: expect register post-index.
define i16* @test_v8i16_post_reg_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i16_post_reg_st2lane:
; CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  ret i16* %next
}
5062
5063declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*)
5064
5065
; st2lane (lane 0) of <4 x i16>, then %A + 2 elements (4 bytes): expect immediate post-index.
define i16* @test_v4i16_post_imm_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
; CHECK-LABEL: test_v4i16_post_imm_st2lane:
; CHECK: st2.h { v0, v1 }[0], [x0], #4
  call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
  %next = getelementptr i16* %A, i32 2
  ret i16* %next
}
5073
; st2lane (lane 0) of <4 x i16>, then %A + %inc elements: expect register post-index.
define i16* @test_v4i16_post_reg_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i16_post_reg_st2lane:
; CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  ret i16* %next
}
5081
5082declare void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*)
5083
5084
; st2lane (lane 0) of <4 x i32>, then %A + 2 elements (8 bytes): expect immediate post-index.
define i32* @test_v4i32_post_imm_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
; CHECK-LABEL: test_v4i32_post_imm_st2lane:
; CHECK: st2.s { v0, v1 }[0], [x0], #8
  call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
  %next = getelementptr i32* %A, i32 2
  ret i32* %next
}
5092
; st2lane (lane 0) of <4 x i32>, then %A + %inc elements: expect register post-index.
define i32* @test_v4i32_post_reg_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i32_post_reg_st2lane:
; CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  ret i32* %next
}
5100
5101declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*)
5102
5103
; st2lane (lane 0) of <2 x i32>, then %A + 2 elements (8 bytes): expect immediate post-index.
define i32* @test_v2i32_post_imm_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
; CHECK-LABEL: test_v2i32_post_imm_st2lane:
; CHECK: st2.s { v0, v1 }[0], [x0], #8
  call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
  %next = getelementptr i32* %A, i32 2
  ret i32* %next
}
5111
; st2lane (lane 0) of <2 x i32>, then %A + %inc elements: expect register post-index.
define i32* @test_v2i32_post_reg_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i32_post_reg_st2lane:
; CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  ret i32* %next
}
5119
5120declare void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*)
5121
5122
; st2lane (lane 0) of <2 x i64>, then %A + 2 elements (16 bytes): expect immediate post-index.
define i64* @test_v2i64_post_imm_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
; CHECK-LABEL: test_v2i64_post_imm_st2lane:
; CHECK: st2.d { v0, v1 }[0], [x0], #16
  call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
  %next = getelementptr i64* %A, i64 2
  ret i64* %next
}
5130
; st2lane (lane 0) of <2 x i64>, then %A + %inc elements: expect register post-index.
define i64* @test_v2i64_post_reg_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i64_post_reg_st2lane:
; CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  ret i64* %next
}
5138
5139declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*)
5140
5141
; st2lane (lane 0) of <1 x i64>, then %A + 2 elements (16 bytes): expect immediate post-index.
define i64* @test_v1i64_post_imm_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
; CHECK-LABEL: test_v1i64_post_imm_st2lane:
; CHECK: st2.d { v0, v1 }[0], [x0], #16
  call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
  %next = getelementptr i64* %A, i64 2
  ret i64* %next
}
5149
; st2lane (lane 0) of <1 x i64>, then %A + %inc elements: expect register post-index.
define i64* @test_v1i64_post_reg_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v1i64_post_reg_st2lane:
; CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
  %next = getelementptr i64* %A, i64 %inc
  ret i64* %next
}
5157
5158declare void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*)
5159
5160
; st2lane (lane 0) of <4 x float>, then %A + 2 elements (8 bytes): expect immediate post-index.
define float* @test_v4f32_post_imm_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
; CHECK-LABEL: test_v4f32_post_imm_st2lane:
; CHECK: st2.s { v0, v1 }[0], [x0], #8
  call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
  %next = getelementptr float* %A, i32 2
  ret float* %next
}
5168
; st2lane (lane 0) of <4 x float>, then %A + %inc elements: expect register post-index.
define float* @test_v4f32_post_reg_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v4f32_post_reg_st2lane:
; CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
  %next = getelementptr float* %A, i64 %inc
  ret float* %next
}
5176
5177declare void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*)
5178
5179
; st2lane (lane 0) of <2 x float>, then %A + 2 elements (8 bytes): expect immediate post-index.
define float* @test_v2f32_post_imm_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
; CHECK-LABEL: test_v2f32_post_imm_st2lane:
; CHECK: st2.s { v0, v1 }[0], [x0], #8
  call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
  %next = getelementptr float* %A, i32 2
  ret float* %next
}
5187
; st2lane (lane 0) of <2 x float>, then %A + %inc elements: expect register post-index.
define float* @test_v2f32_post_reg_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f32_post_reg_st2lane:
; CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
  %next = getelementptr float* %A, i64 %inc
  ret float* %next
}
5195
5196declare void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*)
5197
5198
; st2lane (lane 0) of <2 x double>, then %A + 2 elements (16 bytes): expect immediate post-index.
define double* @test_v2f64_post_imm_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
; CHECK-LABEL: test_v2f64_post_imm_st2lane:
; CHECK: st2.d { v0, v1 }[0], [x0], #16
  call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
  %next = getelementptr double* %A, i64 2
  ret double* %next
}
5206
; st2lane (lane 0) of <2 x double>, then %A + %inc elements: expect register post-index.
define double* @test_v2f64_post_reg_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f64_post_reg_st2lane:
; CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
  %next = getelementptr double* %A, i64 %inc
  ret double* %next
}
5214
5215declare void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*)
5216
5217
; st2lane (lane 0) of <1 x double>, then %A + 2 elements (16 bytes): expect immediate post-index.
define double* @test_v1f64_post_imm_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
; CHECK-LABEL: test_v1f64_post_imm_st2lane:
; CHECK: st2.d { v0, v1 }[0], [x0], #16
  call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
  %next = getelementptr double* %A, i64 2
  ret double* %next
}
5225
; st2lane (lane 0) of <1 x double>, then %A + %inc elements: expect register post-index.
define double* @test_v1f64_post_reg_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v1f64_post_reg_st2lane:
; CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
  %next = getelementptr double* %A, i64 %inc
  ret double* %next
}
5233
5234declare void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*)
5235
5236
; st3lane (lane 0) of <16 x i8>, then %A + 3 elements (3 bytes): expect immediate post-index.
define i8* @test_v16i8_post_imm_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
; CHECK-LABEL: test_v16i8_post_imm_st3lane:
; CHECK: st3.b { v0, v1, v2 }[0], [x0], #3
  call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
  %next = getelementptr i8* %A, i32 3
  ret i8* %next
}
5244
; st3lane (lane 0) of <16 x i8>, then %A + %inc elements: expect register post-index.
define i8* @test_v16i8_post_reg_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v16i8_post_reg_st3lane:
; CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  ret i8* %next
}
5252
5253declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*)
5254
5255
; st3lane (lane 0) of <8 x i8>, then %A + 3 elements (3 bytes): expect immediate post-index.
define i8* @test_v8i8_post_imm_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
; CHECK-LABEL: test_v8i8_post_imm_st3lane:
; CHECK: st3.b { v0, v1, v2 }[0], [x0], #3
  call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
  %next = getelementptr i8* %A, i32 3
  ret i8* %next
}
5263
; st3lane (lane 0) of <8 x i8>, then %A + %inc elements: expect register post-index.
define i8* @test_v8i8_post_reg_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i8_post_reg_st3lane:
; CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
  %next = getelementptr i8* %A, i64 %inc
  ret i8* %next
}
5271
5272declare void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*)
5273
5274
; st3lane (lane 0) of <8 x i16>, then %A + 3 elements (6 bytes): expect immediate post-index.
define i16* @test_v8i16_post_imm_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
; CHECK-LABEL: test_v8i16_post_imm_st3lane:
; CHECK: st3.h { v0, v1, v2 }[0], [x0], #6
  call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
  %next = getelementptr i16* %A, i32 3
  ret i16* %next
}
5282
; st3lane (lane 0) of <8 x i16>, then %A + %inc elements: expect register post-index.
define i16* @test_v8i16_post_reg_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i16_post_reg_st3lane:
; CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  ret i16* %next
}
5290
5291declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*)
5292
5293
; st3lane (lane 0) of <4 x i16>, then %A + 3 elements (6 bytes): expect immediate post-index.
define i16* @test_v4i16_post_imm_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
; CHECK-LABEL: test_v4i16_post_imm_st3lane:
; CHECK: st3.h { v0, v1, v2 }[0], [x0], #6
  call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
  %next = getelementptr i16* %A, i32 3
  ret i16* %next
}
5301
; st3lane (lane 0) of <4 x i16>, then %A + %inc elements: expect register post-index.
define i16* @test_v4i16_post_reg_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i16_post_reg_st3lane:
; CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
  %next = getelementptr i16* %A, i64 %inc
  ret i16* %next
}
5309
5310declare void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*)
5311
5312
; st3lane (lane 0) of <4 x i32>, then %A + 3 elements (12 bytes): expect immediate post-index.
define i32* @test_v4i32_post_imm_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
; CHECK-LABEL: test_v4i32_post_imm_st3lane:
; CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
  call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
  %next = getelementptr i32* %A, i32 3
  ret i32* %next
}
5320
; st3lane (lane 0) of <4 x i32>, then %A + %inc elements: expect register post-index.
define i32* @test_v4i32_post_reg_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i32_post_reg_st3lane:
; CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
  %next = getelementptr i32* %A, i64 %inc
  ret i32* %next
}
5328
5329declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)
5330
5331
; st3lane (lane 0) of <2 x i32>, then %A + 3 elements (12 bytes): expect immediate post-index.
define i32* @test_v2i32_post_imm_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
; CHECK-LABEL: test_v2i32_post_imm_st3lane:
; CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
  call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
  %next = getelementptr i32* %A, i32 3
  ret i32* %next
}
5339
; Post-index (register) st3lane test: calls the st3lane intrinsic on the
; <2 x i32> values %B, %C, %D with i64 operand 0 at address %A, then returns
; %A advanced by %inc i32 elements. The FileCheck pattern expects st3.s on
; lane [0] with an x-register write-back operand. %ptr is not referenced.
5340define i32* @test_v2i32_post_reg_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
5341;CHECK-LABEL: test_v2i32_post_reg_st3lane:
5342;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5343 call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
5344 %tmp = getelementptr i32* %A, i64 %inc
5345 ret i32* %tmp
5346}
5347
5348declare void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*)
5349
5350
; Post-index (immediate) st3lane test: calls the st3lane intrinsic on the
; <2 x i64> values %B, %C, %D with i64 operand 0 at address %A, then returns
; %A advanced by 3 i64 elements. The FileCheck pattern expects st3.d on lane
; [0] with write-back #24 (3 elements x 8 bytes). %ptr is not referenced.
5351define i64* @test_v2i64_post_imm_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
5352;CHECK-LABEL: test_v2i64_post_imm_st3lane:
5353;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
5354 call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
5355 %tmp = getelementptr i64* %A, i64 3
5356 ret i64* %tmp
5357}
5358
; Post-index (register) st3lane test: calls the st3lane intrinsic on the
; <2 x i64> values %B, %C, %D with i64 operand 0 at address %A, then returns
; %A advanced by %inc i64 elements. The FileCheck pattern expects st3.d on
; lane [0] with an x-register write-back operand. %ptr is not referenced.
5359define i64* @test_v2i64_post_reg_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
5360;CHECK-LABEL: test_v2i64_post_reg_st3lane:
5361;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5362 call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
5363 %tmp = getelementptr i64* %A, i64 %inc
5364 ret i64* %tmp
5365}
5366
5367declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*)
5368
5369
; Post-index (immediate) st3lane test: calls the st3lane intrinsic on the
; <1 x i64> values %B, %C, %D with i64 operand 0 at address %A, then returns
; %A advanced by 3 i64 elements. The FileCheck pattern expects st3.d on lane
; [0] with write-back #24 (3 elements x 8 bytes). %ptr is not referenced.
5370define i64* @test_v1i64_post_imm_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
5371;CHECK-LABEL: test_v1i64_post_imm_st3lane:
5372;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
5373 call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
5374 %tmp = getelementptr i64* %A, i64 3
5375 ret i64* %tmp
5376}
5377
; Post-index (register) st3lane test: calls the st3lane intrinsic on the
; <1 x i64> values %B, %C, %D with i64 operand 0 at address %A, then returns
; %A advanced by %inc i64 elements. The FileCheck pattern expects st3.d on
; lane [0] with an x-register write-back operand. %ptr is not referenced.
5378define i64* @test_v1i64_post_reg_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
5379;CHECK-LABEL: test_v1i64_post_reg_st3lane:
5380;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5381 call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
5382 %tmp = getelementptr i64* %A, i64 %inc
5383 ret i64* %tmp
5384}
5385
5386declare void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*)
5387
5388
; Post-index (immediate) st3lane test: calls the st3lane intrinsic on the
; <4 x float> values %B, %C, %D with i64 operand 0 at address %A, then returns
; %A advanced by 3 float elements. The FileCheck pattern expects st3.s on lane
; [0] with write-back #12 (3 elements x 4 bytes). %ptr is not referenced.
5389define float* @test_v4f32_post_imm_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
5390;CHECK-LABEL: test_v4f32_post_imm_st3lane:
5391;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
5392 call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
5393 %tmp = getelementptr float* %A, i32 3
5394 ret float* %tmp
5395}
5396
; Post-index (register) st3lane test: calls the st3lane intrinsic on the
; <4 x float> values %B, %C, %D with i64 operand 0 at address %A, then returns
; %A advanced by %inc float elements. The FileCheck pattern expects st3.s on
; lane [0] with an x-register write-back operand. %ptr is not referenced.
5397define float* @test_v4f32_post_reg_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
5398;CHECK-LABEL: test_v4f32_post_reg_st3lane:
5399;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5400 call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
5401 %tmp = getelementptr float* %A, i64 %inc
5402 ret float* %tmp
5403}
5404
5405declare void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*)
5406
5407
; Post-index (immediate) st3lane test: calls the st3lane intrinsic on the
; <2 x float> values %B, %C, %D with i64 operand 0 at address %A, then returns
; %A advanced by 3 float elements. The FileCheck pattern expects st3.s on lane
; [0] with write-back #12 (3 elements x 4 bytes). %ptr is not referenced.
5408define float* @test_v2f32_post_imm_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
5409;CHECK-LABEL: test_v2f32_post_imm_st3lane:
5410;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
5411 call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
5412 %tmp = getelementptr float* %A, i32 3
5413 ret float* %tmp
5414}
5415
; Post-index (register) st3lane test: calls the st3lane intrinsic on the
; <2 x float> values %B, %C, %D with i64 operand 0 at address %A, then returns
; %A advanced by %inc float elements. The FileCheck pattern expects st3.s on
; lane [0] with an x-register write-back operand. %ptr is not referenced.
5416define float* @test_v2f32_post_reg_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
5417;CHECK-LABEL: test_v2f32_post_reg_st3lane:
5418;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5419 call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
5420 %tmp = getelementptr float* %A, i64 %inc
5421 ret float* %tmp
5422}
5423
5424declare void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*)
5425
5426
; Post-index (immediate) st3lane test: calls the st3lane intrinsic on the
; <2 x double> values %B, %C, %D with i64 operand 0 at address %A, then
; returns %A advanced by 3 double elements. The FileCheck pattern expects
; st3.d on lane [0] with write-back #24 (3 elements x 8 bytes). %ptr is not
; referenced.
5427define double* @test_v2f64_post_imm_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
5428;CHECK-LABEL: test_v2f64_post_imm_st3lane:
5429;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
5430 call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
5431 %tmp = getelementptr double* %A, i64 3
5432 ret double* %tmp
5433}
5434
; Post-index (register) st3lane test: calls the st3lane intrinsic on the
; <2 x double> values %B, %C, %D with i64 operand 0 at address %A, then
; returns %A advanced by %inc double elements. The FileCheck pattern expects
; st3.d on lane [0] with an x-register write-back operand. %ptr is not
; referenced.
5435define double* @test_v2f64_post_reg_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
5436;CHECK-LABEL: test_v2f64_post_reg_st3lane:
5437;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5438 call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
5439 %tmp = getelementptr double* %A, i64 %inc
5440 ret double* %tmp
5441}
5442
5443declare void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*)
5444
5445
; Post-index (immediate) st3lane test: calls the st3lane intrinsic on the
; <1 x double> values %B, %C, %D with i64 operand 0 at address %A, then
; returns %A advanced by 3 double elements. The FileCheck pattern expects
; st3.d on lane [0] with write-back #24 (3 elements x 8 bytes). %ptr is not
; referenced.
5446define double* @test_v1f64_post_imm_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
5447;CHECK-LABEL: test_v1f64_post_imm_st3lane:
5448;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
5449 call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
5450 %tmp = getelementptr double* %A, i64 3
5451 ret double* %tmp
5452}
5453
; Post-index (register) st3lane test: calls the st3lane intrinsic on the
; <1 x double> values %B, %C, %D with i64 operand 0 at address %A, then
; returns %A advanced by %inc double elements. The FileCheck pattern expects
; st3.d on lane [0] with an x-register write-back operand. %ptr is not
; referenced.
5454define double* @test_v1f64_post_reg_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
5455;CHECK-LABEL: test_v1f64_post_reg_st3lane:
5456;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5457 call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
5458 %tmp = getelementptr double* %A, i64 %inc
5459 ret double* %tmp
5460}
5461
5462declare void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*)
5463
5464
; Post-index (immediate) st4lane test: calls the st4lane intrinsic on the
; <16 x i8> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by 4 i8 elements. The FileCheck pattern expects st4.b
; on lane [0] with write-back #4 (4 elements x 1 byte). %ptr is not referenced.
5465define i8* @test_v16i8_post_imm_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
5466;CHECK-LABEL: test_v16i8_post_imm_st4lane:
5467;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4
5468 call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
5469 %tmp = getelementptr i8* %A, i32 4
5470 ret i8* %tmp
5471}
5472
; Post-index (register) st4lane test: calls the st4lane intrinsic on the
; <16 x i8> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by %inc i8 elements. The FileCheck pattern expects
; st4.b on lane [0] with an x-register write-back operand. %ptr is not
; referenced.
5473define i8* @test_v16i8_post_reg_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
5474;CHECK-LABEL: test_v16i8_post_reg_st4lane:
5475;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5476 call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
5477 %tmp = getelementptr i8* %A, i64 %inc
5478 ret i8* %tmp
5479}
5480
5481declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*)
5482
5483
; Post-index (immediate) st4lane test: calls the st4lane intrinsic on the
; <8 x i8> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by 4 i8 elements. The FileCheck pattern expects st4.b
; on lane [0] with write-back #4 (4 elements x 1 byte). %ptr is not referenced.
5484define i8* @test_v8i8_post_imm_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
5485;CHECK-LABEL: test_v8i8_post_imm_st4lane:
5486;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4
5487 call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
5488 %tmp = getelementptr i8* %A, i32 4
5489 ret i8* %tmp
5490}
5491
; Post-index (register) st4lane test: calls the st4lane intrinsic on the
; <8 x i8> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by %inc i8 elements. The FileCheck pattern expects
; st4.b on lane [0] with an x-register write-back operand. %ptr is not
; referenced.
5492define i8* @test_v8i8_post_reg_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
5493;CHECK-LABEL: test_v8i8_post_reg_st4lane:
5494;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5495 call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
5496 %tmp = getelementptr i8* %A, i64 %inc
5497 ret i8* %tmp
5498}
5499
5500declare void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*)
5501
5502
; Post-index (immediate) st4lane test: calls the st4lane intrinsic on the
; <8 x i16> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by 4 i16 elements. The FileCheck pattern expects st4.h
; on lane [0] with write-back #8 (4 elements x 2 bytes). %ptr is not
; referenced.
5503define i16* @test_v8i16_post_imm_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
5504;CHECK-LABEL: test_v8i16_post_imm_st4lane:
5505;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8
5506 call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
5507 %tmp = getelementptr i16* %A, i32 4
5508 ret i16* %tmp
5509}
5510
; Post-index (register) st4lane test: calls the st4lane intrinsic on the
; <8 x i16> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by %inc i16 elements. The FileCheck pattern expects
; st4.h on lane [0] with an x-register write-back operand. %ptr is not
; referenced.
5511define i16* @test_v8i16_post_reg_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
5512;CHECK-LABEL: test_v8i16_post_reg_st4lane:
5513;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5514 call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
5515 %tmp = getelementptr i16* %A, i64 %inc
5516 ret i16* %tmp
5517}
5518
5519declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*)
5520
5521
; Post-index (immediate) st4lane test: calls the st4lane intrinsic on the
; <4 x i16> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by 4 i16 elements. The FileCheck pattern expects st4.h
; on lane [0] with write-back #8 (4 elements x 2 bytes). %ptr is not
; referenced.
5522define i16* @test_v4i16_post_imm_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
5523;CHECK-LABEL: test_v4i16_post_imm_st4lane:
5524;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8
5525 call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
5526 %tmp = getelementptr i16* %A, i32 4
5527 ret i16* %tmp
5528}
5529
; Post-index (register) st4lane test: calls the st4lane intrinsic on the
; <4 x i16> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by %inc i16 elements. The FileCheck pattern expects
; st4.h on lane [0] with an x-register write-back operand. %ptr is not
; referenced.
5530define i16* @test_v4i16_post_reg_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
5531;CHECK-LABEL: test_v4i16_post_reg_st4lane:
5532;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5533 call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
5534 %tmp = getelementptr i16* %A, i64 %inc
5535 ret i16* %tmp
5536}
5537
5538declare void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*)
5539
5540
; Post-index (immediate) st4lane test: calls the st4lane intrinsic on the
; <4 x i32> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by 4 i32 elements. The FileCheck pattern expects st4.s
; on lane [0] with write-back #16 (4 elements x 4 bytes). %ptr is not
; referenced.
5541define i32* @test_v4i32_post_imm_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
5542;CHECK-LABEL: test_v4i32_post_imm_st4lane:
5543;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
5544 call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
5545 %tmp = getelementptr i32* %A, i32 4
5546 ret i32* %tmp
5547}
5548
; Post-index (register) st4lane test: calls the st4lane intrinsic on the
; <4 x i32> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by %inc i32 elements. The FileCheck pattern expects
; st4.s on lane [0] with an x-register write-back operand. %ptr is not
; referenced.
5549define i32* @test_v4i32_post_reg_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
5550;CHECK-LABEL: test_v4i32_post_reg_st4lane:
5551;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5552 call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
5553 %tmp = getelementptr i32* %A, i64 %inc
5554 ret i32* %tmp
5555}
5556
5557declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)
5558
5559
; Post-index (immediate) st4lane test: calls the st4lane intrinsic on the
; <2 x i32> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by 4 i32 elements. The FileCheck pattern expects st4.s
; on lane [0] with write-back #16 (4 elements x 4 bytes). %ptr is not
; referenced.
5560define i32* @test_v2i32_post_imm_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
5561;CHECK-LABEL: test_v2i32_post_imm_st4lane:
5562;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
5563 call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
5564 %tmp = getelementptr i32* %A, i32 4
5565 ret i32* %tmp
5566}
5567
; Post-index (register) st4lane test: calls the st4lane intrinsic on the
; <2 x i32> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by %inc i32 elements. The FileCheck pattern expects
; st4.s on lane [0] with an x-register write-back operand. %ptr is not
; referenced.
5568define i32* @test_v2i32_post_reg_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
5569;CHECK-LABEL: test_v2i32_post_reg_st4lane:
5570;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5571 call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
5572 %tmp = getelementptr i32* %A, i64 %inc
5573 ret i32* %tmp
5574}
5575
5576declare void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*)
5577
5578
; Post-index (immediate) st4lane test: calls the st4lane intrinsic on the
; <2 x i64> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by 4 i64 elements. The FileCheck pattern expects st4.d
; on lane [0] with write-back #32 (4 elements x 8 bytes). %ptr is not
; referenced.
5579define i64* @test_v2i64_post_imm_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
5580;CHECK-LABEL: test_v2i64_post_imm_st4lane:
5581;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
5582 call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
5583 %tmp = getelementptr i64* %A, i64 4
5584 ret i64* %tmp
5585}
5586
; Post-index (register) st4lane test: calls the st4lane intrinsic on the
; <2 x i64> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by %inc i64 elements. The FileCheck pattern expects
; st4.d on lane [0] with an x-register write-back operand. %ptr is not
; referenced.
5587define i64* @test_v2i64_post_reg_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
5588;CHECK-LABEL: test_v2i64_post_reg_st4lane:
5589;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5590 call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
5591 %tmp = getelementptr i64* %A, i64 %inc
5592 ret i64* %tmp
5593}
5594
5595declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*)
5596
5597
; Post-index (immediate) st4lane test: calls the st4lane intrinsic on the
; <1 x i64> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by 4 i64 elements. The FileCheck pattern expects st4.d
; on lane [0] with write-back #32 (4 elements x 8 bytes). %ptr is not
; referenced.
5598define i64* @test_v1i64_post_imm_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
5599;CHECK-LABEL: test_v1i64_post_imm_st4lane:
5600;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
5601 call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
5602 %tmp = getelementptr i64* %A, i64 4
5603 ret i64* %tmp
5604}
5605
; Post-index (register) st4lane test: calls the st4lane intrinsic on the
; <1 x i64> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by %inc i64 elements. The FileCheck pattern expects
; st4.d on lane [0] with an x-register write-back operand. %ptr is not
; referenced.
5606define i64* @test_v1i64_post_reg_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
5607;CHECK-LABEL: test_v1i64_post_reg_st4lane:
5608;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5609 call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
5610 %tmp = getelementptr i64* %A, i64 %inc
5611 ret i64* %tmp
5612}
5613
5614declare void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*)
5615
5616
; Post-index (immediate) st4lane test: calls the st4lane intrinsic on the
; <4 x float> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by 4 float elements. The FileCheck pattern expects
; st4.s on lane [0] with write-back #16 (4 elements x 4 bytes). %ptr is not
; referenced.
5617define float* @test_v4f32_post_imm_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
5618;CHECK-LABEL: test_v4f32_post_imm_st4lane:
5619;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
5620 call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
5621 %tmp = getelementptr float* %A, i32 4
5622 ret float* %tmp
5623}
5624
; Post-index (register) st4lane test: calls the st4lane intrinsic on the
; <4 x float> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by %inc float elements. The FileCheck pattern expects
; st4.s on lane [0] with an x-register write-back operand. %ptr is not
; referenced.
5625define float* @test_v4f32_post_reg_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
5626;CHECK-LABEL: test_v4f32_post_reg_st4lane:
5627;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5628 call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
5629 %tmp = getelementptr float* %A, i64 %inc
5630 ret float* %tmp
5631}
5632
5633declare void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*)
5634
5635
; Post-index (immediate) st4lane test: calls the st4lane intrinsic on the
; <2 x float> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by 4 float elements. The FileCheck pattern expects
; st4.s on lane [0] with write-back #16 (4 elements x 4 bytes). %ptr is not
; referenced.
5636define float* @test_v2f32_post_imm_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
5637;CHECK-LABEL: test_v2f32_post_imm_st4lane:
5638;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
5639 call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
5640 %tmp = getelementptr float* %A, i32 4
5641 ret float* %tmp
5642}
5643
; Post-index (register) st4lane test: calls the st4lane intrinsic on the
; <2 x float> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by %inc float elements. The FileCheck pattern expects
; st4.s on lane [0] with an x-register write-back operand. %ptr is not
; referenced.
5644define float* @test_v2f32_post_reg_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
5645;CHECK-LABEL: test_v2f32_post_reg_st4lane:
5646;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5647 call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
5648 %tmp = getelementptr float* %A, i64 %inc
5649 ret float* %tmp
5650}
5651
5652declare void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*)
5653
5654
; Post-index (immediate) st4lane test: calls the st4lane intrinsic on the
; <2 x double> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by 4 double elements. The FileCheck pattern expects
; st4.d on lane [0] with write-back #32 (4 elements x 8 bytes). %ptr is not
; referenced.
5655define double* @test_v2f64_post_imm_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
5656;CHECK-LABEL: test_v2f64_post_imm_st4lane:
5657;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
5658 call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
5659 %tmp = getelementptr double* %A, i64 4
5660 ret double* %tmp
5661}
5662
; Post-index (register) st4lane test: calls the st4lane intrinsic on the
; <2 x double> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by %inc double elements. The FileCheck pattern expects
; st4.d on lane [0] with an x-register write-back operand. %ptr is not
; referenced.
5663define double* @test_v2f64_post_reg_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
5664;CHECK-LABEL: test_v2f64_post_reg_st4lane:
5665;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5666 call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
5667 %tmp = getelementptr double* %A, i64 %inc
5668 ret double* %tmp
5669}
5670
5671declare void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*)
5672
5673
; Post-index (immediate) st4lane test: calls the st4lane intrinsic on the
; <1 x double> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by 4 double elements. The FileCheck pattern expects
; st4.d on lane [0] with write-back #32 (4 elements x 8 bytes). %ptr is not
; referenced.
5674define double* @test_v1f64_post_imm_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
5675;CHECK-LABEL: test_v1f64_post_imm_st4lane:
5676;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
5677 call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
5678 %tmp = getelementptr double* %A, i64 4
5679 ret double* %tmp
5680}
5681
; Post-index (register) st4lane test: calls the st4lane intrinsic on the
; <1 x double> values %B, %C, %D, %E with i64 operand 0 at address %A, then
; returns %A advanced by %inc double elements. The FileCheck pattern expects
; st4.d on lane [0] with an x-register write-back operand. %ptr is not
; referenced.
5682define double* @test_v1f64_post_reg_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
5683;CHECK-LABEL: test_v1f64_post_reg_st4lane:
5684;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5685 call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
5686 %tmp = getelementptr double* %A, i64 %inc
5687 ret double* %tmp
5688}
5689
5690declare void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*)
5691
; Post-index (immediate) ld1r test: loads one i8 from %bar, inserts that same
; scalar into lanes 0..15 of a <16 x i8> that starts as undef, and stores
; %bar advanced by 1 element to %ptr. The FileCheck pattern expects ld1r.16b
; with write-back #1 (1 element x 1 byte).
5692define <16 x i8> @test_v16i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
5693; CHECK-LABEL: test_v16i8_post_imm_ld1r:
5694; CHECK: ld1r.16b { v0 }, [x0], #1
5695 %tmp1 = load i8* %bar
5696 %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
5697 %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
5698 %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
5699 %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
5700 %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
5701 %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
5702 %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
5703 %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
5704 %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
5705 %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
5706 %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
5707 %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
5708 %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
5709 %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
5710 %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
5711 %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
5712 %tmp18 = getelementptr i8* %bar, i64 1
5713 store i8* %tmp18, i8** %ptr
5714 ret <16 x i8> %tmp17
5715}
5716
; Post-index (register) ld1r test: loads one i8 from %bar, inserts that same
; scalar into lanes 0..15 of a <16 x i8> that starts as undef, and stores
; %bar advanced by %inc elements to %ptr. The FileCheck pattern expects
; ld1r.16b with an x-register write-back operand.
5717define <16 x i8> @test_v16i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
5718; CHECK-LABEL: test_v16i8_post_reg_ld1r:
5719; CHECK: ld1r.16b { v0 }, [x0], x{{[0-9]+}}
5720 %tmp1 = load i8* %bar
5721 %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
5722 %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
5723 %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
5724 %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
5725 %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
5726 %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
5727 %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
5728 %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
5729 %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
5730 %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
5731 %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
5732 %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
5733 %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
5734 %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
5735 %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
5736 %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
5737 %tmp18 = getelementptr i8* %bar, i64 %inc
5738 store i8* %tmp18, i8** %ptr
5739 ret <16 x i8> %tmp17
5740}
5741
; Post-index (immediate) ld1r test: loads one i8 from %bar, inserts that same
; scalar into lanes 0..7 of a <8 x i8> that starts as undef, and stores %bar
; advanced by 1 element to %ptr. The FileCheck pattern expects ld1r.8b with
; write-back #1 (1 element x 1 byte).
5742define <8 x i8> @test_v8i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
5743; CHECK-LABEL: test_v8i8_post_imm_ld1r:
5744; CHECK: ld1r.8b { v0 }, [x0], #1
5745 %tmp1 = load i8* %bar
5746 %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
5747 %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
5748 %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
5749 %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
5750 %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
5751 %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
5752 %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
5753 %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
5754 %tmp10 = getelementptr i8* %bar, i64 1
5755 store i8* %tmp10, i8** %ptr
5756 ret <8 x i8> %tmp9
5757}
5758
; Post-index (register) ld1r test: loads one i8 from %bar, inserts that same
; scalar into lanes 0..7 of a <8 x i8> that starts as undef, and stores %bar
; advanced by %inc elements to %ptr. The FileCheck pattern expects ld1r.8b
; with an x-register write-back operand.
5759define <8 x i8> @test_v8i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
5760; CHECK-LABEL: test_v8i8_post_reg_ld1r:
5761; CHECK: ld1r.8b { v0 }, [x0], x{{[0-9]+}}
5762 %tmp1 = load i8* %bar
5763 %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
5764 %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
5765 %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
5766 %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
5767 %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
5768 %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
5769 %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
5770 %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
5771 %tmp10 = getelementptr i8* %bar, i64 %inc
5772 store i8* %tmp10, i8** %ptr
5773 ret <8 x i8> %tmp9
5774}
5775
; Post-index (immediate) ld1r test: loads one i16 from %bar, inserts that same
; scalar into lanes 0..7 of a <8 x i16> that starts as undef, and stores %bar
; advanced by 1 element to %ptr. The FileCheck pattern expects ld1r.8h with
; write-back #2 (1 element x 2 bytes).
5776define <8 x i16> @test_v8i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
5777; CHECK-LABEL: test_v8i16_post_imm_ld1r:
5778; CHECK: ld1r.8h { v0 }, [x0], #2
5779 %tmp1 = load i16* %bar
5780 %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
5781 %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
5782 %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
5783 %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
5784 %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
5785 %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
5786 %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
5787 %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
5788 %tmp10 = getelementptr i16* %bar, i64 1
5789 store i16* %tmp10, i16** %ptr
5790 ret <8 x i16> %tmp9
5791}
5792
; Post-index (register) ld1r test: loads one i16 from %bar, inserts that same
; scalar into lanes 0..7 of a <8 x i16> that starts as undef, and stores %bar
; advanced by %inc elements to %ptr. The FileCheck pattern expects ld1r.8h
; with an x-register write-back operand.
5793define <8 x i16> @test_v8i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
5794; CHECK-LABEL: test_v8i16_post_reg_ld1r:
5795; CHECK: ld1r.8h { v0 }, [x0], x{{[0-9]+}}
5796 %tmp1 = load i16* %bar
5797 %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
5798 %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
5799 %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
5800 %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
5801 %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
5802 %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
5803 %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
5804 %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
5805 %tmp10 = getelementptr i16* %bar, i64 %inc
5806 store i16* %tmp10, i16** %ptr
5807 ret <8 x i16> %tmp9
5808}
5809
; Post-index (immediate) ld1r test: loads one i16 from %bar, inserts that same
; scalar into lanes 0..3 of a <4 x i16> that starts as undef, and stores %bar
; advanced by 1 element to %ptr. The FileCheck pattern expects ld1r.4h with
; write-back #2 (1 element x 2 bytes).
5810define <4 x i16> @test_v4i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
5811; CHECK-LABEL: test_v4i16_post_imm_ld1r:
5812; CHECK: ld1r.4h { v0 }, [x0], #2
5813 %tmp1 = load i16* %bar
5814 %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
5815 %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
5816 %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
5817 %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
5818 %tmp6 = getelementptr i16* %bar, i64 1
5819 store i16* %tmp6, i16** %ptr
5820 ret <4 x i16> %tmp5
5821}
5822
; Post-index (register) ld1r test: loads one i16 from %bar, inserts that same
; scalar into lanes 0..3 of a <4 x i16> that starts as undef, and stores %bar
; advanced by %inc elements to %ptr. The FileCheck pattern expects ld1r.4h
; with an x-register write-back operand.
5823define <4 x i16> @test_v4i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
5824; CHECK-LABEL: test_v4i16_post_reg_ld1r:
5825; CHECK: ld1r.4h { v0 }, [x0], x{{[0-9]+}}
5826 %tmp1 = load i16* %bar
5827 %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
5828 %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
5829 %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
5830 %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
5831 %tmp6 = getelementptr i16* %bar, i64 %inc
5832 store i16* %tmp6, i16** %ptr
5833 ret <4 x i16> %tmp5
5834}
5835
; Post-index (immediate) ld1r test: loads one i32 from %bar, inserts that same
; scalar into lanes 0..3 of a <4 x i32> that starts as undef, and stores %bar
; advanced by 1 element to %ptr. The FileCheck pattern expects ld1r.4s with
; write-back #4 (1 element x 4 bytes).
5836define <4 x i32> @test_v4i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
5837; CHECK-LABEL: test_v4i32_post_imm_ld1r:
5838; CHECK: ld1r.4s { v0 }, [x0], #4
5839 %tmp1 = load i32* %bar
5840 %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
5841 %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
5842 %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
5843 %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
5844 %tmp6 = getelementptr i32* %bar, i64 1
5845 store i32* %tmp6, i32** %ptr
5846 ret <4 x i32> %tmp5
5847}
5848
; Post-index (register) ld1r test: loads one i32 from %bar, inserts that same
; scalar into lanes 0..3 of a <4 x i32> that starts as undef, and stores %bar
; advanced by %inc elements to %ptr. The FileCheck pattern expects ld1r.4s
; with an x-register write-back operand.
5849define <4 x i32> @test_v4i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
5850; CHECK-LABEL: test_v4i32_post_reg_ld1r:
5851; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}}
5852 %tmp1 = load i32* %bar
5853 %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
5854 %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
5855 %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
5856 %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
5857 %tmp6 = getelementptr i32* %bar, i64 %inc
5858 store i32* %tmp6, i32** %ptr
5859 ret <4 x i32> %tmp5
5860}
5861
; Post-index (immediate) ld1r test: loads one i32 from %bar, inserts that same
; scalar into lanes 0 and 1 of a <2 x i32> that starts as undef, and stores
; %bar advanced by 1 element to %ptr. The FileCheck pattern expects ld1r.2s
; with write-back #4 (1 element x 4 bytes).
5862define <2 x i32> @test_v2i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
5863; CHECK-LABEL: test_v2i32_post_imm_ld1r:
5864; CHECK: ld1r.2s { v0 }, [x0], #4
5865 %tmp1 = load i32* %bar
5866 %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
5867 %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
5868 %tmp4 = getelementptr i32* %bar, i64 1
5869 store i32* %tmp4, i32** %ptr
5870 ret <2 x i32> %tmp3
5871}
5872
; Post-index (register) ld1r test: loads one i32 from %bar, inserts that same
; scalar into lanes 0 and 1 of a <2 x i32> that starts as undef, and stores
; %bar advanced by %inc elements to %ptr. The FileCheck pattern expects
; ld1r.2s with an x-register write-back operand.
5873define <2 x i32> @test_v2i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
5874; CHECK-LABEL: test_v2i32_post_reg_ld1r:
5875; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}}
5876 %tmp1 = load i32* %bar
5877 %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
5878 %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
5879 %tmp4 = getelementptr i32* %bar, i64 %inc
5880 store i32* %tmp4, i32** %ptr
5881 ret <2 x i32> %tmp3
5882}
5883
; Post-index (immediate) ld1r test: loads one i64 from %bar, inserts that same
; scalar into lanes 0 and 1 of a <2 x i64> that starts as undef, and stores
; %bar advanced by 1 element to %ptr. The FileCheck pattern expects ld1r.2d
; with write-back #8 (1 element x 8 bytes).
5884define <2 x i64> @test_v2i64_post_imm_ld1r(i64* %bar, i64** %ptr) {
5885; CHECK-LABEL: test_v2i64_post_imm_ld1r:
5886; CHECK: ld1r.2d { v0 }, [x0], #8
5887 %tmp1 = load i64* %bar
5888 %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
5889 %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
5890 %tmp4 = getelementptr i64* %bar, i64 1
5891 store i64* %tmp4, i64** %ptr
5892 ret <2 x i64> %tmp3
5893}
5894
; Post-index (register) ld1r test: loads one i64 from %bar, inserts that same
; scalar into lanes 0 and 1 of a <2 x i64> that starts as undef, and stores
; %bar advanced by %inc elements to %ptr. The FileCheck pattern expects
; ld1r.2d with an x-register write-back operand.
5895define <2 x i64> @test_v2i64_post_reg_ld1r(i64* %bar, i64** %ptr, i64 %inc) {
5896; CHECK-LABEL: test_v2i64_post_reg_ld1r:
5897; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}}
5898 %tmp1 = load i64* %bar
5899 %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
5900 %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
5901 %tmp4 = getelementptr i64* %bar, i64 %inc
5902 store i64* %tmp4, i64** %ptr
5903 ret <2 x i64> %tmp3
5904}
5905
; Post-index (immediate) ld1r test: loads one float from %bar, inserts that
; same scalar into lanes 0..3 of a <4 x float> that starts as undef, and
; stores %bar advanced by 1 element to %ptr. The FileCheck pattern expects
; ld1r.4s with write-back #4 (1 element x 4 bytes).
5906define <4 x float> @test_v4f32_post_imm_ld1r(float* %bar, float** %ptr) {
5907; CHECK-LABEL: test_v4f32_post_imm_ld1r:
5908; CHECK: ld1r.4s { v0 }, [x0], #4
5909 %tmp1 = load float* %bar
5910 %tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
5911 %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
5912 %tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2
5913 %tmp5 = insertelement <4 x float> %tmp4, float %tmp1, i32 3
5914 %tmp6 = getelementptr float* %bar, i64 1
5915 store float* %tmp6, float** %ptr
5916 ret <4 x float> %tmp5
5917}
5918
; Post-index (register) ld1r test: loads one float from %bar, inserts that
; same scalar into lanes 0..3 of a <4 x float> that starts as undef, and
; stores %bar advanced by %inc elements to %ptr. The FileCheck pattern
; expects ld1r.4s with an x-register write-back operand.
5919define <4 x float> @test_v4f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) {
5920; CHECK-LABEL: test_v4f32_post_reg_ld1r:
5921; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}}
5922 %tmp1 = load float* %bar
5923 %tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
5924 %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
5925 %tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2
5926 %tmp5 = insertelement <4 x float> %tmp4, float %tmp1, i32 3
5927 %tmp6 = getelementptr float* %bar, i64 %inc
5928 store float* %tmp6, float** %ptr
5929 ret <4 x float> %tmp5
5930}
5931
; Post-index (immediate) ld1r test: loads one float from %bar, inserts that
; same scalar into lanes 0 and 1 of a <2 x float> that starts as undef, and
; stores %bar advanced by 1 element to %ptr. The FileCheck pattern expects
; ld1r.2s with write-back #4 (1 element x 4 bytes).
5932define <2 x float> @test_v2f32_post_imm_ld1r(float* %bar, float** %ptr) {
5933; CHECK-LABEL: test_v2f32_post_imm_ld1r:
5934; CHECK: ld1r.2s { v0 }, [x0], #4
5935 %tmp1 = load float* %bar
5936 %tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
5937 %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
5938 %tmp4 = getelementptr float* %bar, i64 1
5939 store float* %tmp4, float** %ptr
5940 ret <2 x float> %tmp3
5941}
5942
; Post-index (register) ld1r test: loads one float from %bar, inserts that
; same scalar into lanes 0 and 1 of a <2 x float> that starts as undef, and
; stores %bar advanced by %inc elements to %ptr. The FileCheck pattern
; expects ld1r.2s with an x-register write-back operand.
5943define <2 x float> @test_v2f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) {
5944; CHECK-LABEL: test_v2f32_post_reg_ld1r:
5945; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}}
5946 %tmp1 = load float* %bar
5947 %tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
5948 %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
5949 %tmp4 = getelementptr float* %bar, i64 %inc
5950 store float* %tmp4, float** %ptr
5951 ret <2 x float> %tmp3
5952}
5953
; Post-index (immediate) ld1r test: loads one double from %bar, inserts that
; same scalar into lanes 0 and 1 of a <2 x double> that starts as undef, and
; stores %bar advanced by 1 element to %ptr. The FileCheck pattern expects
; ld1r.2d with write-back #8 (1 element x 8 bytes).
5954define <2 x double> @test_v2f64_post_imm_ld1r(double* %bar, double** %ptr) {
5955; CHECK-LABEL: test_v2f64_post_imm_ld1r:
5956; CHECK: ld1r.2d { v0 }, [x0], #8
5957 %tmp1 = load double* %bar
5958 %tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
5959 %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
5960 %tmp4 = getelementptr double* %bar, i64 1
5961 store double* %tmp4, double** %ptr
5962 ret <2 x double> %tmp3
5963}
5964
; Load-and-replicate with post-index writeback, register increment.
; One double is loaded from %bar and inserted into both lanes of a
; <2 x double>; %bar advanced by %inc elements is stored through %ptr.
; The pattern below expects a single ld1r.2d whose writeback amount is
; held in an x register (any register number is accepted).
5965define <2 x double> @test_v2f64_post_reg_ld1r(double* %bar, double** %ptr, i64 %inc) {
5966; CHECK-LABEL: test_v2f64_post_reg_ld1r:
5967; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}}
5968 %tmp1 = load double* %bar
 ; Insert the same scalar into lanes 0 and 1, starting from an all-undef vector.
5969 %tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
5970 %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
 ; Advance by a run-time element count and store the new pointer.
5971 %tmp4 = getelementptr double* %bar, i64 %inc
5972 store double* %tmp4, double** %ptr
5973 ret <2 x double> %tmp3
5974}
5975
; Single-lane load with post-index writeback, immediate increment.
; One i8 is loaded from %bar and inserted into lane 1 of the incoming
; <16 x i8> %A; %bar advanced by one element is stored through %ptr.
; The pattern below expects ld1.b targeting lane 1 with a #1 writeback.
5976define <16 x i8> @test_v16i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <16 x i8> %A) {
5977; CHECK-LABEL: test_v16i8_post_imm_ld1lane:
5978; CHECK: ld1.b { v0 }[1], [x0], #1
5979 %tmp1 = load i8* %bar
 ; Only lane 1 is replaced; the other lanes of %A pass through unchanged.
5980 %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
5981 %tmp3 = getelementptr i8* %bar, i64 1
5982 store i8* %tmp3, i8** %ptr
5983 ret <16 x i8> %tmp2
5984}
5985
; Single-lane load with post-index writeback, register increment.
; One i8 is loaded from %bar and inserted into lane 1 of the incoming
; <16 x i8> %A; %bar advanced by %inc elements is stored through %ptr.
; The pattern below expects ld1.b targeting lane 1 with the writeback
; amount in an x register (any register number is accepted).
5986define <16 x i8> @test_v16i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <16 x i8> %A) {
5987; CHECK-LABEL: test_v16i8_post_reg_ld1lane:
5988; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}}
5989 %tmp1 = load i8* %bar
 ; Only lane 1 is replaced; the other lanes of %A pass through unchanged.
5990 %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
5991 %tmp3 = getelementptr i8* %bar, i64 %inc
5992 store i8* %tmp3, i8** %ptr
5993 ret <16 x i8> %tmp2
5994}
5995
; Single-lane load with post-index writeback, immediate increment.
; One i8 is loaded from %bar and inserted into lane 1 of the incoming
; <8 x i8> %A; %bar advanced by one element is stored through %ptr.
; The pattern below expects ld1.b targeting lane 1 with a #1 writeback.
5996define <8 x i8> @test_v8i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <8 x i8> %A) {
5997; CHECK-LABEL: test_v8i8_post_imm_ld1lane:
5998; CHECK: ld1.b { v0 }[1], [x0], #1
5999 %tmp1 = load i8* %bar
 ; Only lane 1 is replaced; the other lanes of %A pass through unchanged.
6000 %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
6001 %tmp3 = getelementptr i8* %bar, i64 1
6002 store i8* %tmp3, i8** %ptr
6003 ret <8 x i8> %tmp2
6004}
6005
; Single-lane load with post-index writeback, register increment.
; One i8 is loaded from %bar and inserted into lane 1 of the incoming
; <8 x i8> %A; %bar advanced by %inc elements is stored through %ptr.
; The pattern below expects ld1.b targeting lane 1 with the writeback
; amount in an x register (any register number is accepted).
6006define <8 x i8> @test_v8i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <8 x i8> %A) {
6007; CHECK-LABEL: test_v8i8_post_reg_ld1lane:
6008; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}}
6009 %tmp1 = load i8* %bar
 ; Only lane 1 is replaced; the other lanes of %A pass through unchanged.
6010 %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
6011 %tmp3 = getelementptr i8* %bar, i64 %inc
6012 store i8* %tmp3, i8** %ptr
6013 ret <8 x i8> %tmp2
6014}
6015
; Single-lane load with post-index writeback, immediate increment.
; One i16 is loaded from %bar and inserted into lane 1 of the incoming
; <8 x i16> %A; %bar advanced by one element is stored through %ptr.
; The pattern below expects ld1.h targeting lane 1 with a #2 writeback.
6016define <8 x i16> @test_v8i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <8 x i16> %A) {
6017; CHECK-LABEL: test_v8i16_post_imm_ld1lane:
6018; CHECK: ld1.h { v0 }[1], [x0], #2
6019 %tmp1 = load i16* %bar
 ; Only lane 1 is replaced; the other lanes of %A pass through unchanged.
6020 %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
6021 %tmp3 = getelementptr i16* %bar, i64 1
6022 store i16* %tmp3, i16** %ptr
6023 ret <8 x i16> %tmp2
6024}
6025
; Single-lane load with post-index writeback, register increment.
; One i16 is loaded from %bar and inserted into lane 1 of the incoming
; <8 x i16> %A; %bar advanced by %inc elements is stored through %ptr.
; The pattern below expects ld1.h targeting lane 1 with the writeback
; amount in an x register (any register number is accepted).
6026define <8 x i16> @test_v8i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <8 x i16> %A) {
6027; CHECK-LABEL: test_v8i16_post_reg_ld1lane:
6028; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
6029 %tmp1 = load i16* %bar
 ; Only lane 1 is replaced; the other lanes of %A pass through unchanged.
6030 %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
 ; %inc counts i16 elements, not bytes.
6031 %tmp3 = getelementptr i16* %bar, i64 %inc
6032 store i16* %tmp3, i16** %ptr
6033 ret <8 x i16> %tmp2
6034}
6035
; Single-lane load with post-index writeback, immediate increment.
; One i16 is loaded from %bar and inserted into lane 1 of the incoming
; <4 x i16> %A; %bar advanced by one element is stored through %ptr.
; The pattern below expects ld1.h targeting lane 1 with a #2 writeback.
6036define <4 x i16> @test_v4i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <4 x i16> %A) {
6037; CHECK-LABEL: test_v4i16_post_imm_ld1lane:
6038; CHECK: ld1.h { v0 }[1], [x0], #2
6039 %tmp1 = load i16* %bar
 ; Only lane 1 is replaced; the other lanes of %A pass through unchanged.
6040 %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
6041 %tmp3 = getelementptr i16* %bar, i64 1
6042 store i16* %tmp3, i16** %ptr
6043 ret <4 x i16> %tmp2
6044}
6045
; Single-lane load with post-index writeback, register increment.
; One i16 is loaded from %bar and inserted into lane 1 of the incoming
; <4 x i16> %A; %bar advanced by %inc elements is stored through %ptr.
; The pattern below expects ld1.h targeting lane 1 with the writeback
; amount in an x register (any register number is accepted).
6046define <4 x i16> @test_v4i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A) {
6047; CHECK-LABEL: test_v4i16_post_reg_ld1lane:
6048; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
6049 %tmp1 = load i16* %bar
 ; Only lane 1 is replaced; the other lanes of %A pass through unchanged.
6050 %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
 ; %inc counts i16 elements, not bytes.
6051 %tmp3 = getelementptr i16* %bar, i64 %inc
6052 store i16* %tmp3, i16** %ptr
6053 ret <4 x i16> %tmp2
6054}
6055
; Single-lane load with post-index writeback, immediate increment.
; One i32 is loaded from %bar and inserted into lane 1 of the incoming
; <4 x i32> %A; %bar advanced by one element is stored through %ptr.
; The pattern below expects ld1.s targeting lane 1 with a #4 writeback.
6056define <4 x i32> @test_v4i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <4 x i32> %A) {
6057; CHECK-LABEL: test_v4i32_post_imm_ld1lane:
6058; CHECK: ld1.s { v0 }[1], [x0], #4
6059 %tmp1 = load i32* %bar
 ; Only lane 1 is replaced; the other lanes of %A pass through unchanged.
6060 %tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
6061 %tmp3 = getelementptr i32* %bar, i64 1
6062 store i32* %tmp3, i32** %ptr
6063 ret <4 x i32> %tmp2
6064}
6065
; Single-lane load with post-index writeback, register increment.
; One i32 is loaded from %bar and inserted into lane 1 of the incoming
; <4 x i32> %A; %bar advanced by %inc elements is stored through %ptr.
; The pattern below expects ld1.s targeting lane 1 with the writeback
; amount in an x register (any register number is accepted).
6066define <4 x i32> @test_v4i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <4 x i32> %A) {
6067; CHECK-LABEL: test_v4i32_post_reg_ld1lane:
6068; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
6069 %tmp1 = load i32* %bar
 ; Only lane 1 is replaced; the other lanes of %A pass through unchanged.
6070 %tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
 ; %inc counts i32 elements, not bytes.
6071 %tmp3 = getelementptr i32* %bar, i64 %inc
6072 store i32* %tmp3, i32** %ptr
6073 ret <4 x i32> %tmp2
6074}
6075
; Single-lane load with post-index writeback, immediate increment.
; One i32 is loaded from %bar and inserted into lane 1 of the incoming
; <2 x i32> %A; %bar advanced by one element is stored through %ptr.
; The pattern below expects ld1.s targeting lane 1 with a #4 writeback.
6076define <2 x i32> @test_v2i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <2 x i32> %A) {
6077; CHECK-LABEL: test_v2i32_post_imm_ld1lane:
6078; CHECK: ld1.s { v0 }[1], [x0], #4
6079 %tmp1 = load i32* %bar
 ; Only lane 1 is replaced; lane 0 of %A passes through unchanged.
6080 %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
6081 %tmp3 = getelementptr i32* %bar, i64 1
6082 store i32* %tmp3, i32** %ptr
6083 ret <2 x i32> %tmp2
6084}
6085
; Single-lane load with post-index writeback, register increment.
; One i32 is loaded from %bar and inserted into lane 1 of the incoming
; <2 x i32> %A; %bar advanced by %inc elements is stored through %ptr.
; The pattern below expects ld1.s targeting lane 1 with the writeback
; amount in an x register (any register number is accepted).
6086define <2 x i32> @test_v2i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <2 x i32> %A) {
6087; CHECK-LABEL: test_v2i32_post_reg_ld1lane:
6088; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
6089 %tmp1 = load i32* %bar
 ; Only lane 1 is replaced; lane 0 of %A passes through unchanged.
6090 %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
 ; %inc counts i32 elements, not bytes.
6091 %tmp3 = getelementptr i32* %bar, i64 %inc
6092 store i32* %tmp3, i32** %ptr
6093 ret <2 x i32> %tmp2
6094}
6095
; Single-lane load with post-index writeback, immediate increment.
; One i64 is loaded from %bar and inserted into lane 1 of the incoming
; <2 x i64> %A; %bar advanced by one element is stored through %ptr.
; The pattern below expects ld1.d targeting lane 1 with a #8 writeback.
6096define <2 x i64> @test_v2i64_post_imm_ld1lane(i64* %bar, i64** %ptr, <2 x i64> %A) {
6097; CHECK-LABEL: test_v2i64_post_imm_ld1lane:
6098; CHECK: ld1.d { v0 }[1], [x0], #8
6099 %tmp1 = load i64* %bar
 ; Only lane 1 is replaced; lane 0 of %A passes through unchanged.
6100 %tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
6101 %tmp3 = getelementptr i64* %bar, i64 1
6102 store i64* %tmp3, i64** %ptr
6103 ret <2 x i64> %tmp2
6104}
6105
; Single-lane load with post-index writeback, register increment.
; One i64 is loaded from %bar and inserted into lane 1 of the incoming
; <2 x i64> %A; %bar advanced by %inc elements is stored through %ptr.
; The pattern below expects ld1.d targeting lane 1 with the writeback
; amount in an x register (any register number is accepted).
6106define <2 x i64> @test_v2i64_post_reg_ld1lane(i64* %bar, i64** %ptr, i64 %inc, <2 x i64> %A) {
6107; CHECK-LABEL: test_v2i64_post_reg_ld1lane:
6108; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
6109 %tmp1 = load i64* %bar
 ; Only lane 1 is replaced; lane 0 of %A passes through unchanged.
6110 %tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
 ; %inc counts i64 elements, not bytes.
6111 %tmp3 = getelementptr i64* %bar, i64 %inc
6112 store i64* %tmp3, i64** %ptr
6113 ret <2 x i64> %tmp2
6114}
6115
; Single-lane load with post-index writeback, immediate increment.
; One float is loaded from %bar and inserted into lane 1 of the incoming
; <4 x float> %A; %bar advanced by one element is stored through %ptr.
; The pattern below expects ld1.s targeting lane 1 with a #4 writeback.
6116define <4 x float> @test_v4f32_post_imm_ld1lane(float* %bar, float** %ptr, <4 x float> %A) {
6117; CHECK-LABEL: test_v4f32_post_imm_ld1lane:
6118; CHECK: ld1.s { v0 }[1], [x0], #4
6119 %tmp1 = load float* %bar
 ; Only lane 1 is replaced; the other lanes of %A pass through unchanged.
6120 %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
6121 %tmp3 = getelementptr float* %bar, i64 1
6122 store float* %tmp3, float** %ptr
6123 ret <4 x float> %tmp2
6124}
6125
; Single-lane load with post-index writeback, register increment.
; One float is loaded from %bar and inserted into lane 1 of the incoming
; <4 x float> %A; %bar advanced by %inc elements is stored through %ptr.
; The pattern below expects ld1.s targeting lane 1 with the writeback
; amount in an x register (any register number is accepted).
6126define <4 x float> @test_v4f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <4 x float> %A) {
6127; CHECK-LABEL: test_v4f32_post_reg_ld1lane:
6128; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
6129 %tmp1 = load float* %bar
 ; Only lane 1 is replaced; the other lanes of %A pass through unchanged.
6130 %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
 ; %inc counts float elements, not bytes.
6131 %tmp3 = getelementptr float* %bar, i64 %inc
6132 store float* %tmp3, float** %ptr
6133 ret <4 x float> %tmp2
6134}
6135
; Single-lane load with post-index writeback, immediate increment.
; One float is loaded from %bar and inserted into lane 1 of the incoming
; <2 x float> %A; %bar advanced by one element is stored through %ptr.
; The pattern below expects ld1.s targeting lane 1 with a #4 writeback.
6136define <2 x float> @test_v2f32_post_imm_ld1lane(float* %bar, float** %ptr, <2 x float> %A) {
6137; CHECK-LABEL: test_v2f32_post_imm_ld1lane:
6138; CHECK: ld1.s { v0 }[1], [x0], #4
6139 %tmp1 = load float* %bar
 ; Only lane 1 is replaced; lane 0 of %A passes through unchanged.
6140 %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
6141 %tmp3 = getelementptr float* %bar, i64 1
6142 store float* %tmp3, float** %ptr
6143 ret <2 x float> %tmp2
6144}
6145
; Single-lane load with post-index writeback, register increment.
; One float is loaded from %bar and inserted into lane 1 of the incoming
; <2 x float> %A; %bar advanced by %inc elements is stored through %ptr.
; The pattern below expects ld1.s targeting lane 1 with the writeback
; amount in an x register (any register number is accepted).
6146define <2 x float> @test_v2f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <2 x float> %A) {
6147; CHECK-LABEL: test_v2f32_post_reg_ld1lane:
6148; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
6149 %tmp1 = load float* %bar
 ; Only lane 1 is replaced; lane 0 of %A passes through unchanged.
6150 %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
 ; %inc counts float elements, not bytes.
6151 %tmp3 = getelementptr float* %bar, i64 %inc
6152 store float* %tmp3, float** %ptr
6153 ret <2 x float> %tmp2
6154}
6155
; Single-lane load with post-index writeback, immediate increment.
; One double is loaded from %bar and inserted into lane 1 of the incoming
; <2 x double> %A; %bar advanced by one element is stored through %ptr.
; The pattern below expects ld1.d targeting lane 1 with a #8 writeback.
6156define <2 x double> @test_v2f64_post_imm_ld1lane(double* %bar, double** %ptr, <2 x double> %A) {
6157; CHECK-LABEL: test_v2f64_post_imm_ld1lane:
6158; CHECK: ld1.d { v0 }[1], [x0], #8
6159 %tmp1 = load double* %bar
 ; Only lane 1 is replaced; lane 0 of %A passes through unchanged.
6160 %tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
6161 %tmp3 = getelementptr double* %bar, i64 1
6162 store double* %tmp3, double** %ptr
6163 ret <2 x double> %tmp2
6164}
6165
; Single-lane load with post-index writeback, register increment.
; One double is loaded from %bar and inserted into lane 1 of the incoming
; <2 x double> %A; %bar advanced by %inc elements is stored through %ptr.
; The pattern below expects ld1.d targeting lane 1 with the writeback
; amount in an x register (any register number is accepted).
6166define <2 x double> @test_v2f64_post_reg_ld1lane(double* %bar, double** %ptr, i64 %inc, <2 x double> %A) {
6167; CHECK-LABEL: test_v2f64_post_reg_ld1lane:
6168; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
6169 %tmp1 = load double* %bar
 ; Only lane 1 is replaced; lane 0 of %A passes through unchanged.
6170 %tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
 ; %inc counts double elements, not bytes.
6171 %tmp3 = getelementptr double* %bar, i64 %inc
6172 store double* %tmp3, double** %ptr
6173 ret <2 x double> %tmp2
6174}