; Extracted from repository blob 6cc052bbeb1cad75eac25af61c5646110349aa83.
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
; The test compiles this file for ARM with NEON enabled and matches the
; emitted assembly against the expectations written next to each function.

; Extracts lane 3 of a <8 x i8> vector and stores it through an i8 pointer.
; The expected output is a single-lane vst1.8.
define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst1lanei8:
;Check the (default) alignment.
;The store is marked align 8, yet no alignment qualifier is expected.
;CHECK: vst1.8 {d16[3]}, [r0]
  %tmp1 = load <8 x i8>* %B
  %tmp2 = extractelement <8 x i8> %tmp1, i32 3
  store i8 %tmp2, i8* %A, align 8
  ret void
}

; Extracts lane 2 of a <4 x i16> vector and stores it through an i16 pointer
; with align 8. The expected vst1.16 carries a :16 alignment qualifier.
define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst1lanei16:
;Check the alignment value. Max for this instruction is 16 bits:
;CHECK: vst1.16 {d16[2]}, [r0, :16]
  %tmp1 = load <4 x i16>* %B
  %tmp2 = extractelement <4 x i16> %tmp1, i32 2
  store i16 %tmp2, i16* %A, align 8
  ret void
}

; Extracts lane 1 of a <2 x i32> vector and stores it through an i32 pointer
; with align 8. The expected vst1.32 carries a :32 alignment qualifier.
define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst1lanei32:
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vst1.32 {d16[1]}, [r0, :32]
  %tmp1 = load <2 x i32>* %B
  %tmp2 = extractelement <2 x i32> %tmp1, i32 1
  store i32 %tmp2, i32* %A, align 8
  ret void
}

; Extracts lane 1 of a <2 x float> vector and stores it through a float
; pointer. The store has no explicit alignment, and the expected vst1.32 has
; no alignment qualifier.
define void @vst1lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst1lanef:
;CHECK: vst1.32 {d16[1]}, [r0]
  %tmp1 = load <2 x float>* %B
  %tmp2 = extractelement <2 x float> %tmp1, i32 1
  store float %tmp2, float* %A
  ret void
}

; 128-bit variant: extracts lane 9 of a <16 x i8> vector and stores it through
; an i8 pointer. The lane is expected to be addressed as d17[1], with no
; alignment qualifier.
define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst1laneQi8:
;CHECK: vst1.8 {d17[1]}, [r0]
  %tmp1 = load <16 x i8>* %B
  %tmp2 = extractelement <16 x i8> %tmp1, i32 9
  store i8 %tmp2, i8* %A, align 8
  ret void
}

; 128-bit variant: extracts lane 5 of a <8 x i16> vector and stores it with
; align 8. The lane is expected to be addressed as d17[1], with a :16
; alignment qualifier.
define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst1laneQi16:
;CHECK: vst1.16 {d17[1]}, [r0, :16]
  %tmp1 = load <8 x i16>* %B
  %tmp2 = extractelement <8 x i16> %tmp1, i32 5
  store i16 %tmp2, i16* %A, align 8
  ret void
}

; 128-bit variant: extracts lane 3 of a <4 x i32> vector and stores it with
; align 8. The lane is expected to be addressed as d17[1], with a :32
; alignment qualifier.
define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst1laneQi32:
;CHECK: vst1.32 {d17[1]}, [r0, :32]
  %tmp1 = load <4 x i32>* %B
  %tmp2 = extractelement <4 x i32> %tmp1, i32 3
  store i32 %tmp2, i32* %A, align 8
  ret void
}

; 128-bit variant: extracts lane 3 of a <4 x float> vector and stores it with
; no explicit alignment. The lane is expected to be addressed as d17[1], with
; no alignment qualifier.
define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst1laneQf:
;CHECK: vst1.32 {d17[1]}, [r0]
  %tmp1 = load <4 x float>* %B
  %tmp2 = extractelement <4 x float> %tmp1, i32 3
  store float %tmp2, float* %A
  ret void
}

; Calls the vst2lane intrinsic on <8 x i8>, passing the same loaded vector as
; both sources, with lane operand 1 and a final (alignment) operand of 4.
define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst2lanei8:
;Check the alignment value. Max for this instruction is 16 bits:
;CHECK: vst2.8 {d16[1], d17[1]}, [r0, :16]
  %tmp1 = load <8 x i8>* %B
  call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
  ret void
}

; Calls the vst2lane intrinsic on <4 x i16> with lane operand 1 and a final
; (alignment) operand of 8. The i16 pointer is bitcast to i8* because the
; intrinsic takes an i8* address.
define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst2lanei16:
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
  ret void
}

;Check for a post-increment updating store with register increment.
; The address is loaded from %ptr, used for the lane store, then advanced by
; %inc i16 elements and written back to %ptr. The expected instruction uses
; the register-increment form ("[r1], r2").
define void @vst2lanei16_update(i16** %ptr, <4 x i16>* %B, i32 %inc) nounwind {
;CHECK: vst2lanei16_update:
;CHECK: vst2.16 {d16[1], d17[1]}, [r1], r2
  %A = load i16** %ptr
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 2)
  %tmp2 = getelementptr i16* %A, i32 %inc
  store i16* %tmp2, i16** %ptr
  ret void
}

; Calls the vst2lane intrinsic on <2 x i32> with lane operand 1 and a final
; (alignment) operand of 1. Only the vst2.32 mnemonic is matched; registers
; and addressing are left unconstrained.
define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst2lanei32:
;CHECK: vst2.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  call void @llvm.arm.neon.vst2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  ret void
}

; Calls the vst2lane intrinsic on <2 x float> with lane operand 1 and a final
; (alignment) operand of 1. Only the vst2.32 mnemonic is matched.
define void @vst2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst2lanef:
;CHECK: vst2.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  call void @llvm.arm.neon.vst2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  ret void
}

; 128-bit variant: calls the vst2lane intrinsic on <8 x i16> with lane operand
; 5 and a final (alignment) operand of 1. Lane 5 is expected to be addressed
; as lane 1 of d17 and d19, with no alignment qualifier.
define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst2laneQi16:
;Check the (default) alignment.
;CHECK: vst2.16 {d17[1], d19[1]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
  ret void
}

; 128-bit variant: calls the vst2lane intrinsic on <4 x i32> with lane operand
; 2 and a final (alignment) operand of 16. Lane 2 is expected to be addressed
; as lane 0 of d17 and d19, with a :64 alignment qualifier.
define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst2laneQi32:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
  ret void
}

; 128-bit variant: calls the vst2lane intrinsic on <4 x float> with lane
; operand 3 and a final (alignment) operand of 1. Only the vst2.32 mnemonic
; is matched.
define void @vst2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst2laneQf:
;CHECK: vst2.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  call void @llvm.arm.neon.vst2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 3, i32 1)
  ret void
}

; Declarations of the two-vector single-lane store intrinsics called in this
; file. Each takes an i8* address, two source vectors and two trailing i32
; operands, which the tests use as lane index and alignment.
; 64-bit vector variants:
declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind

; 128-bit vector variants:
declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind

; Calls the vst3lane intrinsic on <8 x i8>, passing the same loaded vector as
; all three sources, with lane operand 1 and a final (alignment) operand of 1.
; Only the vst3.8 mnemonic is matched.
define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst3lanei8:
;CHECK: vst3.8
  %tmp1 = load <8 x i8>* %B
  call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
  ret void
}

; Calls the vst3lane intrinsic on <4 x i16> with lane operand 1 and a final
; (alignment) operand of 8. No alignment qualifier is expected in the output
; despite the requested alignment.
define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst3lanei16:
;Check the (default) alignment value. VST3 does not support alignment.
;CHECK: vst3.16 {d16[1], d17[1], d18[1]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
  ret void
}

; Calls the vst3lane intrinsic on <2 x i32> with lane operand 1 and a final
; (alignment) operand of 1. Only the vst3.32 mnemonic is matched.
define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst3lanei32:
;CHECK: vst3.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  call void @llvm.arm.neon.vst3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  ret void
}

; Calls the vst3lane intrinsic on <2 x float> with lane operand 1 and a final
; (alignment) operand of 1. Only the vst3.32 mnemonic is matched.
define void @vst3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst3lanef:
;CHECK: vst3.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  call void @llvm.arm.neon.vst3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  ret void
}

; 128-bit variant: calls the vst3lane intrinsic on <8 x i16> with lane operand
; 6 and a final (alignment) operand of 8. Lane 6 is expected to be addressed
; as lane 2 of d17, d19 and d21, with no alignment qualifier.
define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst3laneQi16:
;Check the (default) alignment value. VST3 does not support alignment.
;CHECK: vst3.16 {d17[2], d19[2], d21[2]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6, i32 8)
  ret void
}

; 128-bit variant: calls the vst3lane intrinsic on <4 x i32> with lane operand
; 0 and a final (alignment) operand of 1. Only the vst3.32 mnemonic is
; matched.
define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst3laneQi32:
;CHECK: vst3.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1)
  ret void
}

;Check for a post-increment updating store.
; The address is loaded from %ptr, used for the lane store, then advanced by
; three i32 elements (one per stored lane) and written back to %ptr. The
; expected instruction uses the writeback form ("[r1]!").
define void @vst3laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
;CHECK: vst3laneQi32_update:
;CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r1]!
  %A = load i32** %ptr
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1)
  %tmp2 = getelementptr i32* %A, i32 3
  store i32* %tmp2, i32** %ptr
  ret void
}

; 128-bit variant: calls the vst3lane intrinsic on <4 x float> with lane
; operand 1 and a final (alignment) operand of 1. Only the vst3.32 mnemonic
; is matched.
define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst3laneQf:
;CHECK: vst3.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  call void @llvm.arm.neon.vst3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  ret void
}

; Declarations of the three-vector single-lane store intrinsics called in this
; file. Each takes an i8* address, three source vectors and two trailing i32
; operands, which the tests use as lane index and alignment.
; 64-bit vector variants:
declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind

; 128-bit vector variants:
declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind


; Calls the vst4lane intrinsic on <8 x i8>, passing the same loaded vector as
; all four sources, with lane operand 1 and a final (alignment) operand of 8.
define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst4lanei8:
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
  %tmp1 = load <8 x i8>* %B
  call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
  ret void
}

;Check for a post-increment updating store.
; The address is loaded from %ptr, used for the lane store, then advanced by
; four i8 elements (one per stored lane) and written back to %ptr. The
; expected instruction keeps the :32 alignment and uses the writeback form.
define void @vst4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
;CHECK: vst4lanei8_update:
;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
  %A = load i8** %ptr
  %tmp1 = load <8 x i8>* %B
  call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
  %tmp2 = getelementptr i8* %A, i32 4
  store i8* %tmp2, i8** %ptr
  ret void
}

; Calls the vst4lane intrinsic on <4 x i16> with lane operand 1 and a final
; (alignment) operand of 1. Only the vst4.16 mnemonic is matched.
define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst4lanei16:
;CHECK: vst4.16
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  call void @llvm.arm.neon.vst4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
  ret void
}

; Calls the vst4lane intrinsic on <2 x i32> with lane operand 1 and a final
; (alignment) operand of 16. The expected instruction carries a :128
; alignment qualifier.
define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst4lanei32:
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16)
  ret void
}

; Calls the vst4lane intrinsic on <2 x float> with lane operand 1 and a final
; (alignment) operand of 1. Only the vst4.32 mnemonic is matched.
define void @vst4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst4lanef:
;CHECK: vst4.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  call void @llvm.arm.neon.vst4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  ret void
}

; 128-bit variant: calls the vst4lane intrinsic on <8 x i16> with lane operand
; 7 and a final (alignment) operand of 16. Lane 7 is expected to be addressed
; as lane 3 of d17, d19, d21 and d23, with a :64 alignment qualifier.
define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst4laneQi16:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16)
  ret void
}

; 128-bit variant: calls the vst4lane intrinsic on <4 x i32> with lane operand
; 2 and a final (alignment) operand of 1. Lane 2 is expected to be addressed
; as lane 0 of d17, d19, d21 and d23, with no alignment qualifier.
define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst4laneQi32:
;Check the (default) alignment.
;CHECK: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
  ret void
}

; 128-bit variant: calls the vst4lane intrinsic on <4 x float> with lane
; operand 1 and a final (alignment) operand of 1. Only the vst4.32 mnemonic
; is matched.
define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst4laneQf:
;CHECK: vst4.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  call void @llvm.arm.neon.vst4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  ret void
}

; Declarations of the four-vector single-lane store intrinsics called in this
; file. Each takes an i8* address, four source vectors and two trailing i32
; operands, which the tests use as lane index and alignment.
; 64-bit vector variants:
declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind

; 128-bit vector variants:
declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind