; blob: c5387e04bdf7b7628afa9df5aa156e40ba7cc334
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; Single-lane store from a 64-bit <8 x i8> vector: element 3 is extracted and
; stored through %A with "align 8". The expected instruction addresses lane 3
; of d16 and carries no alignment qualifier on the address operand.
define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst1lanei8:
;Check the (default) alignment.
;CHECK: vst1.8 {d16[3]}, [r0]
  %tmp1 = load <8 x i8>* %B
  %tmp2 = extractelement <8 x i8> %tmp1, i32 3
  store i8 %tmp2, i8* %A, align 8
  ret void
}

; Single-lane store from a 64-bit <4 x i16> vector: element 2 is extracted and
; stored through %A with "align 8". The expected instruction uses a :16
; alignment qualifier even though the IR store claims 8-byte alignment.
define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst1lanei16:
;Check the alignment value.  Max for this instruction is 16 bits:
;CHECK: vst1.16 {d16[2]}, [r0, :16]
  %tmp1 = load <4 x i16>* %B
  %tmp2 = extractelement <4 x i16> %tmp1, i32 2
  store i16 %tmp2, i16* %A, align 8
  ret void
}

; Single-lane store from a 64-bit <2 x i32> vector: element 1 is extracted and
; stored through %A with "align 8". The expected instruction uses a :32
; alignment qualifier even though the IR store claims 8-byte alignment.
define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst1lanei32:
;Check the alignment value.  Max for this instruction is 32 bits:
;CHECK: vst1.32 {d16[1]}, [r0, :32]
  %tmp1 = load <2 x i32>* %B
  %tmp2 = extractelement <2 x i32> %tmp1, i32 1
  store i32 %tmp2, i32* %A, align 8
  ret void
}

; Single-lane store from a 128-bit <16 x i8> vector: element 9 is extracted
; and stored through %A with "align 8". The expected instruction addresses
; lane 1 of d17 and carries no alignment qualifier.
define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst1laneQi8:
;CHECK: vst1.8 {d17[1]}, [r0]
  %tmp1 = load <16 x i8>* %B
  %tmp2 = extractelement <16 x i8> %tmp1, i32 9
  store i8 %tmp2, i8* %A, align 8
  ret void
}

; Single-lane store from a 128-bit <8 x i16> vector: element 5 is extracted
; and stored through %A with "align 8". The expected instruction addresses
; lane 1 of d17 with a :16 alignment qualifier.
define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst1laneQi16:
;CHECK: vst1.16 {d17[1]}, [r0, :16]
  %tmp1 = load <8 x i16>* %B
  %tmp2 = extractelement <8 x i16> %tmp1, i32 5
  store i16 %tmp2, i16* %A, align 8
  ret void
}

; Single-lane store from a 128-bit <4 x i32> vector: element 3 is extracted
; and stored through %A with "align 8". The expected instruction addresses
; lane 1 of d17 with a :32 alignment qualifier.
define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst1laneQi32:
;CHECK: vst1.32 {d17[1]}, [r0, :32]
  %tmp1 = load <4 x i32>* %B
  %tmp2 = extractelement <4 x i32> %tmp1, i32 3
  store i32 %tmp2, i32* %A, align 8
  ret void
}

; Two-register lane store of <8 x i8>: the same loaded vector is passed for
; both register operands, with lane 1 and a trailing alignment operand of 4.
; The expected instruction uses a :16 qualifier.
define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst2lanei8:
;Check the alignment value.  Max for this instruction is 16 bits:
;CHECK: vst2.8 {d16[1], d17[1]}, [r0, :16]
  %tmp1 = load <8 x i8>* %B
  call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
  ret void
}

; Two-register lane store of <4 x i16>: %A is bitcast to i8* for the
; intrinsic, the same vector is passed twice, with lane 1 and a trailing
; alignment operand of 8. The expected instruction uses a :32 qualifier.
define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst2lanei16:
;Check the alignment value.  Max for this instruction is 32 bits:
;CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
  ret void
}

; Two-register lane store of <2 x i32> with lane 1 and a trailing alignment
; operand of 1. Only the vst2.32 mnemonic is verified; registers and address
; operand are not matched.
define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst2lanei32:
;CHECK: vst2.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  call void @llvm.arm.neon.vst2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  ret void
}

; Two-register lane store of <2 x float> with lane 1 and a trailing alignment
; operand of 1. Only the vst2.32 mnemonic is verified.
define void @vst2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst2lanef:
;CHECK: vst2.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  call void @llvm.arm.neon.vst2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  ret void
}

; Two-register lane store of 128-bit <8 x i16> vectors with lane 5 and a
; trailing alignment operand of 1. The expected instruction addresses lane 1
; of d17 and d19 and carries no alignment qualifier.
define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst2laneQi16:
;Check the (default) alignment.
;CHECK: vst2.16 {d17[1], d19[1]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
  ret void
}

; Two-register lane store of 128-bit <4 x i32> vectors with lane 2 and a
; trailing alignment operand of 16. The expected instruction addresses lane 0
; of d17 and d19 with a :64 qualifier.
define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst2laneQi32:
;Check the alignment value.  Max for this instruction is 64 bits:
;CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
  ret void
}

; Two-register lane store of 128-bit <4 x float> vectors with lane 3 and a
; trailing alignment operand of 1. Only the vst2.32 mnemonic is verified.
define void @vst2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst2laneQf:
;CHECK: vst2.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  call void @llvm.arm.neon.vst2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 3, i32 1)
  ret void
}

; Two-register lane-store intrinsics used by the vst2lane tests. Each takes an
; i8* address, two vectors of the same type, and two i32 operands (passed by
; the callers in this file as the lane index followed by the alignment).
; 64-bit vector variants:
declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind

; 128-bit vector variants:
declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind

; Three-register lane store of <8 x i8>: the same loaded vector is passed for
; all three register operands, with lane 1 and a trailing alignment operand
; of 1. Only the vst3.8 mnemonic is verified.
define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst3lanei8:
;CHECK: vst3.8
  %tmp1 = load <8 x i8>* %B
  call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
  ret void
}

; Three-register lane store of <4 x i16> with lane 1 and a trailing alignment
; operand of 8. The expected instruction carries no alignment qualifier
; despite the requested alignment.
define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst3lanei16:
;Check the (default) alignment value.  VST3 does not support alignment.
;CHECK: vst3.16 {d16[1], d17[1], d18[1]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
  ret void
}

; Three-register lane store of <2 x i32> with lane 1 and a trailing alignment
; operand of 1. Only the vst3.32 mnemonic is verified.
define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst3lanei32:
;CHECK: vst3.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  call void @llvm.arm.neon.vst3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  ret void
}

; Three-register lane store of <2 x float> with lane 1 and a trailing
; alignment operand of 1. Only the vst3.32 mnemonic is verified.
define void @vst3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst3lanef:
;CHECK: vst3.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  call void @llvm.arm.neon.vst3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  ret void
}

; Three-register lane store of 128-bit <8 x i16> vectors with lane 6 and a
; trailing alignment operand of 8. The expected instruction addresses lane 2
; of d17, d19 and d21 and carries no alignment qualifier.
define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst3laneQi16:
;Check the (default) alignment value.  VST3 does not support alignment.
;CHECK: vst3.16 {d17[2], d19[2], d21[2]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6, i32 8)
  ret void
}

; Three-register lane store of 128-bit <4 x i32> vectors with lane 0 and a
; trailing alignment operand of 1. Only the vst3.32 mnemonic is verified.
define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst3laneQi32:
;CHECK: vst3.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1)
  ret void
}

; Three-register lane store of 128-bit <4 x float> vectors with lane 1 and a
; trailing alignment operand of 1. Only the vst3.32 mnemonic is verified.
define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst3laneQf:
;CHECK: vst3.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  call void @llvm.arm.neon.vst3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  ret void
}

; Three-register lane-store intrinsics used by the vst3lane tests. Each takes
; an i8* address, three vectors of the same type, and two i32 operands (passed
; by the callers in this file as the lane index followed by the alignment).
; 64-bit vector variants:
declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind

; 128-bit vector variants:
declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind


; Four-register lane store of <8 x i8>: the same loaded vector is passed for
; all four register operands, with lane 1 and a trailing alignment operand
; of 8. The expected instruction uses a :32 qualifier.
define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst4lanei8:
;Check the alignment value.  Max for this instruction is 32 bits:
;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
  %tmp1 = load <8 x i8>* %B
  call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
  ret void
}

; Four-register lane store of <4 x i16> with lane 1 and a trailing alignment
; operand of 1. Only the vst4.16 mnemonic is verified.
define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst4lanei16:
;CHECK: vst4.16
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  call void @llvm.arm.neon.vst4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
  ret void
}

; Four-register lane store of <2 x i32> with lane 1 and a trailing alignment
; operand of 16. The expected instruction uses a :128 qualifier.
define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst4lanei32:
;Check the alignment value.  Max for this instruction is 128 bits:
;CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16)
  ret void
}

; Four-register lane store of <2 x float> with lane 1 and a trailing alignment
; operand of 1. Only the vst4.32 mnemonic is verified.
define void @vst4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst4lanef:
;CHECK: vst4.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  call void @llvm.arm.neon.vst4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  ret void
}

; Four-register lane store of 128-bit <8 x i16> vectors with lane 7 and a
; trailing alignment operand of 16. The expected instruction addresses lane 3
; of d17, d19, d21 and d23 with a :64 qualifier.
define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst4laneQi16:
;Check the alignment value.  Max for this instruction is 64 bits:
;CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16)
  ret void
}

; Four-register lane store of 128-bit <4 x i32> vectors with lane 2 and a
; trailing alignment operand of 1. The expected instruction addresses lane 0
; of d17, d19, d21 and d23 and carries no alignment qualifier.
define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst4laneQi32:
;Check the (default) alignment.
;CHECK: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
  ret void
}

; Four-register lane store of 128-bit <4 x float> vectors with lane 1 and a
; trailing alignment operand of 1. Only the vst4.32 mnemonic is verified.
define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst4laneQf:
;CHECK: vst4.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  call void @llvm.arm.neon.vst4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  ret void
}

; Four-register lane-store intrinsics used by the vst4lane tests. Each takes
; an i8* address, four vectors of the same type, and two i32 operands (passed
; by the callers in this file as the lane index followed by the alignment).
; 64-bit vector variants:
declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind

; 128-bit vector variants:
declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind