; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; Stores lane 1 of a <8 x i8> value (used for both vector operands) through
; the vst2lane intrinsic with an alignment operand of 4.
define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst2lanei8:
;Check the alignment value. Max for this instruction is 16 bits:
;CHECK: vst2.8 {d16[1], d17[1]}, [r0, :16]
  %tmp1 = load <8 x i8>* %B
  call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
  ret void
}

; Stores lane 1 of a <4 x i16> value (used for both vector operands) through
; the vst2lane intrinsic with an alignment operand of 8.
define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst2lanei16:
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32]
  ; The intrinsic takes an i8* address, so the typed pointer is cast first.
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
  ret void
}

; Stores lane 1 of a <2 x i32> value (used for both vector operands) through
; the vst2lane intrinsic with an alignment operand of 1. Only the vst2.32
; mnemonic is matched.
define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst2lanei32:
;CHECK: vst2.32
  ; The intrinsic takes an i8* address, so the typed pointer is cast first.
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  call void @llvm.arm.neon.vst2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  ret void
}

; Stores lane 1 of a <2 x float> value (used for both vector operands) through
; the vst2lane intrinsic with an alignment operand of 1. Only the vst2.32
; mnemonic is matched.
define void @vst2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst2lanef:
;CHECK: vst2.32
  ; The intrinsic takes an i8* address, so the typed pointer is cast first.
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  call void @llvm.arm.neon.vst2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  ret void
}

; Stores lane 5 of a <8 x i16> value (used for both vector operands) through
; the vst2lane intrinsic with an alignment operand of 1. The expected
; instruction addresses element 1 of d17 and d19 with no alignment qualifier.
define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst2laneQi16:
;Check the (default) alignment.
;CHECK: vst2.16 {d17[1], d19[1]}, [r0]
  ; The intrinsic takes an i8* address, so the typed pointer is cast first.
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
  ret void
}

; Stores lane 2 of a <4 x i32> value (used for both vector operands) through
; the vst2lane intrinsic with an alignment operand of 16. The expected
; instruction addresses element 0 of d17 and d19.
define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst2laneQi32:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64]
  ; The intrinsic takes an i8* address, so the typed pointer is cast first.
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
  ret void
}

; Stores lane 3 of a <4 x float> value (used for both vector operands) through
; the vst2lane intrinsic with an alignment operand of 1. Only the vst2.32
; mnemonic is matched.
define void @vst2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst2laneQf:
;CHECK: vst2.32
  ; The intrinsic takes an i8* address, so the typed pointer is cast first.
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  call void @llvm.arm.neon.vst2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 3, i32 1)
  ret void
}

; vst2lane intrinsic declarations for 64-bit vector operands.
; Operands: i8* address, two vectors, i32 lane index, i32 alignment.
declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind

; vst2lane intrinsic declarations for 128-bit vector operands.
declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind

; Stores lane 1 of a <8 x i8> value (used for all three vector operands)
; through the vst3lane intrinsic with an alignment operand of 1. Only the
; vst3.8 mnemonic is matched.
define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst3lanei8:
;CHECK: vst3.8
  %tmp1 = load <8 x i8>* %B
  call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
  ret void
}

; Stores lane 1 of a <4 x i16> value (used for all three vector operands)
; through the vst3lane intrinsic with an alignment operand of 8. The expected
; instruction carries no alignment qualifier.
define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst3lanei16:
;Check the (default) alignment value. VST3 does not support alignment.
;CHECK: vst3.16 {d16[1], d17[1], d18[1]}, [r0]
  ; The intrinsic takes an i8* address, so the typed pointer is cast first.
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
  ret void
}

; Stores lane 1 of a <2 x i32> value (used for all three vector operands)
; through the vst3lane intrinsic with an alignment operand of 1. Only the
; vst3.32 mnemonic is matched.
define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst3lanei32:
;CHECK: vst3.32
  ; The intrinsic takes an i8* address, so the typed pointer is cast first.
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  call void @llvm.arm.neon.vst3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  ret void
}

; Stores lane 1 of a <2 x float> value (used for all three vector operands)
; through the vst3lane intrinsic with an alignment operand of 1. Only the
; vst3.32 mnemonic is matched.
define void @vst3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst3lanef:
;CHECK: vst3.32
  ; The intrinsic takes an i8* address, so the typed pointer is cast first.
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  call void @llvm.arm.neon.vst3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  ret void
}

; Stores lane 6 of a <8 x i16> value (used for all three vector operands)
; through the vst3lane intrinsic with an alignment operand of 8. The expected
; instruction addresses element 2 of d17, d19 and d21 with no alignment
; qualifier.
define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst3laneQi16:
;Check the (default) alignment value. VST3 does not support alignment.
;CHECK: vst3.16 {d17[2], d19[2], d21[2]}, [r0]
  ; The intrinsic takes an i8* address, so the typed pointer is cast first.
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6, i32 8)
  ret void
}

; Stores lane 0 of a <4 x i32> value (used for all three vector operands)
; through the vst3lane intrinsic with an alignment operand of 1. Only the
; vst3.32 mnemonic is matched.
define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst3laneQi32:
;CHECK: vst3.32
  ; The intrinsic takes an i8* address, so the typed pointer is cast first.
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1)
  ret void
}

; Stores lane 1 of a <4 x float> value (used for all three vector operands)
; through the vst3lane intrinsic with an alignment operand of 1. Only the
; vst3.32 mnemonic is matched.
define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst3laneQf:
;CHECK: vst3.32
  ; The intrinsic takes an i8* address, so the typed pointer is cast first.
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  call void @llvm.arm.neon.vst3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  ret void
}

; vst3lane intrinsic declarations for 64-bit vector operands.
; Operands: i8* address, three vectors, i32 lane index, i32 alignment.
declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind

; vst3lane intrinsic declarations for 128-bit vector operands.
declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind


; Stores lane 1 of a <8 x i8> value (used for all four vector operands)
; through the vst4lane intrinsic with an alignment operand of 8.
define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst4lanei8:
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
  %tmp1 = load <8 x i8>* %B
  call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
  ret void
}

; Stores lane 1 of a <4 x i16> value (used for all four vector operands)
; through the vst4lane intrinsic with an alignment operand of 1. Only the
; vst4.16 mnemonic is matched.
define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst4lanei16:
;CHECK: vst4.16
  ; The intrinsic takes an i8* address, so the typed pointer is cast first.
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  call void @llvm.arm.neon.vst4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
  ret void
}

; Stores lane 1 of a <2 x i32> value (used for all four vector operands)
; through the vst4lane intrinsic with an alignment operand of 16.
define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst4lanei32:
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
  ; The intrinsic takes an i8* address, so the typed pointer is cast first.
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16)
  ret void
}

; Stores lane 1 of a <2 x float> value (used for all four vector operands)
; through the vst4lane intrinsic with an alignment operand of 1. Only the
; vst4.32 mnemonic is matched.
define void @vst4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst4lanef:
;CHECK: vst4.32
  ; The intrinsic takes an i8* address, so the typed pointer is cast first.
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  call void @llvm.arm.neon.vst4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  ret void
}

; Stores lane 7 of a <8 x i16> value (used for all four vector operands)
; through the vst4lane intrinsic with an alignment operand of 16. The expected
; instruction addresses element 3 of d17, d19, d21 and d23.
define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst4laneQi16:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
  ; The intrinsic takes an i8* address, so the typed pointer is cast first.
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16)
  ret void
}

; Stores lane 2 of a <4 x i32> value (used for all four vector operands)
; through the vst4lane intrinsic with an alignment operand of 1. The expected
; instruction addresses element 0 of d17, d19, d21 and d23 with no alignment
; qualifier.
define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst4laneQi32:
;Check the (default) alignment.
;CHECK: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
  ; The intrinsic takes an i8* address, so the typed pointer is cast first.
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
  ret void
}

; Stores lane 1 of a <4 x float> value (used for all four vector operands)
; through the vst4lane intrinsic with an alignment operand of 1. Only the
; vst4.32 mnemonic is matched.
define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst4laneQf:
;CHECK: vst4.32
  ; The intrinsic takes an i8* address, so the typed pointer is cast first.
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  call void @llvm.arm.neon.vst4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  ret void
}

; vst4lane intrinsic declarations for 64-bit vector operands.
; Operands: i8* address, four vectors, i32 lane index, i32 alignment.
declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind

; vst4lane intrinsic declarations for 128-bit vector operands.
declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind
225declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind