; blob: d6ff6b9f298ddc358e06c0b0d06c691459a21c1b
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; Loads one i8 from %A and inserts it into lane 3 of the <8 x i8> vector
; loaded from %B. The CHECK lines expect this to be selected as a single-lane
; vld1.8 into d16[3].
define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld1lanei8:
;CHECK: vld1.8 {d16[3]}, [r0]
  %tmp1 = load <8 x i8>* %B
  %tmp2 = load i8* %A, align 1
  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3
  ret <8 x i8> %tmp3
}

; Loads one i16 from %A (align 2) and inserts it into lane 2 of the
; <4 x i16> vector loaded from %B. The CHECK lines expect a single-lane
; vld1.16 into d16[2].
define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld1lanei16:
;CHECK: vld1.16 {d16[2]}, [r0]
  %tmp1 = load <4 x i16>* %B
  %tmp2 = load i16* %A, align 2
  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2
  ret <4 x i16> %tmp3
}

; Loads one i32 from %A (align 4) and inserts it into lane 1 of the
; <2 x i32> vector loaded from %B. The CHECK lines expect a single-lane
; vld1.32 into d16[1].
define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld1lanei32:
;CHECK: vld1.32 {d16[1]}, [r0]
  %tmp1 = load <2 x i32>* %B
  %tmp2 = load i32* %A, align 4
  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
  ret <2 x i32> %tmp3
}

; 128-bit variant: loads one i8 from %A and inserts it into lane 9 of the
; <16 x i8> vector loaded from %B. The CHECK lines expect the lane to be
; addressed as d17[1] in a single-lane vld1.8.
define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vld1laneQi8:
;CHECK: vld1.8 {d17[1]}, [r0]
  %tmp1 = load <16 x i8>* %B
  %tmp2 = load i8* %A, align 1
  %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 9
  ret <16 x i8> %tmp3
}

; 128-bit variant: loads one i16 from %A (align 2) and inserts it into lane 5
; of the <8 x i16> vector loaded from %B. The CHECK lines expect the lane to
; be addressed as d17[1] in a single-lane vld1.16.
define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld1laneQi16:
;CHECK: vld1.16 {d17[1]}, [r0]
  %tmp1 = load <8 x i16>* %B
  %tmp2 = load i16* %A, align 2
  %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5
  ret <8 x i16> %tmp3
}

; 128-bit variant: loads one i32 from %A (align 4) and inserts it into lane 3
; of the <4 x i32> vector loaded from %B. The CHECK lines expect the lane to
; be addressed as d17[1] in a single-lane vld1.32.
define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld1laneQi32:
;CHECK: vld1.32 {d17[1]}, [r0]
  %tmp1 = load <4 x i32>* %B
  %tmp2 = load i32* %A, align 4
  %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3
  ret <4 x i32> %tmp3
}

; Two-vector aggregate types returned by the vld2lane intrinsics declared in
; this file: first the 64-bit vector forms, then the 128-bit vector forms.
%struct.__neon_int8x8x2_t = type { <8 x i8>,  <8 x i8> }
%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }

%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }

; Calls llvm.arm.neon.vld2lane.v8i8 on %A with lane operand 1 and a trailing
; alignment operand of 4, passing the vector loaded from %B for both inputs,
; then adds the two result vectors so both are used.
define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld2lanei8:
;Check the alignment value.  Max for this instruction is 16 bits:
;CHECK: vld2.8 {d16[1], d17[1]}, [r0, :16]
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
  %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

; Calls llvm.arm.neon.vld2lane.v4i16 on %A (bitcast to i8*) with lane
; operand 1 and a trailing alignment operand of 8, passing the vector loaded
; from %B for both inputs, then adds the two result vectors.
define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld2lanei16:
;Check the alignment value.  Max for this instruction is 32 bits:
;CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

; Calls llvm.arm.neon.vld2lane.v2i32 on %A (bitcast to i8*) with lane
; operand 1 and alignment operand 1, then adds the two result vectors.
; Only the vld2.32 mnemonic is checked.
define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld2lanei32:
;CHECK: vld2.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

; Floating-point counterpart: calls llvm.arm.neon.vld2lane.v2f32 on %A
; (bitcast to i8*) with lane operand 1 and alignment operand 1, then combines
; the two result vectors with fadd. Only the vld2.32 mnemonic is checked.
define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld2lanef:
;CHECK: vld2.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1
  %tmp5 = fadd <2 x float> %tmp3, %tmp4
  ret <2 x float> %tmp5
}

; 128-bit variant: calls llvm.arm.neon.vld2lane.v8i16 on %A (bitcast to i8*)
; with lane operand 5 and alignment operand 1, then adds the two result
; vectors. The CHECK line expects lanes d17[1]/d19[1] and no alignment
; qualifier on the address.
define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld2laneQi16:
;Check the (default) alignment.
;CHECK: vld2.16 {d17[1], d19[1]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
  %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; 128-bit variant: calls llvm.arm.neon.vld2lane.v4i32 on %A (bitcast to i8*)
; with lane operand 2 and a trailing alignment operand of 16, then adds the
; two result vectors. The CHECK line expects lanes d17[0]/d19[0].
define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld2laneQi32:
;Check the alignment value.  Max for this instruction is 64 bits:
;CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
  %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

; 128-bit floating-point variant: calls llvm.arm.neon.vld2lane.v4f32 on %A
; (bitcast to i8*) with lane operand 1 and alignment operand 1, then combines
; the two result vectors with fadd. Only the vld2.32 mnemonic is checked.
define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld2laneQf:
;CHECK: vld2.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1
  %tmp5 = fadd <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}

; Declarations of the vld2lane intrinsics called by the tests in this file.
; Each takes an i8* address, two input vectors, and two trailing i32 operands
; (used by the callers as lane index, then alignment).
declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind readonly

declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind readonly

; Three-vector aggregate types returned by the vld3lane intrinsics declared in
; this file: first the 64-bit vector forms, then the 128-bit vector forms.
%struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>,  <8 x i8> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }

%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }

; Calls llvm.arm.neon.vld3lane.v8i8 on %A with lane operand 1 and alignment
; operand 1, passing the vector loaded from %B for all three inputs, then sums
; the three result vectors. Only the vld3.8 mnemonic is checked.
define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld3lanei8:
;CHECK: vld3.8
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
  %tmp6 = add <8 x i8> %tmp3, %tmp4
  %tmp7 = add <8 x i8> %tmp5, %tmp6
  ret <8 x i8> %tmp7
}

; Calls llvm.arm.neon.vld3lane.v4i16 on %A (bitcast to i8*) with lane
; operand 1 and a trailing alignment operand of 8, then sums the three result
; vectors. The CHECK line expects no alignment qualifier despite the
; requested alignment.
define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld3lanei16:
;Check the (default) alignment value.  VLD3 does not support alignment.
;CHECK: vld3.16 {d16[1], d17[1], d18[1]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
  %tmp6 = add <4 x i16> %tmp3, %tmp4
  %tmp7 = add <4 x i16> %tmp5, %tmp6
  ret <4 x i16> %tmp7
}

; Calls llvm.arm.neon.vld3lane.v2i32 on %A (bitcast to i8*) with lane
; operand 1 and alignment operand 1, then sums the three result vectors.
; Only the vld3.32 mnemonic is checked.
define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld3lanei32:
;CHECK: vld3.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2
  %tmp6 = add <2 x i32> %tmp3, %tmp4
  %tmp7 = add <2 x i32> %tmp5, %tmp6
  ret <2 x i32> %tmp7
}

; Floating-point counterpart: calls llvm.arm.neon.vld3lane.v2f32 on %A
; (bitcast to i8*) with lane operand 1 and alignment operand 1, then combines
; the three result vectors with fadd. Only the vld3.32 mnemonic is checked.
define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld3lanef:
;CHECK: vld3.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2
  %tmp6 = fadd <2 x float> %tmp3, %tmp4
  %tmp7 = fadd <2 x float> %tmp5, %tmp6
  ret <2 x float> %tmp7
}

; 128-bit variant: calls llvm.arm.neon.vld3lane.v8i16 on %A (bitcast to i8*)
; with lane operand 1 and a trailing alignment operand of 8, then sums the
; three result vectors. The CHECK line expects lanes d16[1]/d18[1]/d20[1] and
; no alignment qualifier.
define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld3laneQi16:
;Check the (default) alignment value.  VLD3 does not support alignment.
;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
  %tmp6 = add <8 x i16> %tmp3, %tmp4
  %tmp7 = add <8 x i16> %tmp5, %tmp6
  ret <8 x i16> %tmp7
}

; 128-bit variant: calls llvm.arm.neon.vld3lane.v4i32 on %A (bitcast to i8*)
; with lane operand 3 and alignment operand 1, then sums the three result
; vectors. Only the vld3.32 mnemonic is checked.
define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld3laneQi32:
;CHECK: vld3.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2
  %tmp6 = add <4 x i32> %tmp3, %tmp4
  %tmp7 = add <4 x i32> %tmp5, %tmp6
  ret <4 x i32> %tmp7
}

; 128-bit floating-point variant: calls llvm.arm.neon.vld3lane.v4f32 on %A
; (bitcast to i8*) with lane operand 1 and alignment operand 1, then combines
; the three result vectors with fadd. Only the vld3.32 mnemonic is checked.
define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld3laneQf:
;CHECK: vld3.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2
  %tmp6 = fadd <4 x float> %tmp3, %tmp4
  %tmp7 = fadd <4 x float> %tmp5, %tmp6
  ret <4 x float> %tmp7
}

; Declarations of the vld3lane intrinsics called by the tests in this file.
; Each takes an i8* address, three input vectors, and two trailing i32
; operands (used by the callers as lane index, then alignment).
declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly

declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly

; Four-vector aggregate types returned by the vld4lane intrinsics declared in
; this file: first the 64-bit vector forms, then the 128-bit vector forms.
%struct.__neon_int8x8x4_t = type { <8 x i8>,  <8 x i8>,  <8 x i8>,  <8 x i8> }
%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }

%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }

; Calls llvm.arm.neon.vld4lane.v8i8 on %A with lane operand 1 and a trailing
; alignment operand of 8, passing the vector loaded from %B for all four
; inputs, then sums the four result vectors so all are used.
define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld4lanei8:
;Check the alignment value.  Max for this instruction is 32 bits:
;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
  %tmp7 = add <8 x i8> %tmp3, %tmp4
  %tmp8 = add <8 x i8> %tmp5, %tmp6
  %tmp9 = add <8 x i8> %tmp7, %tmp8
  ret <8 x i8> %tmp9
}

; Calls llvm.arm.neon.vld4lane.v4i16 on %A (bitcast to i8*) with lane
; operand 1 and alignment operand 1, then sums the four result vectors.
; Only the vld4.16 mnemonic is checked.
define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld4lanei16:
;CHECK: vld4.16
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3
  %tmp7 = add <4 x i16> %tmp3, %tmp4
  %tmp8 = add <4 x i16> %tmp5, %tmp6
  %tmp9 = add <4 x i16> %tmp7, %tmp8
  ret <4 x i16> %tmp9
}

; Calls llvm.arm.neon.vld4lane.v2i32 on %A (bitcast to i8*) with lane
; operand 1 and a trailing alignment operand of 16, then sums the four result
; vectors.
define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld4lanei32:
;Check the alignment value.  Max for this instruction is 128 bits:
;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16)
  %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3
  %tmp7 = add <2 x i32> %tmp3, %tmp4
  %tmp8 = add <2 x i32> %tmp5, %tmp6
  %tmp9 = add <2 x i32> %tmp7, %tmp8
  ret <2 x i32> %tmp9
}

; Floating-point counterpart: calls llvm.arm.neon.vld4lane.v2f32 on %A
; (bitcast to i8*) with lane operand 1 and alignment operand 1, then combines
; the four result vectors with fadd. Only the vld4.32 mnemonic is checked.
define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld4lanef:
;CHECK: vld4.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3
  %tmp7 = fadd <2 x float> %tmp3, %tmp4
  %tmp8 = fadd <2 x float> %tmp5, %tmp6
  %tmp9 = fadd <2 x float> %tmp7, %tmp8
  ret <2 x float> %tmp9
}

; 128-bit variant: calls llvm.arm.neon.vld4lane.v8i16 on %A (bitcast to i8*)
; with lane operand 1 and a trailing alignment operand of 16, then sums the
; four result vectors. The CHECK line expects lanes
; d16[1]/d18[1]/d20[1]/d22[1].
define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld4laneQi16:
;Check the alignment value.  Max for this instruction is 64 bits:
;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
  %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3
  %tmp7 = add <8 x i16> %tmp3, %tmp4
  %tmp8 = add <8 x i16> %tmp5, %tmp6
  %tmp9 = add <8 x i16> %tmp7, %tmp8
  ret <8 x i16> %tmp9
}

; 128-bit variant: calls llvm.arm.neon.vld4lane.v4i32 on %A (bitcast to i8*)
; with lane operand 2 and alignment operand 1, then sums the four result
; vectors. The CHECK line expects lanes d17[0]/d19[0]/d21[0]/d23[0] and no
; alignment qualifier on the address.
define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld4laneQi32:
;Check the (default) alignment.
;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3
  %tmp7 = add <4 x i32> %tmp3, %tmp4
  %tmp8 = add <4 x i32> %tmp5, %tmp6
  %tmp9 = add <4 x i32> %tmp7, %tmp8
  ret <4 x i32> %tmp9
}

; 128-bit floating-point variant: calls llvm.arm.neon.vld4lane.v4f32 on %A
; (bitcast to i8*) with lane operand 1 and alignment operand 1, then combines
; the four result vectors with fadd. Only the vld4.32 mnemonic is checked.
define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld4laneQf:
;CHECK: vld4.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 3
  %tmp7 = fadd <4 x float> %tmp3, %tmp4
  %tmp8 = fadd <4 x float> %tmp5, %tmp6
  %tmp9 = fadd <4 x float> %tmp7, %tmp8
  ret <4 x float> %tmp9
}

; Declarations of the vld4lane intrinsics called by the tests in this file.
; Each takes an i8* address, four input vectors, and two trailing i32
; operands (used by the callers as lane index, then alignment).
declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly

declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly