blob: 57e74dfe2498bc7e4244caaaf87e177c691e07dc [file] [log] [blame]
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; Loads one i8 from %A (align 1) and inserts it into lane 3 of the <8 x i8>
; vector loaded from %B. The expected output is a single-lane vld1.8 into d16[3].
define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld1lanei8:
;CHECK: vld1.8 {d16[3]}, [r0]
  %tmp1 = load <8 x i8>* %B
  %tmp2 = load i8* %A, align 1
  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3
  ret <8 x i8> %tmp3
}

; Loads one i16 from %A (align 2) and inserts it into lane 2 of the <4 x i16>
; vector loaded from %B. The expected output is a single-lane vld1.16 into d16[2].
define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld1lanei16:
;CHECK: vld1.16 {d16[2]}, [r0]
  %tmp1 = load <4 x i16>* %B
  %tmp2 = load i16* %A, align 2
  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2
  ret <4 x i16> %tmp3
}

; Loads one i32 from %A (align 4) and inserts it into lane 1 of the <2 x i32>
; vector loaded from %B. The expected output is a single-lane vld1.32 into d16[1].
define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld1lanei32:
;CHECK: vld1.32 {d16[1]}, [r0]
  %tmp1 = load <2 x i32>* %B
  %tmp2 = load i32* %A, align 4
  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
  ret <2 x i32> %tmp3
}

; Two-vector aggregate types returned by the vld2lane intrinsics.
; 64-bit element vectors:
%struct.__neon_int8x8x2_t = type { <8 x i8>,  <8 x i8> }
%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }

; 128-bit element vectors:
%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }

; vld2lane on <8 x i8>, lane 1, with a trailing alignment operand of 4.
; Both result vectors are summed so that both are live.
define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld2lanei8:
;Check the alignment value.  Max for this instruction is 16 bits:
;CHECK: vld2.8 {d16[1], d17[1]}, [r0, :16]
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
  %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

; vld2lane on <4 x i16>, lane 1, with a trailing alignment operand of 8.
; The pointer is bitcast to i8* because the intrinsic takes an i8* address.
define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld2lanei16:
;Check the alignment value.  Max for this instruction is 32 bits:
;CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

; vld2lane on <2 x i32>, lane 1, with a trailing alignment operand of 1.
; Only the opcode is checked here.
define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld2lanei32:
;CHECK: vld2.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

; vld2lane on <2 x float>, lane 1, with a trailing alignment operand of 1.
; Only the opcode is checked here; results are combined with fadd.
define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld2lanef:
;CHECK: vld2.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1
  %tmp5 = fadd <2 x float> %tmp3, %tmp4
  ret <2 x float> %tmp5
}

; vld2lane on <8 x i16>, lane 5, with a trailing alignment operand of 1.
; The expected registers are the odd D registers at sub-lane 1 (d17[1], d19[1]),
; with no alignment qualifier on the address.
define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld2laneQi16:
;Check the (default) alignment.
;CHECK: vld2.16 {d17[1], d19[1]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
  %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; vld2lane on <4 x i32>, lane 2, with a trailing alignment operand of 16.
; The expected registers are the odd D registers at sub-lane 0 (d17[0], d19[0]).
define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld2laneQi32:
;Check the alignment value.  Max for this instruction is 64 bits:
;CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
  %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

; vld2lane on <4 x float>, lane 1, with a trailing alignment operand of 1.
; Only the opcode is checked here; results are combined with fadd.
define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld2laneQf:
;CHECK: vld2.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1
  %tmp5 = fadd <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}

; vld2lane intrinsic declarations. Operands are: i8* address, two input vectors,
; then two i32 values (used by the callers in this file as the lane index
; followed by the alignment).
declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind readonly

declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind readonly

; Three-vector aggregate types returned by the vld3lane intrinsics.
; 64-bit element vectors:
%struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>,  <8 x i8> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }

; 128-bit element vectors:
%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }

; vld3lane on <8 x i8>, lane 1, with a trailing alignment operand of 1.
; Only the opcode is checked here; all three results are summed so they are live.
define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld3lanei8:
;CHECK: vld3.8
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
  %tmp6 = add <8 x i8> %tmp3, %tmp4
  %tmp7 = add <8 x i8> %tmp5, %tmp6
  ret <8 x i8> %tmp7
}

; vld3lane on <4 x i16>, lane 1, with a trailing alignment operand of 8.
; The expected output carries no alignment qualifier on the address.
define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld3lanei16:
;Check the (default) alignment value.  VLD3 does not support alignment.
;CHECK: vld3.16 {d16[1], d17[1], d18[1]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
  %tmp6 = add <4 x i16> %tmp3, %tmp4
  %tmp7 = add <4 x i16> %tmp5, %tmp6
  ret <4 x i16> %tmp7
}

; vld3lane on <2 x i32>, lane 1, with a trailing alignment operand of 1.
; Only the opcode is checked here.
define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld3lanei32:
;CHECK: vld3.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2
  %tmp6 = add <2 x i32> %tmp3, %tmp4
  %tmp7 = add <2 x i32> %tmp5, %tmp6
  ret <2 x i32> %tmp7
}

; vld3lane on <2 x float>, lane 1, with a trailing alignment operand of 1.
; Only the opcode is checked here; results are combined with fadd.
define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld3lanef:
;CHECK: vld3.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2
  %tmp6 = fadd <2 x float> %tmp3, %tmp4
  %tmp7 = fadd <2 x float> %tmp5, %tmp6
  ret <2 x float> %tmp7
}

; vld3lane on <8 x i16>, lane 1, with a trailing alignment operand of 8.
; The expected registers are the even D registers (d16, d18, d20) at sub-lane 1,
; with no alignment qualifier on the address.
define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld3laneQi16:
;Check the (default) alignment value.  VLD3 does not support alignment.
;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
  %tmp6 = add <8 x i16> %tmp3, %tmp4
  %tmp7 = add <8 x i16> %tmp5, %tmp6
  ret <8 x i16> %tmp7
}

; vld3lane on <4 x i32>, lane 3, with a trailing alignment operand of 1.
; Only the opcode is checked here.
define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld3laneQi32:
;CHECK: vld3.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2
  %tmp6 = add <4 x i32> %tmp3, %tmp4
  %tmp7 = add <4 x i32> %tmp5, %tmp6
  ret <4 x i32> %tmp7
}

; vld3lane on <4 x float>, lane 1, with a trailing alignment operand of 1.
; Only the opcode is checked here; results are combined with fadd.
define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld3laneQf:
;CHECK: vld3.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2
  %tmp6 = fadd <4 x float> %tmp3, %tmp4
  %tmp7 = fadd <4 x float> %tmp5, %tmp6
  ret <4 x float> %tmp7
}

; vld3lane intrinsic declarations. Operands are: i8* address, three input
; vectors, then two i32 values (used by the callers in this file as the lane
; index followed by the alignment).
declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly

declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly

; Four-vector aggregate types returned by the vld4lane intrinsics.
; 64-bit element vectors:
%struct.__neon_int8x8x4_t = type { <8 x i8>,  <8 x i8>,  <8 x i8>,  <8 x i8> }
%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }

; 128-bit element vectors:
%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }

; vld4lane on <8 x i8>, lane 1, with a trailing alignment operand of 8.
; All four result vectors are summed so that all are live.
define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld4lanei8:
;Check the alignment value.  Max for this instruction is 32 bits:
;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
  %tmp7 = add <8 x i8> %tmp3, %tmp4
  %tmp8 = add <8 x i8> %tmp5, %tmp6
  %tmp9 = add <8 x i8> %tmp7, %tmp8
  ret <8 x i8> %tmp9
}

; vld4lane on <4 x i16>, lane 1, with a trailing alignment operand of 1.
; Only the opcode is checked here.
define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld4lanei16:
;CHECK: vld4.16
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3
  %tmp7 = add <4 x i16> %tmp3, %tmp4
  %tmp8 = add <4 x i16> %tmp5, %tmp6
  %tmp9 = add <4 x i16> %tmp7, %tmp8
  ret <4 x i16> %tmp9
}

; vld4lane on <2 x i32>, lane 1, with a trailing alignment operand of 16.
; The expected output carries a :128 alignment qualifier on the address.
define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld4lanei32:
;Check the alignment value.  Max for this instruction is 128 bits:
;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16)
  %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3
  %tmp7 = add <2 x i32> %tmp3, %tmp4
  %tmp8 = add <2 x i32> %tmp5, %tmp6
  %tmp9 = add <2 x i32> %tmp7, %tmp8
  ret <2 x i32> %tmp9
}

; vld4lane on <2 x float>, lane 1, with a trailing alignment operand of 1.
; Only the opcode is checked here; results are combined with fadd.
define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld4lanef:
;CHECK: vld4.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3
  %tmp7 = fadd <2 x float> %tmp3, %tmp4
  %tmp8 = fadd <2 x float> %tmp5, %tmp6
  %tmp9 = fadd <2 x float> %tmp7, %tmp8
  ret <2 x float> %tmp9
}

; vld4lane on <8 x i16>, lane 1, with a trailing alignment operand of 16.
; The expected registers are the even D registers (d16, d18, d20, d22) at
; sub-lane 1.
define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld4laneQi16:
;Check the alignment value.  Max for this instruction is 64 bits:
;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
  %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3
  %tmp7 = add <8 x i16> %tmp3, %tmp4
  %tmp8 = add <8 x i16> %tmp5, %tmp6
  %tmp9 = add <8 x i16> %tmp7, %tmp8
  ret <8 x i16> %tmp9
}

; vld4lane on <4 x i32>, lane 2, with a trailing alignment operand of 1.
; The expected registers are the odd D registers (d17, d19, d21, d23) at
; sub-lane 0, with no alignment qualifier on the address.
define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld4laneQi32:
;Check the (default) alignment.
;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3
  %tmp7 = add <4 x i32> %tmp3, %tmp4
  %tmp8 = add <4 x i32> %tmp5, %tmp6
  %tmp9 = add <4 x i32> %tmp7, %tmp8
  ret <4 x i32> %tmp9
}

; vld4lane on <4 x float>, lane 1, with a trailing alignment operand of 1.
; Only the opcode is checked here; results are combined with fadd.
define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld4laneQf:
;CHECK: vld4.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 3
  %tmp7 = fadd <4 x float> %tmp3, %tmp4
  %tmp8 = fadd <4 x float> %tmp5, %tmp6
  %tmp9 = fadd <4 x float> %tmp7, %tmp8
  ret <4 x float> %tmp9
}

; vld4lane intrinsic declarations. Operands are: i8* address, four input
; vectors, then two i32 values (used by the callers in this file as the lane
; index followed by the alignment).
declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly

declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly