blob: 53881a3f924e217b4e0020559bb21c0c1cf0bd0f [file] [log] [blame]
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
Bob Wilsonc0110052009-09-01 04:27:10 +00002
; Two-vector aggregate result types returned by the vld2lane intrinsics.
; 64-bit vector element types:
%struct.__neon_int8x8x2_t    = type { <8 x i8>,    <8 x i8> }
%struct.__neon_int16x4x2_t   = type { <4 x i16>,   <4 x i16> }
%struct.__neon_int32x2x2_t   = type { <2 x i32>,   <2 x i32> }
%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }

; 128-bit vector element types:
%struct.__neon_int16x8x2_t   = type { <8 x i16>,   <8 x i16> }
%struct.__neon_int32x4x2_t   = type { <4 x i32>,   <4 x i32> }
%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
11
; Calls llvm.arm.neon.vld2lane.v8i8 on %A with lane index 1, passing the vector
; loaded from %B as both vector operands, and returns the sum of the two result
; vectors. The checks expect a vld2.8 instruction in the output.
define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld2lanei8:
;CHECK: vld2.8
  %vec = load <8 x i8>* %B
  %pair = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %vec, <8 x i8> %vec, i32 1)
  %first = extractvalue %struct.__neon_int8x8x2_t %pair, 0
  %second = extractvalue %struct.__neon_int8x8x2_t %pair, 1
  %sum = add <8 x i8> %first, %second
  ret <8 x i8> %sum
}
22
; Calls llvm.arm.neon.vld2lane.v4i16 on %A with lane index 1, passing the vector
; loaded from %B as both vector operands, and returns the sum of the two result
; vectors. The checks expect a vld2.16 instruction in the output.
define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld2lanei16:
;CHECK: vld2.16
  ; The intrinsic is declared with an i8* pointer parameter, so cast %A first.
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}
33
; Calls llvm.arm.neon.vld2lane.v2i32 on %A with lane index 1, passing the vector
; loaded from %B as both vector operands, and returns the sum of the two result
; vectors. The checks expect a vld2.32 instruction in the output.
define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld2lanei32:
;CHECK: vld2.32
  ; The intrinsic is declared with an i8* pointer parameter, so cast %A first.
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}
44
; Calls llvm.arm.neon.vld2lane.v2f32 on %A with lane index 1, passing the vector
; loaded from %B as both vector operands, and returns the sum of the two result
; vectors. The checks expect a vld2.32 instruction in the output.
define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld2lanef:
;CHECK: vld2.32
  ; The intrinsic is declared with an i8* pointer parameter, so cast %A first.
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1
  ; Floating-point vectors must be summed with fadd; add is integer-only.
  %tmp5 = fadd <2 x float> %tmp3, %tmp4
  ret <2 x float> %tmp5
}
55
; Calls llvm.arm.neon.vld2lane.v8i16 on %A with lane index 1, passing the vector
; loaded from %B as both vector operands, and returns the sum of the two result
; vectors. The checks expect a vld2.16 instruction in the output.
define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld2laneQi16:
;CHECK: vld2.16
  ; The intrinsic is declared with an i8* pointer parameter, so cast %A first.
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}
66
; Calls llvm.arm.neon.vld2lane.v4i32 on %A with lane index 2, passing the vector
; loaded from %B as both vector operands, and returns the sum of the two result
; vectors. The checks expect a vld2.32 instruction in the output.
define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld2laneQi32:
;CHECK: vld2.32
  ; The intrinsic is declared with an i8* pointer parameter, so cast %A first.
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
  %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}
77
; Calls llvm.arm.neon.vld2lane.v4f32 on %A with lane index 1, passing the vector
; loaded from %B as both vector operands, and returns the sum of the two result
; vectors. The checks expect a vld2.32 instruction in the output.
define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld2laneQf:
;CHECK: vld2.32
  ; The intrinsic is declared with an i8* pointer parameter, so cast %A first.
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1
  ; Floating-point vectors must be summed with fadd; add is integer-only.
  %tmp5 = fadd <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}
88
; Declarations of the vld2lane intrinsics used above. Each takes an i8* pointer,
; two vectors of the named type and an i32, and returns the matching two-vector
; struct.
; 64-bit vector variants:
declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly
declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind readonly
declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind readonly
declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind readonly

; 128-bit vector variants:
declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly
declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind readonly
declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind readonly
97
; Three-vector aggregate result types returned by the vld3lane intrinsics.
; 64-bit vector element types:
%struct.__neon_int8x8x3_t    = type { <8 x i8>,    <8 x i8>,    <8 x i8> }
%struct.__neon_int16x4x3_t   = type { <4 x i16>,   <4 x i16>,   <4 x i16> }
%struct.__neon_int32x2x3_t   = type { <2 x i32>,   <2 x i32>,   <2 x i32> }
%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }

; 128-bit vector element types:
%struct.__neon_int16x8x3_t   = type { <8 x i16>,   <8 x i16>,   <8 x i16> }
%struct.__neon_int32x4x3_t   = type { <4 x i32>,   <4 x i32>,   <4 x i32> }
%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
106
; Calls llvm.arm.neon.vld3lane.v8i8 on %A with lane index 1, passing the vector
; loaded from %B as all three vector operands, and returns the sum of the three
; result vectors. The checks expect a vld3.8 instruction in the output.
define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld3lanei8:
;CHECK: vld3.8
  %vec = load <8 x i8>* %B
  %triple = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %vec, <8 x i8> %vec, <8 x i8> %vec, i32 1)
  %first = extractvalue %struct.__neon_int8x8x3_t %triple, 0
  %second = extractvalue %struct.__neon_int8x8x3_t %triple, 1
  %third = extractvalue %struct.__neon_int8x8x3_t %triple, 2
  %partial = add <8 x i8> %first, %second
  %sum = add <8 x i8> %third, %partial
  ret <8 x i8> %sum
}
119
; Calls llvm.arm.neon.vld3lane.v4i16 on %A with lane index 1, passing the vector
; loaded from %B as all three vector operands, and returns the sum of the three
; result vectors. The checks expect a vld3.16 instruction in the output.
define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld3lanei16:
;CHECK: vld3.16
  ; The intrinsic is declared with an i8* pointer parameter, so cast %A first.
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
  %tmp6 = add <4 x i16> %tmp3, %tmp4
  %tmp7 = add <4 x i16> %tmp5, %tmp6
  ret <4 x i16> %tmp7
}
132
; Calls llvm.arm.neon.vld3lane.v2i32 on %A with lane index 1, passing the vector
; loaded from %B as all three vector operands, and returns the sum of the three
; result vectors. The checks expect a vld3.32 instruction in the output.
define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld3lanei32:
;CHECK: vld3.32
  ; The intrinsic is declared with an i8* pointer parameter, so cast %A first.
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2
  %tmp6 = add <2 x i32> %tmp3, %tmp4
  %tmp7 = add <2 x i32> %tmp5, %tmp6
  ret <2 x i32> %tmp7
}
145
; Calls llvm.arm.neon.vld3lane.v2f32 on %A with lane index 1, passing the vector
; loaded from %B as all three vector operands, and returns the sum of the three
; result vectors. The checks expect a vld3.32 instruction in the output.
define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld3lanef:
;CHECK: vld3.32
  ; The intrinsic is declared with an i8* pointer parameter, so cast %A first.
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2
  ; Floating-point vectors must be summed with fadd; add is integer-only.
  %tmp6 = fadd <2 x float> %tmp3, %tmp4
  %tmp7 = fadd <2 x float> %tmp5, %tmp6
  ret <2 x float> %tmp7
}
158
; Calls llvm.arm.neon.vld3lane.v8i16 on %A with lane index 1, passing the vector
; loaded from %B as all three vector operands, and returns the sum of the three
; result vectors. The checks expect a vld3.16 instruction in the output.
define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld3laneQi16:
;CHECK: vld3.16
  ; The intrinsic is declared with an i8* pointer parameter, so cast %A first.
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
  %tmp6 = add <8 x i16> %tmp3, %tmp4
  %tmp7 = add <8 x i16> %tmp5, %tmp6
  ret <8 x i16> %tmp7
}
171
; Calls llvm.arm.neon.vld3lane.v4i32 on %A with lane index 3, passing the vector
; loaded from %B as all three vector operands, and returns the sum of the three
; result vectors. The checks expect a vld3.32 instruction in the output.
define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld3laneQi32:
;CHECK: vld3.32
  ; The intrinsic is declared with an i8* pointer parameter, so cast %A first.
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3)
  %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2
  %tmp6 = add <4 x i32> %tmp3, %tmp4
  %tmp7 = add <4 x i32> %tmp5, %tmp6
  ret <4 x i32> %tmp7
}
184
; Calls llvm.arm.neon.vld3lane.v4f32 on %A with lane index 1, passing the vector
; loaded from %B as all three vector operands, and returns the sum of the three
; result vectors. The checks expect a vld3.32 instruction in the output.
define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld3laneQf:
;CHECK: vld3.32
  ; The intrinsic is declared with an i8* pointer parameter, so cast %A first.
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2
  ; Floating-point vectors must be summed with fadd; add is integer-only.
  %tmp6 = fadd <4 x float> %tmp3, %tmp4
  %tmp7 = fadd <4 x float> %tmp5, %tmp6
  ret <4 x float> %tmp7
}
197
; Declarations of the vld3lane intrinsics used above. Each takes an i8* pointer,
; three vectors of the named type and an i32, and returns the matching
; three-vector struct.
; 64-bit vector variants:
declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly
declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly

; 128-bit vector variants:
declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly
declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly
206
; Four-vector aggregate result types returned by the vld4lane intrinsics.
; 64-bit vector element types:
%struct.__neon_int8x8x4_t    = type { <8 x i8>,    <8 x i8>,    <8 x i8>,    <8 x i8> }
%struct.__neon_int16x4x4_t   = type { <4 x i16>,   <4 x i16>,   <4 x i16>,   <4 x i16> }
%struct.__neon_int32x2x4_t   = type { <2 x i32>,   <2 x i32>,   <2 x i32>,   <2 x i32> }
%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }

; 128-bit vector element types:
%struct.__neon_int16x8x4_t   = type { <8 x i16>,   <8 x i16>,   <8 x i16>,   <8 x i16> }
%struct.__neon_int32x4x4_t   = type { <4 x i32>,   <4 x i32>,   <4 x i32>,   <4 x i32> }
%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
215
; Calls llvm.arm.neon.vld4lane.v8i8 on %A with lane index 1, passing the vector
; loaded from %B as all four vector operands, and returns the sum of the four
; result vectors. The checks expect a vld4.8 instruction in the output.
define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld4lanei8:
;CHECK: vld4.8
  %vec = load <8 x i8>* %B
  %quad = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %vec, <8 x i8> %vec, <8 x i8> %vec, <8 x i8> %vec, i32 1)
  %first = extractvalue %struct.__neon_int8x8x4_t %quad, 0
  %second = extractvalue %struct.__neon_int8x8x4_t %quad, 1
  %third = extractvalue %struct.__neon_int8x8x4_t %quad, 2
  %fourth = extractvalue %struct.__neon_int8x8x4_t %quad, 3
  %lo = add <8 x i8> %first, %second
  %hi = add <8 x i8> %third, %fourth
  %sum = add <8 x i8> %lo, %hi
  ret <8 x i8> %sum
}
230
; Calls llvm.arm.neon.vld4lane.v4i16 on %A with lane index 1, passing the vector
; loaded from %B as all four vector operands, and returns the sum of the four
; result vectors. The checks expect a vld4.16 instruction in the output.
define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld4lanei16:
;CHECK: vld4.16
  ; The intrinsic is declared with an i8* pointer parameter, so cast %A first.
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3
  %tmp7 = add <4 x i16> %tmp3, %tmp4
  %tmp8 = add <4 x i16> %tmp5, %tmp6
  %tmp9 = add <4 x i16> %tmp7, %tmp8
  ret <4 x i16> %tmp9
}
245
; Calls llvm.arm.neon.vld4lane.v2i32 on %A with lane index 1, passing the vector
; loaded from %B as all four vector operands, and returns the sum of the four
; result vectors. The checks expect a vld4.32 instruction in the output.
define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld4lanei32:
;CHECK: vld4.32
  ; The intrinsic is declared with an i8* pointer parameter, so cast %A first.
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3
  %tmp7 = add <2 x i32> %tmp3, %tmp4
  %tmp8 = add <2 x i32> %tmp5, %tmp6
  %tmp9 = add <2 x i32> %tmp7, %tmp8
  ret <2 x i32> %tmp9
}
260
; Calls llvm.arm.neon.vld4lane.v2f32 on %A with lane index 1, passing the vector
; loaded from %B as all four vector operands, and returns the sum of the four
; result vectors. The checks expect a vld4.32 instruction in the output.
define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld4lanef:
;CHECK: vld4.32
  ; The intrinsic is declared with an i8* pointer parameter, so cast %A first.
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3
  ; Floating-point vectors must be summed with fadd; add is integer-only.
  %tmp7 = fadd <2 x float> %tmp3, %tmp4
  %tmp8 = fadd <2 x float> %tmp5, %tmp6
  %tmp9 = fadd <2 x float> %tmp7, %tmp8
  ret <2 x float> %tmp9
}
275
; Calls llvm.arm.neon.vld4lane.v8i16 on %A with lane index 1, passing the vector
; loaded from %B as all four vector operands, and returns the sum of the four
; result vectors. The checks expect a vld4.16 instruction in the output.
define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld4laneQi16:
;CHECK: vld4.16
  ; The intrinsic is declared with an i8* pointer parameter, so cast %A first.
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3
  %tmp7 = add <8 x i16> %tmp3, %tmp4
  %tmp8 = add <8 x i16> %tmp5, %tmp6
  %tmp9 = add <8 x i16> %tmp7, %tmp8
  ret <8 x i16> %tmp9
}
290
; Calls llvm.arm.neon.vld4lane.v4i32 on %A with lane index 1, passing the vector
; loaded from %B as all four vector operands, and returns the sum of the four
; result vectors. The checks expect a vld4.32 instruction in the output.
define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld4laneQi32:
;CHECK: vld4.32
  ; The intrinsic is declared with an i8* pointer parameter, so cast %A first.
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3
  %tmp7 = add <4 x i32> %tmp3, %tmp4
  %tmp8 = add <4 x i32> %tmp5, %tmp6
  %tmp9 = add <4 x i32> %tmp7, %tmp8
  ret <4 x i32> %tmp9
}
305
; Calls llvm.arm.neon.vld4lane.v4f32 on %A with lane index 1, passing the vector
; loaded from %B as all four vector operands, and returns the sum of the four
; result vectors. The checks expect a vld4.32 instruction in the output.
define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld4laneQf:
;CHECK: vld4.32
  ; The intrinsic is declared with an i8* pointer parameter, so cast %A first.
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 3
  ; Floating-point vectors must be summed with fadd; add is integer-only.
  %tmp7 = fadd <4 x float> %tmp3, %tmp4
  %tmp8 = fadd <4 x float> %tmp5, %tmp6
  %tmp9 = fadd <4 x float> %tmp7, %tmp8
  ret <4 x float> %tmp9
}
320
; Declarations of the vld4lane intrinsics used above. Each takes an i8* pointer,
; four vectors of the named type and an i32, and returns the matching
; four-vector struct.
; 64-bit vector variants:
declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly
declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly
declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly

; 128-bit vector variants:
declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly
declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly