; Source blob: b32c59019f4cbea802605350ff8431683c1f3462
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; Two-member aggregates of 64-bit vectors, returned by the vld2lane intrinsics.
%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }

; Two-member aggregates of 128-bit vectors, returned by the vld2lane intrinsics.
%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }

; vld2lane on <8 x i8>: the vector read from %B is passed as both vector
; operands and the trailing i32 operand is 1 (presumably the lane index).
; Both returned vectors feed the add, so each result is used.
define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld2lanei8:
;CHECK: vld2.8
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

; vld2lane on <4 x i16>: %A is bitcast to i8* for the intrinsic, the vector
; read from %B is passed as both vector operands, and the trailing i32 operand
; is 1 (presumably the lane index). Both returned vectors feed the add.
define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld2lanei16:
;CHECK: vld2.16
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

; vld2lane on <2 x i32>: %A is bitcast to i8* for the intrinsic, the vector
; read from %B is passed as both vector operands, and the trailing i32 operand
; is 1 (presumably the lane index). Both returned vectors feed the add.
define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld2lanei32:
;CHECK: vld2.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

; vld2lane on <2 x float>: %A is bitcast to i8* for the intrinsic, the vector
; read from %B is passed as both vector operands, and the trailing i32 operand
; is 1 (presumably the lane index). Both returned vectors feed the fadd.
define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld2lanef:
;CHECK: vld2.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1
  %tmp5 = fadd <2 x float> %tmp3, %tmp4
  ret <2 x float> %tmp5
}

; vld2lane on the 128-bit <8 x i16> type: %A is bitcast to i8*, the vector read
; from %B is passed as both vector operands, and the trailing i32 operand is 1
; (presumably the lane index). Both returned vectors feed the add.
define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld2laneQi16:
;CHECK: vld2.16
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; vld2lane on the 128-bit <4 x i32> type: %A is bitcast to i8* and the vector
; read from %B is passed as both vector operands. The trailing i32 operand is 2
; here (presumably the lane index), whereas the other vld2lane tests pass 1.
; NOTE(review): the different value looks deliberate; confirm before changing.
define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld2laneQi32:
;CHECK: vld2.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
  %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

; vld2lane on the 128-bit <4 x float> type: %A is bitcast to i8*, the vector
; read from %B is passed as both vector operands, and the trailing i32 operand
; is 1 (presumably the lane index). Both returned vectors feed the fadd.
define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld2laneQf:
;CHECK: vld2.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1
  %tmp5 = fadd <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}

; vld2lane intrinsic declarations. Each takes an i8* address, two vectors of
; the named element type, and a trailing i32 (presumably the lane index), and
; is marked nounwind readonly.
declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly
declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind readonly
declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind readonly
declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind readonly

declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly
declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind readonly
declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind readonly

; Three-member aggregates of 64-bit vectors, returned by the vld3lane intrinsics.
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }

; Three-member aggregates of 128-bit vectors, returned by the vld3lane intrinsics.
%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }

; vld3lane on <8 x i8>: the vector read from %B is passed as all three vector
; operands and the trailing i32 operand is 1 (presumably the lane index).
; All three returned vectors are summed, so each result is used.
define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld3lanei8:
;CHECK: vld3.8
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
  %tmp6 = add <8 x i8> %tmp3, %tmp4
  %tmp7 = add <8 x i8> %tmp5, %tmp6
  ret <8 x i8> %tmp7
}

; vld3lane on <4 x i16>: %A is bitcast to i8*, the vector read from %B is
; passed as all three vector operands, and the trailing i32 operand is 1
; (presumably the lane index). All three returned vectors are summed.
define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld3lanei16:
;CHECK: vld3.16
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
  %tmp6 = add <4 x i16> %tmp3, %tmp4
  %tmp7 = add <4 x i16> %tmp5, %tmp6
  ret <4 x i16> %tmp7
}

; vld3lane on <2 x i32>: %A is bitcast to i8*, the vector read from %B is
; passed as all three vector operands, and the trailing i32 operand is 1
; (presumably the lane index). All three returned vectors are summed.
define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld3lanei32:
;CHECK: vld3.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2
  %tmp6 = add <2 x i32> %tmp3, %tmp4
  %tmp7 = add <2 x i32> %tmp5, %tmp6
  ret <2 x i32> %tmp7
}

; vld3lane on <2 x float>: %A is bitcast to i8*, the vector read from %B is
; passed as all three vector operands, and the trailing i32 operand is 1
; (presumably the lane index). All three returned vectors are summed with fadd.
define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld3lanef:
;CHECK: vld3.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2
  %tmp6 = fadd <2 x float> %tmp3, %tmp4
  %tmp7 = fadd <2 x float> %tmp5, %tmp6
  ret <2 x float> %tmp7
}

; vld3lane on the 128-bit <8 x i16> type: %A is bitcast to i8*, the vector read
; from %B is passed as all three vector operands, and the trailing i32 operand
; is 1 (presumably the lane index). All three returned vectors are summed.
define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld3laneQi16:
;CHECK: vld3.16
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
  %tmp6 = add <8 x i16> %tmp3, %tmp4
  %tmp7 = add <8 x i16> %tmp5, %tmp6
  ret <8 x i16> %tmp7
}

; vld3lane on the 128-bit <4 x i32> type: %A is bitcast to i8* and the vector
; read from %B is passed as all three vector operands. The trailing i32 operand
; is 3 here (presumably the lane index), whereas the other vld3lane tests pass 1.
; NOTE(review): the different value looks deliberate; confirm before changing.
define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld3laneQi32:
;CHECK: vld3.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3)
  %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2
  %tmp6 = add <4 x i32> %tmp3, %tmp4
  %tmp7 = add <4 x i32> %tmp5, %tmp6
  ret <4 x i32> %tmp7
}

; vld3lane on the 128-bit <4 x float> type: %A is bitcast to i8*, the vector
; read from %B is passed as all three vector operands, and the trailing i32
; operand is 1 (presumably the lane index). All three results are fadd-summed.
define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld3laneQf:
;CHECK: vld3.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2
  %tmp6 = fadd <4 x float> %tmp3, %tmp4
  %tmp7 = fadd <4 x float> %tmp5, %tmp6
  ret <4 x float> %tmp7
}

; vld3lane intrinsic declarations. Each takes an i8* address, three vectors of
; the named element type, and a trailing i32 (presumably the lane index), and
; is marked nounwind readonly.
declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly
declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly

declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly
declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly

; Four-member aggregates of 64-bit vectors, returned by the vld4lane intrinsics.
%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }

; Four-member aggregates of 128-bit vectors, returned by the vld4lane intrinsics.
%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }

; vld4lane on <8 x i8>: the vector read from %B is passed as all four vector
; operands and the trailing i32 operand is 1 (presumably the lane index).
; The four returned vectors are summed pairwise, so each result is used.
define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld4lanei8:
;CHECK: vld4.8
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
  %tmp7 = add <8 x i8> %tmp3, %tmp4
  %tmp8 = add <8 x i8> %tmp5, %tmp6
  %tmp9 = add <8 x i8> %tmp7, %tmp8
  ret <8 x i8> %tmp9
}

; vld4lane on <4 x i16>: %A is bitcast to i8*, the vector read from %B is
; passed as all four vector operands, and the trailing i32 operand is 1
; (presumably the lane index). The four returned vectors are summed pairwise.
define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld4lanei16:
;CHECK: vld4.16
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3
  %tmp7 = add <4 x i16> %tmp3, %tmp4
  %tmp8 = add <4 x i16> %tmp5, %tmp6
  %tmp9 = add <4 x i16> %tmp7, %tmp8
  ret <4 x i16> %tmp9
}

; vld4lane on <2 x i32>: %A is bitcast to i8*, the vector read from %B is
; passed as all four vector operands, and the trailing i32 operand is 1
; (presumably the lane index). The four returned vectors are summed pairwise.
define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld4lanei32:
;CHECK: vld4.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3
  %tmp7 = add <2 x i32> %tmp3, %tmp4
  %tmp8 = add <2 x i32> %tmp5, %tmp6
  %tmp9 = add <2 x i32> %tmp7, %tmp8
  ret <2 x i32> %tmp9
}

; vld4lane on <2 x float>: %A is bitcast to i8*, the vector read from %B is
; passed as all four vector operands, and the trailing i32 operand is 1
; (presumably the lane index). The four results are fadd-summed pairwise.
define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld4lanef:
;CHECK: vld4.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3
  %tmp7 = fadd <2 x float> %tmp3, %tmp4
  %tmp8 = fadd <2 x float> %tmp5, %tmp6
  %tmp9 = fadd <2 x float> %tmp7, %tmp8
  ret <2 x float> %tmp9
}

; vld4lane on the 128-bit <8 x i16> type: %A is bitcast to i8*, the vector read
; from %B is passed as all four vector operands, and the trailing i32 operand
; is 1 (presumably the lane index). The four results are summed pairwise.
define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld4laneQi16:
;CHECK: vld4.16
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3
  %tmp7 = add <8 x i16> %tmp3, %tmp4
  %tmp8 = add <8 x i16> %tmp5, %tmp6
  %tmp9 = add <8 x i16> %tmp7, %tmp8
  ret <8 x i16> %tmp9
}

; vld4lane on the 128-bit <4 x i32> type: %A is bitcast to i8*, the vector read
; from %B is passed as all four vector operands, and the trailing i32 operand
; is 1 (presumably the lane index). The four results are summed pairwise.
define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld4laneQi32:
;CHECK: vld4.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3
  %tmp7 = add <4 x i32> %tmp3, %tmp4
  %tmp8 = add <4 x i32> %tmp5, %tmp6
  %tmp9 = add <4 x i32> %tmp7, %tmp8
  ret <4 x i32> %tmp9
}

; vld4lane on the 128-bit <4 x float> type: %A is bitcast to i8*, the vector
; read from %B is passed as all four vector operands, and the trailing i32
; operand is 1 (presumably the lane index). The four results are fadd-summed
; pairwise.
define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld4laneQf:
;CHECK: vld4.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 3
  %tmp7 = fadd <4 x float> %tmp3, %tmp4
  %tmp8 = fadd <4 x float> %tmp5, %tmp6
  %tmp9 = fadd <4 x float> %tmp7, %tmp8
  ret <4 x float> %tmp9
}

; vld4lane intrinsic declarations. Each takes an i8* address, four vectors of
; the named element type, and a trailing i32 (presumably the lane index), and
; is marked nounwind readonly.
declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly
declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly
declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly

declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly
declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly