; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s

; Loads an i8 from %A and inserts it into lane 3 of the <8 x i8> loaded from %B.
; The scalar load carries "align 8"; the expected vld1.8 has no alignment suffix.
define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld1lanei8:
;Check the (default) alignment value.
;CHECK: vld1.8 {d16[3]}, [r0]
	%tmp1 = load <8 x i8>* %B
	%tmp2 = load i8* %A, align 8
	%tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3
	ret <8 x i8> %tmp3
}

; Loads an i16 from %A (align 8) and inserts it into lane 2 of the <4 x i16>
; loaded from %B. The expected vld1.16 uses the :16 alignment qualifier.
define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld1lanei16:
;Check the alignment value. Max for this instruction is 16 bits:
;CHECK: vld1.16 {d16[2]}, [r0, :16]
	%tmp1 = load <4 x i16>* %B
	%tmp2 = load i16* %A, align 8
	%tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2
	ret <4 x i16> %tmp3
}

; Loads an i32 from %A (align 8) and inserts it into lane 1 of the <2 x i32>
; loaded from %B. The expected vld1.32 uses the :32 alignment qualifier.
define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld1lanei32:
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vld1.32 {d16[1]}, [r0, :32]
	%tmp1 = load <2 x i32>* %B
	%tmp2 = load i32* %A, align 8
	%tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
	ret <2 x i32> %tmp3
}

; Same shape as vld1lanei32, but the scalar load is only "align 4"; the
; expected vld1.32 still carries the :32 qualifier.
define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld1lanei32a32:
;Check the alignment value. Legal values are none or :32.
;CHECK: vld1.32 {d16[1]}, [r0, :32]
	%tmp1 = load <2 x i32>* %B
	%tmp2 = load i32* %A, align 4
	%tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
	ret <2 x i32> %tmp3
}

; Loads a float from %A (align 4) and inserts it into lane 1 of the
; <2 x float> loaded from %B. Expected to select vld1.32 with :32.
define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld1lanef:
;CHECK: vld1.32 {d16[1]}, [r0, :32]
	%tmp1 = load <2 x float>* %B
	%tmp2 = load float* %A, align 4
	%tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1
	ret <2 x float> %tmp3
}

; 128-bit variant: inserts an i8 loaded from %A into lane 9 of the <16 x i8>
; loaded from %B. The expected instruction addresses that element as d17[1].
define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vld1laneQi8:
;CHECK: vld1.8 {d17[1]}, [r0]
	%tmp1 = load <16 x i8>* %B
	%tmp2 = load i8* %A, align 8
	%tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 9
	ret <16 x i8> %tmp3
}

; 128-bit variant: inserts an i16 loaded from %A (align 8) into lane 5 of the
; <8 x i16> loaded from %B. The expected instruction targets d17[1] with :16.
define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld1laneQi16:
;CHECK: vld1.16 {d17[1]}, [r0, :16]
	%tmp1 = load <8 x i16>* %B
	%tmp2 = load i16* %A, align 8
	%tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5
	ret <8 x i16> %tmp3
}

; 128-bit variant: inserts an i32 loaded from %A (align 8) into lane 3 of the
; <4 x i32> loaded from %B. The expected instruction targets d17[1] with :32.
define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld1laneQi32:
;CHECK: vld1.32 {d17[1]}, [r0, :32]
	%tmp1 = load <4 x i32>* %B
	%tmp2 = load i32* %A, align 8
	%tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3
	ret <4 x i32> %tmp3
}

; 128-bit float variant: inserts a float loaded from %A into lane 0 of the
; <4 x float> loaded from %B. The scalar load has no explicit alignment; the
; expected instruction targets d16[0] with :32.
define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld1laneQf:
;CHECK: vld1.32 {d16[0]}, [r0, :32]
	%tmp1 = load <4 x float>* %B
	%tmp2 = load float* %A
	%tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0
	ret <4 x float> %tmp3
}

; Two-vector aggregates returned by the vld2lane intrinsics (64-bit vectors).
%struct.__neon_int8x8x2_t = type { <8 x i8>,  <8 x i8> }
%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }

; Two-vector aggregates returned by the vld2lane intrinsics (128-bit vectors).
%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }

; Calls the v8i8 vld2lane intrinsic on %A with lane index 1 and a final i32
; operand of 4, then returns the sum of the two result vectors.
define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld2lanei8:
;Check the alignment value. Max for this instruction is 16 bits:
;CHECK: vld2.8 {d16[1], d17[1]}, [r0, :16]
	%tmp1 = load <8 x i8>* %B
	%tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
	%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
	%tmp5 = add <8 x i8> %tmp3, %tmp4
	ret <8 x i8> %tmp5
}

; Calls the v4i16 vld2lane intrinsic (lane 1, final i32 operand 8) on %A cast
; to i8*, then returns the sum of the two result vectors.
define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld2lanei16:
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32]
	%tmp0 = bitcast i16* %A to i8*
	%tmp1 = load <4 x i16>* %B
	%tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
	%tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
	%tmp5 = add <4 x i16> %tmp3, %tmp4
	ret <4 x i16> %tmp5
}

; Calls the v2i32 vld2lane intrinsic (lane 1, final i32 operand 1) and returns
; the sum of the two result vectors. Only the vld2.32 mnemonic is matched.
define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld2lanei32:
;CHECK: vld2.32
	%tmp0 = bitcast i32* %A to i8*
	%tmp1 = load <2 x i32>* %B
	%tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
	%tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
	%tmp5 = add <2 x i32> %tmp3, %tmp4
	ret <2 x i32> %tmp5
}

;Check for a post-increment updating load.
; The pointer is read from %ptr, used for the vld2lane call, advanced by two
; i32 elements and stored back; the expected instruction has the "!" suffix.
define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind {
;CHECK: vld2lanei32_update:
;CHECK: vld2.32 {d16[1], d17[1]}, [{{r[0-9]+}}]!
	%A = load i32** %ptr
	%tmp0 = bitcast i32* %A to i8*
	%tmp1 = load <2 x i32>* %B
	%tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
	%tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
	%tmp5 = add <2 x i32> %tmp3, %tmp4
	%tmp6 = getelementptr i32* %A, i32 2
	store i32* %tmp6, i32** %ptr
	ret <2 x i32> %tmp5
}

; Float variant: calls the v2f32 vld2lane intrinsic (lane 1, final i32
; operand 1) and returns the fadd of the two result vectors.
define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld2lanef:
;CHECK: vld2.32
	%tmp0 = bitcast float* %A to i8*
	%tmp1 = load <2 x float>* %B
	%tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
	%tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1
	%tmp5 = fadd <2 x float> %tmp3, %tmp4
	ret <2 x float> %tmp5
}

; 128-bit variant: calls the v8i16 vld2lane intrinsic with lane index 5 and a
; final i32 operand of 1; the expected instruction has no alignment qualifier
; and addresses the lane as d17[1]/d19[1].
define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld2laneQi16:
;Check the (default) alignment.
;CHECK: vld2.16 {d17[1], d19[1]}, [{{r[0-9]+}}]
	%tmp0 = bitcast i16* %A to i8*
	%tmp1 = load <8 x i16>* %B
	%tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
	%tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
	%tmp5 = add <8 x i16> %tmp3, %tmp4
	ret <8 x i16> %tmp5
}

; 128-bit variant: calls the v4i32 vld2lane intrinsic with lane index 2 and a
; final i32 operand of 16; the expected instruction uses :64 and addresses the
; lane as d17[0]/d19[0].
define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld2laneQi32:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vld2.32 {d17[0], d19[0]}, [{{r[0-9]+}}, :64]
	%tmp0 = bitcast i32* %A to i8*
	%tmp1 = load <4 x i32>* %B
	%tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
	%tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
	%tmp5 = add <4 x i32> %tmp3, %tmp4
	ret <4 x i32> %tmp5
}

; 128-bit float variant: calls the v4f32 vld2lane intrinsic (lane 1, final i32
; operand 1) and returns the fadd of the two result vectors.
define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld2laneQf:
;CHECK: vld2.32
	%tmp0 = bitcast float* %A to i8*
	%tmp1 = load <4 x float>* %B
	%tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
	%tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1
	%tmp5 = fadd <4 x float> %tmp3, %tmp4
	ret <4 x float> %tmp5
}

; vld2lane intrinsics on 64-bit vectors: (i8* address, two input vectors,
; i32, i32) -> two-vector struct.
declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind readonly

; vld2lane intrinsics on 128-bit vectors.
declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind readonly

; Three-vector aggregates returned by the vld3lane intrinsics (64-bit vectors).
%struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>,  <8 x i8> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }

; Three-vector aggregates returned by the vld3lane intrinsics (128-bit vectors).
%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }

; Calls the v8i8 vld3lane intrinsic (lane 1, final i32 operand 1) and returns
; the sum of the three result vectors. Only the vld3.8 mnemonic is matched.
define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld3lanei8:
;CHECK: vld3.8
	%tmp1 = load <8 x i8>* %B
	%tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
	%tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
	%tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
	%tmp6 = add <8 x i8> %tmp3, %tmp4
	%tmp7 = add <8 x i8> %tmp5, %tmp6
	ret <8 x i8> %tmp7
}

; Calls the v4i16 vld3lane intrinsic with lane 1 and a final i32 operand of 8;
; the expected vld3.16 nevertheless has no alignment qualifier.
define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld3lanei16:
;Check the (default) alignment value. VLD3 does not support alignment.
;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}]
	%tmp0 = bitcast i16* %A to i8*
	%tmp1 = load <4 x i16>* %B
	%tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
	%tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
	%tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
	%tmp6 = add <4 x i16> %tmp3, %tmp4
	%tmp7 = add <4 x i16> %tmp5, %tmp6
	ret <4 x i16> %tmp7
}

; Calls the v2i32 vld3lane intrinsic (lane 1, final i32 operand 1) and returns
; the sum of the three result vectors. Only the vld3.32 mnemonic is matched.
define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld3lanei32:
;CHECK: vld3.32
	%tmp0 = bitcast i32* %A to i8*
	%tmp1 = load <2 x i32>* %B
	%tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
	%tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
	%tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2
	%tmp6 = add <2 x i32> %tmp3, %tmp4
	%tmp7 = add <2 x i32> %tmp5, %tmp6
	ret <2 x i32> %tmp7
}

; Float variant: calls the v2f32 vld3lane intrinsic (lane 1, final i32
; operand 1) and returns the fadd of the three result vectors.
define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld3lanef:
;CHECK: vld3.32
	%tmp0 = bitcast float* %A to i8*
	%tmp1 = load <2 x float>* %B
	%tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
	%tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1
	%tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2
	%tmp6 = fadd <2 x float> %tmp3, %tmp4
	%tmp7 = fadd <2 x float> %tmp5, %tmp6
	ret <2 x float> %tmp7
}

; 128-bit variant: calls the v8i16 vld3lane intrinsic with lane 1 and a final
; i32 operand of 8; the expected vld3.16 has no alignment qualifier.
define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld3laneQi16:
;Check the (default) alignment value. VLD3 does not support alignment.
;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}]
	%tmp0 = bitcast i16* %A to i8*
	%tmp1 = load <8 x i16>* %B
	%tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
	%tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
	%tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
	%tmp6 = add <8 x i16> %tmp3, %tmp4
	%tmp7 = add <8 x i16> %tmp5, %tmp6
	ret <8 x i16> %tmp7
}

;Check for a post-increment updating load with register increment.
; The pointer is read from %ptr, used for the vld3lane call, advanced by %inc
; i16 elements and stored back; the expected instruction ends with a register
; operand after the address.
define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
;CHECK: vld3laneQi16_update:
;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}], {{r[0-9]+}}
	%A = load i16** %ptr
	%tmp0 = bitcast i16* %A to i8*
	%tmp1 = load <8 x i16>* %B
	%tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
	%tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
	%tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
	%tmp6 = add <8 x i16> %tmp3, %tmp4
	%tmp7 = add <8 x i16> %tmp5, %tmp6
	%tmp8 = getelementptr i16* %A, i32 %inc
	store i16* %tmp8, i16** %ptr
	ret <8 x i16> %tmp7
}

; 128-bit variant: calls the v4i32 vld3lane intrinsic (lane 3, final i32
; operand 1) and returns the sum of the three result vectors.
define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld3laneQi32:
;CHECK: vld3.32
	%tmp0 = bitcast i32* %A to i8*
	%tmp1 = load <4 x i32>* %B
	%tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1)
	%tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
	%tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2
	%tmp6 = add <4 x i32> %tmp3, %tmp4
	%tmp7 = add <4 x i32> %tmp5, %tmp6
	ret <4 x i32> %tmp7
}

; 128-bit float variant: calls the v4f32 vld3lane intrinsic (lane 1, final i32
; operand 1) and returns the fadd of the three result vectors.
define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld3laneQf:
;CHECK: vld3.32
	%tmp0 = bitcast float* %A to i8*
	%tmp1 = load <4 x float>* %B
	%tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
	%tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1
	%tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2
	%tmp6 = fadd <4 x float> %tmp3, %tmp4
	%tmp7 = fadd <4 x float> %tmp5, %tmp6
	ret <4 x float> %tmp7
}

; vld3lane intrinsics on 64-bit vectors: (i8* address, three input vectors,
; i32, i32) -> three-vector struct.
declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly

; vld3lane intrinsics on 128-bit vectors.
declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly

; Four-vector aggregates returned by the vld4lane intrinsics (64-bit vectors).
%struct.__neon_int8x8x4_t = type { <8 x i8>,  <8 x i8>,  <8 x i8>,  <8 x i8> }
%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }

; Four-vector aggregates returned by the vld4lane intrinsics (128-bit vectors).
%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }

; Calls the v8i8 vld4lane intrinsic with lane 1 and a final i32 operand of 8,
; then returns the sum of the four result vectors. The expected vld4.8 uses :32.
define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld4lanei8:
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vld4.8 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}, :32]
	%tmp1 = load <8 x i8>* %B
	%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
	%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
	%tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
	%tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
	%tmp7 = add <8 x i8> %tmp3, %tmp4
	%tmp8 = add <8 x i8> %tmp5, %tmp6
	%tmp9 = add <8 x i8> %tmp7, %tmp8
	ret <8 x i8> %tmp9
}

;Check for a post-increment updating load.
; The pointer is read from %ptr, used for the vld4lane call, advanced by four
; bytes and stored back; the expected instruction has :32 and the "!" suffix.
define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
;CHECK: vld4lanei8_update:
;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}, :32]!
	%A = load i8** %ptr
	%tmp1 = load <8 x i8>* %B
	%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
	%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
	%tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
	%tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
	%tmp7 = add <8 x i8> %tmp3, %tmp4
	%tmp8 = add <8 x i8> %tmp5, %tmp6
	%tmp9 = add <8 x i8> %tmp7, %tmp8
	%tmp10 = getelementptr i8* %A, i32 4
	store i8* %tmp10, i8** %ptr
	ret <8 x i8> %tmp9
}

; Calls the v4i16 vld4lane intrinsic with lane 1 and a final i32 operand of 4;
; the expected vld4.16 has no alignment qualifier.
define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld4lanei16:
;Check that a power-of-two alignment smaller than the total size of the memory
;being loaded is ignored.
;CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}]
	%tmp0 = bitcast i16* %A to i8*
	%tmp1 = load <4 x i16>* %B
	%tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4)
	%tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
	%tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
	%tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3
	%tmp7 = add <4 x i16> %tmp3, %tmp4
	%tmp8 = add <4 x i16> %tmp5, %tmp6
	%tmp9 = add <4 x i16> %tmp7, %tmp8
	ret <4 x i16> %tmp9
}

; Calls the v2i32 vld4lane intrinsic with lane 1 and a final i32 operand of 8;
; the expected vld4.32 uses the :64 qualifier.
define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld4lanei32:
;Check the alignment value. An 8-byte alignment is allowed here even though
;it is smaller than the total size of the memory being loaded.
;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}, :64]
	%tmp0 = bitcast i32* %A to i8*
	%tmp1 = load <2 x i32>* %B
	%tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8)
	%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
	%tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
	%tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3
	%tmp7 = add <2 x i32> %tmp3, %tmp4
	%tmp8 = add <2 x i32> %tmp5, %tmp6
	%tmp9 = add <2 x i32> %tmp7, %tmp8
	ret <2 x i32> %tmp9
}

; Float variant: calls the v2f32 vld4lane intrinsic (lane 1, final i32
; operand 1) and returns the fadd of the four result vectors.
define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld4lanef:
;CHECK: vld4.32
	%tmp0 = bitcast float* %A to i8*
	%tmp1 = load <2 x float>* %B
	%tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
	%tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
	%tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2
	%tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3
	%tmp7 = fadd <2 x float> %tmp3, %tmp4
	%tmp8 = fadd <2 x float> %tmp5, %tmp6
	%tmp9 = fadd <2 x float> %tmp7, %tmp8
	ret <2 x float> %tmp9
}

; 128-bit variant: calls the v8i16 vld4lane intrinsic with lane 1 and a final
; i32 operand of 16; the expected vld4.16 uses :64 on d16/d18/d20/d22.
define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld4laneQi16:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [{{r[0-9]+}}, :64]
	%tmp0 = bitcast i16* %A to i8*
	%tmp1 = load <8 x i16>* %B
	%tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
	%tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
	%tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
	%tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3
	%tmp7 = add <8 x i16> %tmp3, %tmp4
	%tmp8 = add <8 x i16> %tmp5, %tmp6
	%tmp9 = add <8 x i16> %tmp7, %tmp8
	ret <8 x i16> %tmp9
}

; 128-bit variant: calls the v4i32 vld4lane intrinsic with lane 2 and a final
; i32 operand of 1; the expected vld4.32 has no alignment qualifier and
; addresses the lane as element 0 of d17/d19/d21/d23.
define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld4laneQi32:
;Check the (default) alignment.
;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [{{r[0-9]+}}]
	%tmp0 = bitcast i32* %A to i8*
	%tmp1 = load <4 x i32>* %B
	%tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
	%tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
	%tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
	%tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3
	%tmp7 = add <4 x i32> %tmp3, %tmp4
	%tmp8 = add <4 x i32> %tmp5, %tmp6
	%tmp9 = add <4 x i32> %tmp7, %tmp8
	ret <4 x i32> %tmp9
}

; 128-bit float variant: calls the v4f32 vld4lane intrinsic (lane 1, final i32
; operand 1) and returns the fadd of the four result vectors.
define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld4laneQf:
;CHECK: vld4.32
	%tmp0 = bitcast float* %A to i8*
	%tmp1 = load <4 x float>* %B
	%tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
	%tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0
	%tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1
	%tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2
	%tmp6 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 3
	%tmp7 = fadd <4 x float> %tmp3, %tmp4
	%tmp8 = fadd <4 x float> %tmp5, %tmp6
	%tmp9 = fadd <4 x float> %tmp7, %tmp8
	ret <4 x float> %tmp9
}

; vld4lane intrinsics on 64-bit vectors: (i8* address, four input vectors,
; i32, i32) -> four-vector struct.
declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly

; vld4lane intrinsics on 128-bit vectors.
declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly

; Radar 8776599: If one of the operands to a QQQQ REG_SEQUENCE is a register
; in the QPR_VFP2 regclass, it needs to be copied to a QPR regclass because
; we don't currently have a QQQQ_VFP2 super-regclass.  (The "0" for the low
; part of %ins67 is supposed to be loaded by a VLDRS instruction in this test.)
;
; The third vector operand of the vld3lane call is built from element 5 of %b,
; zero-extended to i128 and shifted into the high 64 bits; the address and the
; other two vector operands are undef. The label pattern below ends in ':' like
; every other test in this file so that it anchors on the function label.
define <8 x i16> @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind {
;CHECK: test_qqqq_regsequence_subreg:
;CHECK: vld3.16
	%tmp63 = extractvalue [6 x i64] %b, 5
	%tmp64 = zext i64 %tmp63 to i128
	%tmp65 = shl i128 %tmp64, 64
	%ins67 = or i128 %tmp65, 0
	%tmp78 = bitcast i128 %ins67 to <8 x i16>
	%vld3_lane = tail call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> %tmp78, i32 1, i32 2)
	%tmp3 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 0
	%tmp4 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 1
	%tmp5 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 2
	%tmp6 = add <8 x i16> %tmp3, %tmp4
	%tmp7 = add <8 x i16> %tmp5, %tmp6
	ret <8 x i16> %tmp7
}

; NOTE(review): no function visible in this file calls @llvm.trap; kept as-is.
declare void @llvm.trap() nounwind