; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s

; Loads one i8 from %A (align 8) and inserts it into lane 3 of the <8 x i8>
; vector loaded from %B.  The expected output is a single-lane vld1.8 with no
; alignment qualifier on the address.
define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld1lanei8:
;Check the (default) alignment value.
;CHECK: vld1.8 {d16[3]}, [r0]
  %tmp1 = load <8 x i8>* %B
  %tmp2 = load i8* %A, align 8
  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3
  ret <8 x i8> %tmp3
}

; Loads one i16 from %A (align 8) and inserts it into lane 2 of the <4 x i16>
; vector loaded from %B.  The expected single-lane vld1.16 carries a :16
; alignment qualifier even though the IR load states align 8.
define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld1lanei16:
;Check the alignment value. Max for this instruction is 16 bits:
;CHECK: vld1.16 {d16[2]}, [r0, :16]
  %tmp1 = load <4 x i16>* %B
  %tmp2 = load i16* %A, align 8
  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2
  ret <4 x i16> %tmp3
}

; Loads one i32 from %A (align 8) and inserts it into lane 1 of the <2 x i32>
; vector loaded from %B.  The expected single-lane vld1.32 carries a :32
; alignment qualifier.
define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld1lanei32:
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vld1.32 {d16[1]}, [r0, :32]
  %tmp1 = load <2 x i32>* %B
  %tmp2 = load i32* %A, align 8
  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
  ret <2 x i32> %tmp3
}

; Floating-point variant: loads one float from %A (align 4) and inserts it
; into lane 1 of the <2 x float> vector loaded from %B.  The expected
; single-lane vld1.32 has no alignment qualifier.
define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld1lanef:
;CHECK: vld1.32 {d16[1]}, [r0]
  %tmp1 = load <2 x float>* %B
  %tmp2 = load float* %A, align 4
  %tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1
  ret <2 x float> %tmp3
}

; 128-bit variant: loads one i8 from %A (align 8) and inserts it into lane 9
; of the <16 x i8> vector loaded from %B.  The expected output addresses that
; element as d17[1].
define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vld1laneQi8:
;CHECK: vld1.8 {d17[1]}, [r0]
  %tmp1 = load <16 x i8>* %B
  %tmp2 = load i8* %A, align 8
  %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 9
  ret <16 x i8> %tmp3
}

; 128-bit variant: loads one i16 from %A (align 8) and inserts it into lane 5
; of the <8 x i16> vector loaded from %B.  The expected output addresses that
; element as d17[1] and carries a :16 alignment qualifier.
define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld1laneQi16:
;CHECK: vld1.16 {d17[1]}, [r0, :16]
  %tmp1 = load <8 x i16>* %B
  %tmp2 = load i16* %A, align 8
  %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5
  ret <8 x i16> %tmp3
}

; 128-bit variant: loads one i32 from %A (align 8) and inserts it into lane 3
; of the <4 x i32> vector loaded from %B.  The expected output addresses that
; element as d17[1] and carries a :32 alignment qualifier.
define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld1laneQi32:
;CHECK: vld1.32 {d17[1]}, [r0, :32]
  %tmp1 = load <4 x i32>* %B
  %tmp2 = load i32* %A, align 8
  %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3
  ret <4 x i32> %tmp3
}

; 128-bit floating-point variant: loads one float from %A (no explicit
; alignment on the load) and inserts it into lane 0 of the <4 x float> vector
; loaded from %B.  The expected output addresses that element as d16[0].
define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld1laneQf:
;CHECK: vld1.32 {d16[0]}, [r0]
  %tmp1 = load <4 x float>* %B
  %tmp2 = load float* %A
  %tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0
  ret <4 x float> %tmp3
}

; Two-vector aggregate types returned by the vld2lane intrinsics.
; 64-bit element vectors:
%struct.__neon_int8x8x2_t = type { <8 x i8>,  <8 x i8> }
%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }

; 128-bit element vectors:
%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }

; Calls the v8i8 vld2lane intrinsic on %A with lane 1 and an alignment
; argument of 4, passing the vector loaded from %B for both inputs, then sums
; the two returned vectors so both results are used.
define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld2lanei8:
;Check the alignment value. Max for this instruction is 16 bits:
;CHECK: vld2.8 {d16[1], d17[1]}, [r0, :16]
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
  %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

; Calls the v4i16 vld2lane intrinsic (pointer bitcast to i8*) with lane 1 and
; an alignment argument of 8, then sums the two returned vectors.
define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld2lanei16:
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

; Calls the v2i32 vld2lane intrinsic with lane 1 and an alignment argument of
; 1, then sums the two returned vectors.  Only the vld2.32 mnemonic is
; verified; operands are not pinned.
define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld2lanei32:
;CHECK: vld2.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

;Check for a post-increment updating load.
; The address is read from %ptr, used for a lane-1 vld2lane, then advanced by
; two i32 elements and stored back to %ptr.  The expected instruction uses the
; writeback ("!") addressing form.
define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind {
;CHECK: vld2lanei32_update:
;CHECK: vld2.32 {d16[1], d17[1]}, [{{r[0-9]+}}]!
  %A = load i32** %ptr
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  %tmp6 = getelementptr i32* %A, i32 2
  store i32* %tmp6, i32** %ptr
  ret <2 x i32> %tmp5
}

; Floating-point variant: calls the v2f32 vld2lane intrinsic with lane 1 and
; an alignment argument of 1, then fadds the two returned vectors.  Only the
; vld2.32 mnemonic is verified.
define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld2lanef:
;CHECK: vld2.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1
  %tmp5 = fadd <2 x float> %tmp3, %tmp4
  ret <2 x float> %tmp5
}

; 128-bit variant: calls the v8i16 vld2lane intrinsic with lane 5 and an
; alignment argument of 1, then sums the two returned vectors.  The expected
; output addresses the lane as d17[1]/d19[1] with no alignment qualifier.
define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld2laneQi16:
;Check the (default) alignment.
;CHECK: vld2.16 {d17[1], d19[1]}, [{{r[0-9]+}}]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
  %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; 128-bit variant: calls the v4i32 vld2lane intrinsic with lane 2 and an
; alignment argument of 16, then sums the two returned vectors.  The expected
; output addresses the lane as d17[0]/d19[0] with a :64 qualifier.
define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld2laneQi32:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vld2.32 {d17[0], d19[0]}, [{{r[0-9]+}}, :64]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
  %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

; 128-bit floating-point variant: calls the v4f32 vld2lane intrinsic with
; lane 1 and an alignment argument of 1, then fadds the two returned vectors.
; Only the vld2.32 mnemonic is verified.
define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld2laneQf:
;CHECK: vld2.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1
  %tmp5 = fadd <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}

; Declarations of the vld2lane intrinsics used above.  Each takes an i8*
; address, two input vectors and two trailing i32 operands, which the callers
; in this file use as the lane index and the alignment.
declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind readonly

declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind readonly

; Three-vector aggregate types returned by the vld3lane intrinsics.
; 64-bit element vectors:
%struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>,  <8 x i8> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }

; 128-bit element vectors:
%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }

; Calls the v8i8 vld3lane intrinsic on %A with lane 1 and an alignment
; argument of 1, then sums the three returned vectors so every result is
; used.  Only the vld3.8 mnemonic is verified.
define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld3lanei8:
;CHECK: vld3.8
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
  %tmp6 = add <8 x i8> %tmp3, %tmp4
  %tmp7 = add <8 x i8> %tmp5, %tmp6
  ret <8 x i8> %tmp7
}

; Calls the v4i16 vld3lane intrinsic with lane 1 and an alignment argument of
; 8, then sums the three returned vectors.  The expected instruction has no
; alignment qualifier despite the alignment argument.
define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld3lanei16:
;Check the (default) alignment value. VLD3 does not support alignment.
;CHECK: vld3.16 {d16[1], d17[1], d18[1]}, [{{r[0-9]+}}]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
  %tmp6 = add <4 x i16> %tmp3, %tmp4
  %tmp7 = add <4 x i16> %tmp5, %tmp6
  ret <4 x i16> %tmp7
}

; Calls the v2i32 vld3lane intrinsic with lane 1 and an alignment argument of
; 1, then sums the three returned vectors.  Only the vld3.32 mnemonic is
; verified.
define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld3lanei32:
;CHECK: vld3.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2
  %tmp6 = add <2 x i32> %tmp3, %tmp4
  %tmp7 = add <2 x i32> %tmp5, %tmp6
  ret <2 x i32> %tmp7
}

; Floating-point variant: calls the v2f32 vld3lane intrinsic with lane 1 and
; an alignment argument of 1, then fadds the three returned vectors.  Only the
; vld3.32 mnemonic is verified.
define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld3lanef:
;CHECK: vld3.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2
  %tmp6 = fadd <2 x float> %tmp3, %tmp4
  %tmp7 = fadd <2 x float> %tmp5, %tmp6
  ret <2 x float> %tmp7
}

; 128-bit variant: calls the v8i16 vld3lane intrinsic with lane 1 and an
; alignment argument of 8, then sums the three returned vectors.  The expected
; register list is d16/d18/d20 with no alignment qualifier.
define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld3laneQi16:
;Check the (default) alignment value. VLD3 does not support alignment.
;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [{{r[0-9]+}}]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
  %tmp6 = add <8 x i16> %tmp3, %tmp4
  %tmp7 = add <8 x i16> %tmp5, %tmp6
  ret <8 x i16> %tmp7
}

;Check for a post-increment updating load with register increment.
; The address is read from %ptr, used for a lane-1 vld3lane, then advanced by
; %inc i16 elements and stored back to %ptr.  The expected instruction takes
; the increment from a register after the address operand.
define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
;CHECK: vld3laneQi16_update:
;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [{{r[0-9]+}}], {{r[0-9]+}}
  %A = load i16** %ptr
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
  %tmp6 = add <8 x i16> %tmp3, %tmp4
  %tmp7 = add <8 x i16> %tmp5, %tmp6
  %tmp8 = getelementptr i16* %A, i32 %inc
  store i16* %tmp8, i16** %ptr
  ret <8 x i16> %tmp7
}

; 128-bit variant: calls the v4i32 vld3lane intrinsic with lane 3 and an
; alignment argument of 1, then sums the three returned vectors.  Only the
; vld3.32 mnemonic is verified.
define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld3laneQi32:
;CHECK: vld3.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2
  %tmp6 = add <4 x i32> %tmp3, %tmp4
  %tmp7 = add <4 x i32> %tmp5, %tmp6
  ret <4 x i32> %tmp7
}

; 128-bit floating-point variant: calls the v4f32 vld3lane intrinsic with
; lane 1 and an alignment argument of 1, then fadds the three returned
; vectors.  Only the vld3.32 mnemonic is verified.
define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld3laneQf:
;CHECK: vld3.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2
  %tmp6 = fadd <4 x float> %tmp3, %tmp4
  %tmp7 = fadd <4 x float> %tmp5, %tmp6
  ret <4 x float> %tmp7
}

; Declarations of the vld3lane intrinsics used above.  Each takes an i8*
; address, three input vectors and two trailing i32 operands, which the
; callers in this file use as the lane index and the alignment.
declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly

declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly

; Four-vector aggregate types returned by the vld4lane intrinsics.
; 64-bit element vectors:
%struct.__neon_int8x8x4_t = type { <8 x i8>,  <8 x i8>,  <8 x i8>,  <8 x i8> }
%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }

; 128-bit element vectors:
%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }

; Calls the v8i8 vld4lane intrinsic on %A with lane 1 and an alignment
; argument of 8, then sums the four returned vectors so every result is used.
; The expected instruction carries a :32 alignment qualifier.
define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld4lanei8:
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}, :32]
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
  %tmp7 = add <8 x i8> %tmp3, %tmp4
  %tmp8 = add <8 x i8> %tmp5, %tmp6
  %tmp9 = add <8 x i8> %tmp7, %tmp8
  ret <8 x i8> %tmp9
}

;Check for a post-increment updating load.
; The address is read from %ptr, used for a lane-1 vld4lane (alignment
; argument 8), then advanced by four i8 elements and stored back to %ptr.
; The expected instruction uses the writeback ("!") form with a :32 qualifier.
define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
;CHECK: vld4lanei8_update:
;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}, :32]!
  %A = load i8** %ptr
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
  %tmp7 = add <8 x i8> %tmp3, %tmp4
  %tmp8 = add <8 x i8> %tmp5, %tmp6
  %tmp9 = add <8 x i8> %tmp7, %tmp8
  %tmp10 = getelementptr i8* %A, i32 4
  store i8* %tmp10, i8** %ptr
  ret <8 x i8> %tmp9
}

; Calls the v4i16 vld4lane intrinsic with lane 1 and an alignment argument of
; 4, then sums the four returned vectors.  The expected instruction has no
; alignment qualifier.
define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld4lanei16:
;Check that a power-of-two alignment smaller than the total size of the memory
;being loaded is ignored.
;CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4)
  %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3
  %tmp7 = add <4 x i16> %tmp3, %tmp4
  %tmp8 = add <4 x i16> %tmp5, %tmp6
  %tmp9 = add <4 x i16> %tmp7, %tmp8
  ret <4 x i16> %tmp9
}

; Calls the v2i32 vld4lane intrinsic with lane 1 and an alignment argument of
; 8, then sums the four returned vectors.  The expected instruction carries a
; :64 alignment qualifier.
define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld4lanei32:
;Check the alignment value. An 8-byte alignment is allowed here even though
;it is smaller than the total size of the memory being loaded.
;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}, :64]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3
  %tmp7 = add <2 x i32> %tmp3, %tmp4
  %tmp8 = add <2 x i32> %tmp5, %tmp6
  %tmp9 = add <2 x i32> %tmp7, %tmp8
  ret <2 x i32> %tmp9
}

; Floating-point variant: calls the v2f32 vld4lane intrinsic with lane 1 and
; an alignment argument of 1, then fadds the four returned vectors.  Only the
; vld4.32 mnemonic is verified.
define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld4lanef:
;CHECK: vld4.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3
  %tmp7 = fadd <2 x float> %tmp3, %tmp4
  %tmp8 = fadd <2 x float> %tmp5, %tmp6
  %tmp9 = fadd <2 x float> %tmp7, %tmp8
  ret <2 x float> %tmp9
}

; 128-bit variant: calls the v8i16 vld4lane intrinsic with lane 1 and an
; alignment argument of 16, then sums the four returned vectors.  The expected
; register list is d16/d18/d20/d22 with a :64 alignment qualifier.
define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld4laneQi16:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [{{r[0-9]+}}, :64]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
  %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3
  %tmp7 = add <8 x i16> %tmp3, %tmp4
  %tmp8 = add <8 x i16> %tmp5, %tmp6
  %tmp9 = add <8 x i16> %tmp7, %tmp8
  ret <8 x i16> %tmp9
}

; 128-bit variant: calls the v4i32 vld4lane intrinsic with lane 2 and an
; alignment argument of 1, then sums the four returned vectors.  The expected
; output addresses the lane as element 0 of d17/d19/d21/d23 with no alignment
; qualifier.
define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld4laneQi32:
;Check the (default) alignment.
;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [{{r[0-9]+}}]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3
  %tmp7 = add <4 x i32> %tmp3, %tmp4
  %tmp8 = add <4 x i32> %tmp5, %tmp6
  %tmp9 = add <4 x i32> %tmp7, %tmp8
  ret <4 x i32> %tmp9
}

; 128-bit floating-point variant: calls the v4f32 vld4lane intrinsic with
; lane 1 and an alignment argument of 1, then fadds the four returned vectors.
; Only the vld4.32 mnemonic is verified.
define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld4laneQf:
;CHECK: vld4.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 3
  %tmp7 = fadd <4 x float> %tmp3, %tmp4
  %tmp8 = fadd <4 x float> %tmp5, %tmp6
  %tmp9 = fadd <4 x float> %tmp7, %tmp8
  ret <4 x float> %tmp9
}

; Declarations of the vld4lane intrinsics used above.  Each takes an i8*
; address, four input vectors and two trailing i32 operands, which the callers
; in this file use as the lane index and the alignment.
declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly

declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly

; Radar 8776599: If one of the operands to a QQQQ REG_SEQUENCE is a register
; in the QPR_VFP2 regclass, it needs to be copied to a QPR regclass because
; we don't currently have a QQQQ_VFP2 super-regclass.  (The "0" for the low
; part of %ins67 is supposed to be loaded by a VLDRS instruction in this test.)
;
; The third vld3lane input is built from element 5 of %b: zero-extended to
; i128, shifted left by 64 and bitcast to <8 x i16>; the address and the first
; two vector inputs are undef.  Only the presence of a vld3.16 is verified.
define <8 x i16> @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind {
;CHECK: test_qqqq_regsequence_subreg:
;CHECK: vld3.16
  %tmp63 = extractvalue [6 x i64] %b, 5
  %tmp64 = zext i64 %tmp63 to i128
  %tmp65 = shl i128 %tmp64, 64
  %ins67 = or i128 %tmp65, 0
  %tmp78 = bitcast i128 %ins67 to <8 x i16>
  %vld3_lane = tail call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> %tmp78, i32 1, i32 2)
  %tmp3 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 1
  %tmp5 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 2
  %tmp6 = add <8 x i16> %tmp3, %tmp4
  %tmp7 = add <8 x i16> %tmp5, %tmp6
  ret <8 x i16> %tmp7
}

; NOTE(review): no function visible in this file calls @llvm.trap; the
; declaration is kept as-is in case it is still wanted.
declare void @llvm.trap() nounwind