; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; Insert a scalar i8 load into lane 3 of a 64-bit vector; expected to be
; selected as a single-lane vld1.8.
define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld1lanei8:
;Check the (default) alignment value: the load is marked align 8, but the
;expected instruction carries no alignment qualifier.
;CHECK: vld1.8 {d16[3]}, [r0]
  %tmp1 = load <8 x i8>* %B
  %tmp2 = load i8* %A, align 8
  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3
  ret <8 x i8> %tmp3
}

; Insert a scalar i16 load into lane 2 of a 64-bit vector; expected to be
; selected as a single-lane vld1.16.
define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld1lanei16:
;Check the alignment value.  Max for this instruction is 16 bits, so the
;align 8 on the load is expected to appear as :16.
;CHECK: vld1.16 {d16[2]}, [r0, :16]
  %tmp1 = load <4 x i16>* %B
  %tmp2 = load i16* %A, align 8
  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2
  ret <4 x i16> %tmp3
}

; Insert a scalar i32 load into lane 1 of a 64-bit vector; expected to be
; selected as a single-lane vld1.32.
define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld1lanei32:
;Check the alignment value.  Max for this instruction is 32 bits, so the
;align 8 on the load is expected to appear as :32.
;CHECK: vld1.32 {d16[1]}, [r0, :32]
  %tmp1 = load <2 x i32>* %B
  %tmp2 = load i32* %A, align 8
  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
  ret <2 x i32> %tmp3
}

; Insert a scalar float load (align 4) into lane 1 of a 64-bit float vector;
; expected to be selected as a single-lane vld1.32.  The expected output has
; no alignment qualifier.
define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld1lanef:
;CHECK: vld1.32 {d16[1]}, [r0]
  %tmp1 = load <2 x float>* %B
  %tmp2 = load float* %A, align 4
  %tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1
  ret <2 x float> %tmp3
}

; Insert a scalar i8 load into lane 9 of a 128-bit vector.  The expected
; output addresses that element as lane 1 of d17, with no alignment
; qualifier.
define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vld1laneQi8:
;CHECK: vld1.8 {d17[1]}, [r0]
  %tmp1 = load <16 x i8>* %B
  %tmp2 = load i8* %A, align 8
  %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 9
  ret <16 x i8> %tmp3
}

; Insert a scalar i16 load into lane 5 of a 128-bit vector.  The expected
; output addresses that element as lane 1 of d17, with a :16 alignment
; qualifier.
define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld1laneQi16:
;CHECK: vld1.16 {d17[1]}, [r0, :16]
  %tmp1 = load <8 x i16>* %B
  %tmp2 = load i16* %A, align 8
  %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5
  ret <8 x i16> %tmp3
}

; Insert a scalar i32 load into lane 3 of a 128-bit vector.  The expected
; output addresses that element as lane 1 of d17, with a :32 alignment
; qualifier.
define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld1laneQi32:
;CHECK: vld1.32 {d17[1]}, [r0, :32]
  %tmp1 = load <4 x i32>* %B
  %tmp2 = load i32* %A, align 8
  %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3
  ret <4 x i32> %tmp3
}

; Insert a scalar float load (no explicit alignment) into lane 0 of a 128-bit
; float vector.  The expected output addresses that element as lane 0 of d16,
; with no alignment qualifier.
define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld1laneQf:
;CHECK: vld1.32 {d16[0]}, [r0]
  %tmp1 = load <4 x float>* %B
  %tmp2 = load float* %A
  %tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0
  ret <4 x float> %tmp3
}

; Two-vector aggregates returned by the vld2lane intrinsics.
; 64-bit vector variants:
%struct.__neon_int8x8x2_t = type { <8 x i8>,  <8 x i8> }
%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }

; 128-bit vector variants:
%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }

; vld2lane on <8 x i8>: lane operand 1, alignment operand 4.  The two
; returned vectors are added so both results are used.
define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld2lanei8:
;Check the alignment value.  Max for this instruction is 16 bits:
;CHECK: vld2.8 {d16[1], d17[1]}, [r0, :16]
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
  %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

; vld2lane on <4 x i16>: lane operand 1, alignment operand 8.  The two
; returned vectors are added so both results are used.
define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld2lanei16:
;Check the alignment value.  Max for this instruction is 32 bits:
;CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

; vld2lane on <2 x i32>: lane operand 1, alignment operand 1.  Only the
; opcode is checked.
define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld2lanei32:
;CHECK: vld2.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

; vld2lane on <2 x float>: lane operand 1, alignment operand 1.  Only the
; opcode is checked.
define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld2lanef:
;CHECK: vld2.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1
  %tmp5 = fadd <2 x float> %tmp3, %tmp4
  ret <2 x float> %tmp5
}

; vld2lane on <8 x i16>: lane operand 5, alignment operand 1.  The expected
; output addresses the element as lane 1 of d17/d19.
define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld2laneQi16:
;Check the (default) alignment.
;CHECK: vld2.16 {d17[1], d19[1]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
  %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; vld2lane on <4 x i32>: lane operand 2, alignment operand 16.  The expected
; output addresses the element as lane 0 of d17/d19.
define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld2laneQi32:
;Check the alignment value.  Max for this instruction is 64 bits:
;CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
  %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

; vld2lane on <4 x float>: lane operand 1, alignment operand 1.  Only the
; opcode is checked.
define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld2laneQf:
;CHECK: vld2.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1
  %tmp5 = fadd <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}

; vld2lane intrinsics: i8* base pointer, two input vectors, then two i32
; operands.  As used by the tests in this file, the first i32 is the lane
; index and the second is the alignment.
declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind readonly

declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind readonly

; Three-vector aggregates returned by the vld3lane intrinsics.
; 64-bit vector variants:
%struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>,  <8 x i8> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }

; 128-bit vector variants:
%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }

; vld3lane on <8 x i8>: lane operand 1, alignment operand 1.  Only the
; opcode is checked.  The three returned vectors are summed so all results
; are used.
define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld3lanei8:
;CHECK: vld3.8
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
  %tmp6 = add <8 x i8> %tmp3, %tmp4
  %tmp7 = add <8 x i8> %tmp5, %tmp6
  ret <8 x i8> %tmp7
}

; vld3lane on <4 x i16>: lane operand 1, alignment operand 8.  The expected
; output carries no alignment qualifier despite the alignment operand.
define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld3lanei16:
;Check the (default) alignment value.  VLD3 does not support alignment.
;CHECK: vld3.16 {d16[1], d17[1], d18[1]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
  %tmp6 = add <4 x i16> %tmp3, %tmp4
  %tmp7 = add <4 x i16> %tmp5, %tmp6
  ret <4 x i16> %tmp7
}

; vld3lane on <2 x i32>: lane operand 1, alignment operand 1.  Only the
; opcode is checked.
define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld3lanei32:
;CHECK: vld3.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2
  %tmp6 = add <2 x i32> %tmp3, %tmp4
  %tmp7 = add <2 x i32> %tmp5, %tmp6
  ret <2 x i32> %tmp7
}

; vld3lane on <2 x float>: lane operand 1, alignment operand 1.  Only the
; opcode is checked.
define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld3lanef:
;CHECK: vld3.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2
  %tmp6 = fadd <2 x float> %tmp3, %tmp4
  %tmp7 = fadd <2 x float> %tmp5, %tmp6
  ret <2 x float> %tmp7
}

; vld3lane on <8 x i16>: lane operand 1, alignment operand 8.  The expected
; output uses every other D register (d16/d18/d20) and no alignment
; qualifier.
define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld3laneQi16:
;Check the (default) alignment value.  VLD3 does not support alignment.
;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
  %tmp6 = add <8 x i16> %tmp3, %tmp4
  %tmp7 = add <8 x i16> %tmp5, %tmp6
  ret <8 x i16> %tmp7
}

; vld3lane on <4 x i32>: lane operand 3, alignment operand 1.  Only the
; opcode is checked.
define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld3laneQi32:
;CHECK: vld3.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2
  %tmp6 = add <4 x i32> %tmp3, %tmp4
  %tmp7 = add <4 x i32> %tmp5, %tmp6
  ret <4 x i32> %tmp7
}

; vld3lane on <4 x float>: lane operand 1, alignment operand 1.  Only the
; opcode is checked.
define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld3laneQf:
;CHECK: vld3.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2
  %tmp6 = fadd <4 x float> %tmp3, %tmp4
  %tmp7 = fadd <4 x float> %tmp5, %tmp6
  ret <4 x float> %tmp7
}

; vld3lane intrinsics: i8* base pointer, three input vectors, then two i32
; operands.  As used by the tests in this file, the first i32 is the lane
; index and the second is the alignment.
declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly

declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly

; Four-vector aggregates returned by the vld4lane intrinsics.
; 64-bit vector variants:
%struct.__neon_int8x8x4_t = type { <8 x i8>,  <8 x i8>,  <8 x i8>,  <8 x i8> }
%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }

; 128-bit vector variants:
%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }

; vld4lane on <8 x i8>: lane operand 1, alignment operand 8.  The four
; returned vectors are summed so all results are used.
define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld4lanei8:
;Check the alignment value.  Max for this instruction is 32 bits:
;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
  %tmp7 = add <8 x i8> %tmp3, %tmp4
  %tmp8 = add <8 x i8> %tmp5, %tmp6
  %tmp9 = add <8 x i8> %tmp7, %tmp8
  ret <8 x i8> %tmp9
}

; vld4lane on <4 x i16>: lane operand 1, alignment operand 4.  Four i16
; elements total 8 bytes, so the 4-byte alignment is below the access size.
define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld4lanei16:
;Check that a power-of-two alignment smaller than the total size of the memory
;being loaded is ignored.
;CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4)
  %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3
  %tmp7 = add <4 x i16> %tmp3, %tmp4
  %tmp8 = add <4 x i16> %tmp5, %tmp6
  %tmp9 = add <4 x i16> %tmp7, %tmp8
  ret <4 x i16> %tmp9
}

; vld4lane on <2 x i32>: lane operand 1, alignment operand 8.  Four i32
; elements total 16 bytes, so the 8-byte alignment is below the access size.
define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld4lanei32:
;Check the alignment value.  An 8-byte alignment is allowed here even though
;it is smaller than the total size of the memory being loaded.
;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :64]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3
  %tmp7 = add <2 x i32> %tmp3, %tmp4
  %tmp8 = add <2 x i32> %tmp5, %tmp6
  %tmp9 = add <2 x i32> %tmp7, %tmp8
  ret <2 x i32> %tmp9
}

; vld4lane on <2 x float>: lane operand 1, alignment operand 1.  Only the
; opcode is checked.
define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld4lanef:
;CHECK: vld4.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3
  %tmp7 = fadd <2 x float> %tmp3, %tmp4
  %tmp8 = fadd <2 x float> %tmp5, %tmp6
  %tmp9 = fadd <2 x float> %tmp7, %tmp8
  ret <2 x float> %tmp9
}

; vld4lane on <8 x i16>: lane operand 1, alignment operand 16.  The expected
; output uses every other D register (d16/d18/d20/d22).
define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld4laneQi16:
;Check the alignment value.  Max for this instruction is 64 bits:
;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
  %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3
  %tmp7 = add <8 x i16> %tmp3, %tmp4
  %tmp8 = add <8 x i16> %tmp5, %tmp6
  %tmp9 = add <8 x i16> %tmp7, %tmp8
  ret <8 x i16> %tmp9
}

; vld4lane on <4 x i32>: lane operand 2, alignment operand 1.  The expected
; output addresses the element as lane 0 of d17/d19/d21/d23.
define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld4laneQi32:
;Check the (default) alignment.
;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3
  %tmp7 = add <4 x i32> %tmp3, %tmp4
  %tmp8 = add <4 x i32> %tmp5, %tmp6
  %tmp9 = add <4 x i32> %tmp7, %tmp8
  ret <4 x i32> %tmp9
}

; vld4lane on <4 x float>: lane operand 1, alignment operand 1.  Only the
; opcode is checked.
define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld4laneQf:
;CHECK: vld4.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 3
  %tmp7 = fadd <4 x float> %tmp3, %tmp4
  %tmp8 = fadd <4 x float> %tmp5, %tmp6
  %tmp9 = fadd <4 x float> %tmp7, %tmp8
  ret <4 x float> %tmp9
}

; vld4lane intrinsics: i8* base pointer, four input vectors, then two i32
; operands.  As used by the tests in this file, the first i32 is the lane
; index and the second is the alignment.
declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly

declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly

; Radar 8776599: If one of the operands to a QQQQ REG_SEQUENCE is a register
; in the QPR_VFP2 regclass, it needs to be copied to a QPR regclass because
; we don't currently have a QQQQ_VFP2 super-regclass.  (The "0" for the low
; part of %ins67 is supposed to be loaded by a VLDRS instruction in this test.)
; The vld3lane result is unused; the function traps right after the call, so
; the test only verifies that a vld3.16 is emitted.
define void @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind {
;CHECK: test_qqqq_regsequence_subreg:
;CHECK: vld3.16
  %tmp63 = extractvalue [6 x i64] %b, 5
  %tmp64 = zext i64 %tmp63 to i128
  %tmp65 = shl i128 %tmp64, 64
  %ins67 = or i128 %tmp65, 0
  %tmp78 = bitcast i128 %ins67 to <8 x i16>
  %vld3_lane = tail call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> %tmp78, i32 1, i32 2)
  call void @llvm.trap()
  unreachable
}

; Trap intrinsic called by test_qqqq_regsequence_subreg.
declare void @llvm.trap() nounwind