blob: 97ab399043a529566a85f78a27d8c0263e0527df [file] [log] [blame]
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
Bob Wilsonc0110052009-09-01 04:27:10 +00002
; Load one i8 from %A and insert it into lane 3 of the <8 x i8> vector read
; from %B.  The scalar load is marked "align 8", but the expected vld1.8 has
; no alignment qualifier on its address operand (the default alignment).
define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld1lanei8:
;CHECK: vld1.8 {d16[3]}, [r0]
  %vec = load <8 x i8>* %B
  %elt = load i8* %A, align 8
  %res = insertelement <8 x i8> %vec, i8 %elt, i32 3
  ret <8 x i8> %res
}
12
; Load one i16 from %A and insert it into lane 2 of the <4 x i16> vector read
; from %B.  The scalar load is marked "align 8"; the expected instruction
; carries a :16 qualifier, the maximum alignment for this instruction.
define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld1lanei16:
;CHECK: vld1.16 {d16[2]}, [r0, :16]
  %vec = load <4 x i16>* %B
  %elt = load i16* %A, align 8
  %res = insertelement <4 x i16> %vec, i16 %elt, i32 2
  ret <4 x i16> %res
}
22
; Load one i32 from %A and insert it into lane 1 of the <2 x i32> vector read
; from %B.  The scalar load is marked "align 8", which exceeds what the
; instruction can encode, so the expected output is clamped to :32.
define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld1lanei32:
;Check the alignment value.  Max for this instruction is 32 bits:
;CHECK: vld1.32 {d16[1]}, [r0, :32]
  %tmp1 = load <2 x i32>* %B
  %tmp2 = load i32* %A, align 8
  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
  ret <2 x i32> %tmp3
}
32
; 128-bit variant: insert an i8 loaded from %A into lane 9 of the <16 x i8>
; vector read from %B.  Lane 9 is expected to be addressed as d17[1], with no
; alignment qualifier.
define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vld1laneQi8:
;CHECK: vld1.8 {d17[1]}, [r0]
  %vec = load <16 x i8>* %B
  %elt = load i8* %A, align 8
  %res = insertelement <16 x i8> %vec, i8 %elt, i32 9
  ret <16 x i8> %res
}
41
; 128-bit variant: insert an i16 loaded from %A into lane 5 of the <8 x i16>
; vector read from %B.  Lane 5 is expected to be addressed as d17[1], with a
; :16 alignment qualifier.
define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld1laneQi16:
;CHECK: vld1.16 {d17[1]}, [r0, :16]
  %vec = load <8 x i16>* %B
  %elt = load i16* %A, align 8
  %res = insertelement <8 x i16> %vec, i16 %elt, i32 5
  ret <8 x i16> %res
}
50
; 128-bit variant: insert an i32 loaded from %A into lane 3 of the <4 x i32>
; vector read from %B.  Lane 3 is expected to be addressed as d17[1], with a
; :32 alignment qualifier.
define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld1laneQi32:
;CHECK: vld1.32 {d17[1]}, [r0, :32]
  %vec = load <4 x i32>* %B
  %elt = load i32* %A, align 8
  %res = insertelement <4 x i32> %vec, i32 %elt, i32 3
  ret <4 x i32> %res
}
59
; Two-vector aggregates returned by the vld2lane intrinsics.
; 64-bit element vectors:
%struct.__neon_int8x8x2_t    = type { <8 x i8>,    <8 x i8> }
%struct.__neon_int16x4x2_t   = type { <4 x i16>,   <4 x i16> }
%struct.__neon_int32x2x2_t   = type { <2 x i32>,   <2 x i32> }
%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }

; 128-bit element vectors:
%struct.__neon_int16x8x2_t   = type { <8 x i16>,   <8 x i16> }
%struct.__neon_int32x4x2_t   = type { <4 x i32>,   <4 x i32> }
%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
68
; vld2lane on <8 x i8>, lane 1, with an alignment operand of 4.  The vector
; read from %B seeds both inputs; the two results are summed so both are live.
; The expected output is clamped to :16, the maximum for this instruction.
define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld2lanei8:
;CHECK: vld2.8 {d16[1], d17[1]}, [r0, :16]
  %vec = load <8 x i8>* %B
  %pair = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %vec, <8 x i8> %vec, i32 1, i32 4)
  %v0 = extractvalue %struct.__neon_int8x8x2_t %pair, 0
  %v1 = extractvalue %struct.__neon_int8x8x2_t %pair, 1
  %sum = add <8 x i8> %v0, %v1
  ret <8 x i8> %sum
}
80
; vld2lane on <4 x i16>, lane 1, with an alignment operand of 8.  The expected
; output is clamped to :32, the maximum for this instruction.
define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld2lanei16:
;CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32]
  %ptr = bitcast i16* %A to i8*
  %vec = load <4 x i16>* %B
  %pair = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %ptr, <4 x i16> %vec, <4 x i16> %vec, i32 1, i32 8)
  %v0 = extractvalue %struct.__neon_int16x4x2_t %pair, 0
  %v1 = extractvalue %struct.__neon_int16x4x2_t %pair, 1
  %sum = add <4 x i16> %v0, %v1
  ret <4 x i16> %sum
}
93
; vld2lane on <2 x i32>, lane 1, alignment operand 1.  Only the vld2.32
; mnemonic is matched; registers and address operand are not checked.
define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld2lanei32:
;CHECK: vld2.32
  %ptr = bitcast i32* %A to i8*
  %vec = load <2 x i32>* %B
  %pair = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %ptr, <2 x i32> %vec, <2 x i32> %vec, i32 1, i32 1)
  %v0 = extractvalue %struct.__neon_int32x2x2_t %pair, 0
  %v1 = extractvalue %struct.__neon_int32x2x2_t %pair, 1
  %sum = add <2 x i32> %v0, %v1
  ret <2 x i32> %sum
}
105
; vld2lane on <2 x float>, lane 1, alignment operand 1.  Only the vld2.32
; mnemonic is matched; the two results are combined with fadd.
define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld2lanef:
;CHECK: vld2.32
  %ptr = bitcast float* %A to i8*
  %vec = load <2 x float>* %B
  %pair = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %ptr, <2 x float> %vec, <2 x float> %vec, i32 1, i32 1)
  %v0 = extractvalue %struct.__neon_float32x2x2_t %pair, 0
  %v1 = extractvalue %struct.__neon_float32x2x2_t %pair, 1
  %sum = fadd <2 x float> %v0, %v1
  ret <2 x float> %sum
}
117
; vld2lane on <8 x i16>, lane 5, alignment operand 1.  Lane 5 is expected to
; be addressed as d17[1]/d19[1], and the address operand carries no alignment
; qualifier (the default alignment).
define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld2laneQi16:
;CHECK: vld2.16 {d17[1], d19[1]}, [r0]
  %ptr = bitcast i16* %A to i8*
  %vec = load <8 x i16>* %B
  %pair = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %ptr, <8 x i16> %vec, <8 x i16> %vec, i32 5, i32 1)
  %v0 = extractvalue %struct.__neon_int16x8x2_t %pair, 0
  %v1 = extractvalue %struct.__neon_int16x8x2_t %pair, 1
  %sum = add <8 x i16> %v0, %v1
  ret <8 x i16> %sum
}
130
; vld2lane on <4 x i32>, lane 2, alignment operand 16.  Lane 2 is expected to
; be addressed as d17[0]/d19[0], and the alignment is clamped to :64, the
; maximum for this instruction.
define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld2laneQi32:
;CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64]
  %ptr = bitcast i32* %A to i8*
  %vec = load <4 x i32>* %B
  %pair = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %ptr, <4 x i32> %vec, <4 x i32> %vec, i32 2, i32 16)
  %v0 = extractvalue %struct.__neon_int32x4x2_t %pair, 0
  %v1 = extractvalue %struct.__neon_int32x4x2_t %pair, 1
  %sum = add <4 x i32> %v0, %v1
  ret <4 x i32> %sum
}
143
; vld2lane on <4 x float>, lane 1, alignment operand 1.  Only the vld2.32
; mnemonic is matched; the two results are combined with fadd.
define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld2laneQf:
;CHECK: vld2.32
  %ptr = bitcast float* %A to i8*
  %vec = load <4 x float>* %B
  %pair = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %ptr, <4 x float> %vec, <4 x float> %vec, i32 1, i32 1)
  %v0 = extractvalue %struct.__neon_float32x4x2_t %pair, 0
  %v1 = extractvalue %struct.__neon_float32x4x2_t %pair, 1
  %sum = fadd <4 x float> %v0, %v1
  ret <4 x float> %sum
}
155
; vld2lane intrinsics: an i8* address, two input vectors, then two i32
; operands.  As used by the tests in this file, the trailing i32 operands
; appear to be the lane index followed by the alignment.
; 64-bit vector forms:
declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind readonly

; 128-bit vector forms:
declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind readonly
Bob Wilson30aea9d2009-10-08 18:56:10 +0000164
; Three-vector aggregates returned by the vld3lane intrinsics.
; 64-bit element vectors:
%struct.__neon_int8x8x3_t    = type { <8 x i8>,    <8 x i8>,    <8 x i8> }
%struct.__neon_int16x4x3_t   = type { <4 x i16>,   <4 x i16>,   <4 x i16> }
%struct.__neon_int32x2x3_t   = type { <2 x i32>,   <2 x i32>,   <2 x i32> }
%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }

; 128-bit element vectors:
%struct.__neon_int16x8x3_t   = type { <8 x i16>,   <8 x i16>,   <8 x i16> }
%struct.__neon_int32x4x3_t   = type { <4 x i32>,   <4 x i32>,   <4 x i32> }
%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
173
; vld3lane on <8 x i8>, lane 1, alignment operand 1.  Only the vld3.8
; mnemonic is matched; all three results are summed so each stays live.
define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld3lanei8:
;CHECK: vld3.8
  %vec = load <8 x i8>* %B
  %triple = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %vec, <8 x i8> %vec, <8 x i8> %vec, i32 1, i32 1)
  %v0 = extractvalue %struct.__neon_int8x8x3_t %triple, 0
  %v1 = extractvalue %struct.__neon_int8x8x3_t %triple, 1
  %v2 = extractvalue %struct.__neon_int8x8x3_t %triple, 2
  %sum01 = add <8 x i8> %v0, %v1
  %sum = add <8 x i8> %v2, %sum01
  ret <8 x i8> %sum
}
186
; vld3lane on <4 x i16>, lane 1, alignment operand 8.  VLD3 does not support
; alignment, so the expected address operand has no qualifier even though an
; alignment was requested.
define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld3lanei16:
;CHECK: vld3.16 {d16[1], d17[1], d18[1]}, [r0]
  %ptr = bitcast i16* %A to i8*
  %vec = load <4 x i16>* %B
  %triple = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %ptr, <4 x i16> %vec, <4 x i16> %vec, <4 x i16> %vec, i32 1, i32 8)
  %v0 = extractvalue %struct.__neon_int16x4x3_t %triple, 0
  %v1 = extractvalue %struct.__neon_int16x4x3_t %triple, 1
  %v2 = extractvalue %struct.__neon_int16x4x3_t %triple, 2
  %sum01 = add <4 x i16> %v0, %v1
  %sum = add <4 x i16> %v2, %sum01
  ret <4 x i16> %sum
}
201
; vld3lane on <2 x i32>, lane 1, alignment operand 1.  Only the vld3.32
; mnemonic is matched.
define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld3lanei32:
;CHECK: vld3.32
  %ptr = bitcast i32* %A to i8*
  %vec = load <2 x i32>* %B
  %triple = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %ptr, <2 x i32> %vec, <2 x i32> %vec, <2 x i32> %vec, i32 1, i32 1)
  %v0 = extractvalue %struct.__neon_int32x2x3_t %triple, 0
  %v1 = extractvalue %struct.__neon_int32x2x3_t %triple, 1
  %v2 = extractvalue %struct.__neon_int32x2x3_t %triple, 2
  %sum01 = add <2 x i32> %v0, %v1
  %sum = add <2 x i32> %v2, %sum01
  ret <2 x i32> %sum
}
215
; vld3lane on <2 x float>, lane 1, alignment operand 1.  Only the vld3.32
; mnemonic is matched; the three results are combined with fadd.
define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld3lanef:
;CHECK: vld3.32
  %ptr = bitcast float* %A to i8*
  %vec = load <2 x float>* %B
  %triple = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %ptr, <2 x float> %vec, <2 x float> %vec, <2 x float> %vec, i32 1, i32 1)
  %v0 = extractvalue %struct.__neon_float32x2x3_t %triple, 0
  %v1 = extractvalue %struct.__neon_float32x2x3_t %triple, 1
  %v2 = extractvalue %struct.__neon_float32x2x3_t %triple, 2
  %sum01 = fadd <2 x float> %v0, %v1
  %sum = fadd <2 x float> %v2, %sum01
  ret <2 x float> %sum
}
229
; vld3lane on <8 x i16>, lane 1, alignment operand 8.  The expected register
; list uses every other D register (d16/d18/d20).  VLD3 does not support
; alignment, so the address operand has no qualifier.
define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld3laneQi16:
;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r0]
  %ptr = bitcast i16* %A to i8*
  %vec = load <8 x i16>* %B
  %triple = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %ptr, <8 x i16> %vec, <8 x i16> %vec, <8 x i16> %vec, i32 1, i32 8)
  %v0 = extractvalue %struct.__neon_int16x8x3_t %triple, 0
  %v1 = extractvalue %struct.__neon_int16x8x3_t %triple, 1
  %v2 = extractvalue %struct.__neon_int16x8x3_t %triple, 2
  %sum01 = add <8 x i16> %v0, %v1
  %sum = add <8 x i16> %v2, %sum01
  ret <8 x i16> %sum
}
244
; vld3lane on <4 x i32>, lane 3, alignment operand 1.  Only the vld3.32
; mnemonic is matched.
define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld3laneQi32:
;CHECK: vld3.32
  %ptr = bitcast i32* %A to i8*
  %vec = load <4 x i32>* %B
  %triple = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %ptr, <4 x i32> %vec, <4 x i32> %vec, <4 x i32> %vec, i32 3, i32 1)
  %v0 = extractvalue %struct.__neon_int32x4x3_t %triple, 0
  %v1 = extractvalue %struct.__neon_int32x4x3_t %triple, 1
  %v2 = extractvalue %struct.__neon_int32x4x3_t %triple, 2
  %sum01 = add <4 x i32> %v0, %v1
  %sum = add <4 x i32> %v2, %sum01
  ret <4 x i32> %sum
}
258
; vld3lane on <4 x float>, lane 1, alignment operand 1.  Only the vld3.32
; mnemonic is matched; the three results are combined with fadd.
define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld3laneQf:
;CHECK: vld3.32
  %ptr = bitcast float* %A to i8*
  %vec = load <4 x float>* %B
  %triple = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %ptr, <4 x float> %vec, <4 x float> %vec, <4 x float> %vec, i32 1, i32 1)
  %v0 = extractvalue %struct.__neon_float32x4x3_t %triple, 0
  %v1 = extractvalue %struct.__neon_float32x4x3_t %triple, 1
  %v2 = extractvalue %struct.__neon_float32x4x3_t %triple, 2
  %sum01 = fadd <4 x float> %v0, %v1
  %sum = fadd <4 x float> %v2, %sum01
  ret <4 x float> %sum
}
272
; vld3lane intrinsics: an i8* address, three input vectors, then two i32
; operands.  As used by the tests in this file, the trailing i32 operands
; appear to be the lane index followed by the alignment.
; 64-bit vector forms:
declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly

; 128-bit vector forms:
declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly
Bob Wilson0bf7d992009-10-08 22:27:33 +0000281
; Four-vector aggregates returned by the vld4lane intrinsics.
; 64-bit element vectors:
%struct.__neon_int8x8x4_t    = type { <8 x i8>,    <8 x i8>,    <8 x i8>,    <8 x i8> }
%struct.__neon_int16x4x4_t   = type { <4 x i16>,   <4 x i16>,   <4 x i16>,   <4 x i16> }
%struct.__neon_int32x2x4_t   = type { <2 x i32>,   <2 x i32>,   <2 x i32>,   <2 x i32> }
%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }

; 128-bit element vectors:
%struct.__neon_int16x8x4_t   = type { <8 x i16>,   <8 x i16>,   <8 x i16>,   <8 x i16> }
%struct.__neon_int32x4x4_t   = type { <4 x i32>,   <4 x i32>,   <4 x i32>,   <4 x i32> }
%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
290
; vld4lane on <8 x i8>, lane 1, alignment operand 8.  The expected output is
; clamped to :32, the maximum for this instruction.  All four results are
; summed so each stays live.
define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld4lanei8:
;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
  %vec = load <8 x i8>* %B
  %quad = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %vec, <8 x i8> %vec, <8 x i8> %vec, <8 x i8> %vec, i32 1, i32 8)
  %v0 = extractvalue %struct.__neon_int8x8x4_t %quad, 0
  %v1 = extractvalue %struct.__neon_int8x8x4_t %quad, 1
  %v2 = extractvalue %struct.__neon_int8x8x4_t %quad, 2
  %v3 = extractvalue %struct.__neon_int8x8x4_t %quad, 3
  %sum01 = add <8 x i8> %v0, %v1
  %sum23 = add <8 x i8> %v2, %v3
  %sum = add <8 x i8> %sum01, %sum23
  ret <8 x i8> %sum
}
306
; vld4lane on <4 x i16>, lane 1, alignment operand 1.  Only the vld4.16
; mnemonic is matched.
define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld4lanei16:
;CHECK: vld4.16
  %ptr = bitcast i16* %A to i8*
  %vec = load <4 x i16>* %B
  %quad = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %ptr, <4 x i16> %vec, <4 x i16> %vec, <4 x i16> %vec, <4 x i16> %vec, i32 1, i32 1)
  %v0 = extractvalue %struct.__neon_int16x4x4_t %quad, 0
  %v1 = extractvalue %struct.__neon_int16x4x4_t %quad, 1
  %v2 = extractvalue %struct.__neon_int16x4x4_t %quad, 2
  %v3 = extractvalue %struct.__neon_int16x4x4_t %quad, 3
  %sum01 = add <4 x i16> %v0, %v1
  %sum23 = add <4 x i16> %v2, %v3
  %sum = add <4 x i16> %sum01, %sum23
  ret <4 x i16> %sum
}
322
; vld4lane on <2 x i32>, lane 1, alignment operand 16.  The expected output
; carries :128, the maximum alignment for this instruction.
define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld4lanei32:
;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
  %ptr = bitcast i32* %A to i8*
  %vec = load <2 x i32>* %B
  %quad = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %ptr, <2 x i32> %vec, <2 x i32> %vec, <2 x i32> %vec, <2 x i32> %vec, i32 1, i32 16)
  %v0 = extractvalue %struct.__neon_int32x2x4_t %quad, 0
  %v1 = extractvalue %struct.__neon_int32x2x4_t %quad, 1
  %v2 = extractvalue %struct.__neon_int32x2x4_t %quad, 2
  %v3 = extractvalue %struct.__neon_int32x2x4_t %quad, 3
  %sum01 = add <2 x i32> %v0, %v1
  %sum23 = add <2 x i32> %v2, %v3
  %sum = add <2 x i32> %sum01, %sum23
  ret <2 x i32> %sum
}
339
; vld4lane on <2 x float>, lane 1, alignment operand 1.  Only the vld4.32
; mnemonic is matched; the four results are combined with fadd.
define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld4lanef:
;CHECK: vld4.32
  %ptr = bitcast float* %A to i8*
  %vec = load <2 x float>* %B
  %quad = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %ptr, <2 x float> %vec, <2 x float> %vec, <2 x float> %vec, <2 x float> %vec, i32 1, i32 1)
  %v0 = extractvalue %struct.__neon_float32x2x4_t %quad, 0
  %v1 = extractvalue %struct.__neon_float32x2x4_t %quad, 1
  %v2 = extractvalue %struct.__neon_float32x2x4_t %quad, 2
  %v3 = extractvalue %struct.__neon_float32x2x4_t %quad, 3
  %sum01 = fadd <2 x float> %v0, %v1
  %sum23 = fadd <2 x float> %v2, %v3
  %sum = fadd <2 x float> %sum01, %sum23
  ret <2 x float> %sum
}
355
; vld4lane on <8 x i16>, lane 1, alignment operand 16.  The expected register
; list uses every other D register (d16/d18/d20/d22), and the alignment is
; clamped to :64, the maximum for this instruction.
define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld4laneQi16:
;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
  %ptr = bitcast i16* %A to i8*
  %vec = load <8 x i16>* %B
  %quad = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %ptr, <8 x i16> %vec, <8 x i16> %vec, <8 x i16> %vec, <8 x i16> %vec, i32 1, i32 16)
  %v0 = extractvalue %struct.__neon_int16x8x4_t %quad, 0
  %v1 = extractvalue %struct.__neon_int16x8x4_t %quad, 1
  %v2 = extractvalue %struct.__neon_int16x8x4_t %quad, 2
  %v3 = extractvalue %struct.__neon_int16x8x4_t %quad, 3
  %sum01 = add <8 x i16> %v0, %v1
  %sum23 = add <8 x i16> %v2, %v3
  %sum = add <8 x i16> %sum01, %sum23
  ret <8 x i16> %sum
}
372
; vld4lane on <4 x i32>, lane 2, alignment operand 1.  Lane 2 is expected to
; be addressed as lane 0 of d17/d19/d21/d23, and the address operand carries
; no alignment qualifier (the default alignment).
define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld4laneQi32:
;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
  %ptr = bitcast i32* %A to i8*
  %vec = load <4 x i32>* %B
  %quad = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %ptr, <4 x i32> %vec, <4 x i32> %vec, <4 x i32> %vec, <4 x i32> %vec, i32 2, i32 1)
  %v0 = extractvalue %struct.__neon_int32x4x4_t %quad, 0
  %v1 = extractvalue %struct.__neon_int32x4x4_t %quad, 1
  %v2 = extractvalue %struct.__neon_int32x4x4_t %quad, 2
  %v3 = extractvalue %struct.__neon_int32x4x4_t %quad, 3
  %sum01 = add <4 x i32> %v0, %v1
  %sum23 = add <4 x i32> %v2, %v3
  %sum = add <4 x i32> %sum01, %sum23
  ret <4 x i32> %sum
}
389
; vld4lane on <4 x float>, lane 1, alignment operand 1.  Only the vld4.32
; mnemonic is matched; the four results are combined with fadd.
define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld4laneQf:
;CHECK: vld4.32
  %ptr = bitcast float* %A to i8*
  %vec = load <4 x float>* %B
  %quad = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %ptr, <4 x float> %vec, <4 x float> %vec, <4 x float> %vec, <4 x float> %vec, i32 1, i32 1)
  %v0 = extractvalue %struct.__neon_float32x4x4_t %quad, 0
  %v1 = extractvalue %struct.__neon_float32x4x4_t %quad, 1
  %v2 = extractvalue %struct.__neon_float32x4x4_t %quad, 2
  %v3 = extractvalue %struct.__neon_float32x4x4_t %quad, 3
  %sum01 = fadd <4 x float> %v0, %v1
  %sum23 = fadd <4 x float> %v2, %v3
  %sum = fadd <4 x float> %sum01, %sum23
  ret <4 x float> %sum
}
405
; vld4lane intrinsics: an i8* address, four input vectors, then two i32
; operands.  As used by the tests in this file, the trailing i32 operands
; appear to be the lane index followed by the alignment.
; 64-bit vector forms:
declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly

; 128-bit vector forms:
declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly