; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; Single-lane vld1, 8-bit elements, D register: the i8 loaded from %A is
; inserted into lane 3 of the <8 x i8> loaded from %B. The scalar load is
; marked "align 8", yet the expected instruction has no alignment qualifier.
define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld1lanei8:
;Check the (default) alignment value.
;CHECK: vld1.8 {d16[3]}, [r0]
  %tmp1 = load <8 x i8>* %B
  %tmp2 = load i8* %A, align 8
  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3
  ret <8 x i8> %tmp3
}
12
; Single-lane vld1, 16-bit elements, D register: the i16 loaded from %A is
; inserted into lane 2 of the <4 x i16> loaded from %B. The scalar load is
; marked "align 8"; the expected instruction uses the :16 qualifier.
define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld1lanei16:
;Check the alignment value.  Max for this instruction is 16 bits:
;CHECK: vld1.16 {d16[2]}, [r0, :16]
  %tmp1 = load <4 x i16>* %B
  %tmp2 = load i16* %A, align 8
  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2
  ret <4 x i16> %tmp3
}
22
; Single-lane vld1, 32-bit elements, D register: the i32 loaded from %A is
; inserted into lane 1 of the <2 x i32> loaded from %B. The scalar load is
; marked "align 8"; the expected instruction uses the :32 qualifier.
define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld1lanei32:
;Check the alignment value.  Max for this instruction is 32 bits:
;CHECK: vld1.32 {d16[1]}, [r0, :32]
  %tmp1 = load <2 x i32>* %B
  %tmp2 = load i32* %A, align 8
  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
  ret <2 x i32> %tmp3
}
32
; Single-lane vld1, float elements, D register: the float loaded from %A
; (align 4) is inserted into lane 1 of the <2 x float> loaded from %B.
; The expected instruction has no alignment qualifier.
define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld1lanef:
;CHECK: vld1.32 {d16[1]}, [r0]
  %tmp1 = load <2 x float>* %B
  %tmp2 = load float* %A, align 4
  %tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1
  ret <2 x float> %tmp3
}
41
; Single-lane vld1, 8-bit elements, Q register: the i8 loaded from %A is
; inserted into lane 9 of the <16 x i8> loaded from %B. The expected
; instruction addresses that element as lane 1 of d17.
define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vld1laneQi8:
;CHECK: vld1.8 {d17[1]}, [r0]
  %tmp1 = load <16 x i8>* %B
  %tmp2 = load i8* %A, align 8
  %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 9
  ret <16 x i8> %tmp3
}
50
; Single-lane vld1, 16-bit elements, Q register: the i16 loaded from %A is
; inserted into lane 5 of the <8 x i16> loaded from %B. The expected
; instruction addresses that element as lane 1 of d17, with :16 alignment.
define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld1laneQi16:
;CHECK: vld1.16 {d17[1]}, [r0, :16]
  %tmp1 = load <8 x i16>* %B
  %tmp2 = load i16* %A, align 8
  %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5
  ret <8 x i16> %tmp3
}
59
; Single-lane vld1, 32-bit elements, Q register: the i32 loaded from %A is
; inserted into lane 3 of the <4 x i32> loaded from %B. The expected
; instruction addresses that element as lane 1 of d17, with :32 alignment.
define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld1laneQi32:
;CHECK: vld1.32 {d17[1]}, [r0, :32]
  %tmp1 = load <4 x i32>* %B
  %tmp2 = load i32* %A, align 8
  %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3
  ret <4 x i32> %tmp3
}
68
; Single-lane vld1, float elements, Q register: the float loaded from %A
; (no explicit alignment) is inserted into lane 0 of the <4 x float> loaded
; from %B. The expected instruction targets lane 0 of d16, unaligned.
define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld1laneQf:
;CHECK: vld1.32 {d16[0]}, [r0]
  %tmp1 = load <4 x float>* %B
  %tmp2 = load float* %A
  %tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0
  ret <4 x float> %tmp3
}
77
; Two-vector aggregate types returned by the vld2lane intrinsics.
; 64-bit (D register) element vectors:
%struct.__neon_int8x8x2_t = type { <8 x i8>,  <8 x i8> }
%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }

; 128-bit (Q register) element vectors:
%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
86
; vld2lane, 8-bit elements, D registers, lane 1. The intrinsic is called with
; a final operand of 4; the expected instruction uses the :16 qualifier.
; The two result vectors are added so that both are live.
define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld2lanei8:
;Check the alignment value.  Max for this instruction is 16 bits:
;CHECK: vld2.8 {d16[1], d17[1]}, [r0, :16]
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
  %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}
98
; vld2lane, 16-bit elements, D registers, lane 1. The intrinsic is called with
; a final operand of 8; the expected instruction uses the :32 qualifier.
define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld2lanei16:
;Check the alignment value.  Max for this instruction is 32 bits:
;CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}
111
; vld2lane, 32-bit elements, D registers, lane 1. Only the opcode is matched;
; registers and addressing are not pinned by this test.
define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld2lanei32:
;CHECK: vld2.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}
123
;Check for a post-increment updating load.
; The pointer read from %ptr is advanced by 2 x i32 after the load and stored
; back to %ptr; the expected instruction uses the "!" writeback form.
define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind {
;CHECK: vld2lanei32_update:
;CHECK: vld2.32 {d16[1], d17[1]}, [r1]!
  %A = load i32** %ptr
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  %tmp6 = getelementptr i32* %A, i32 2
  store i32* %tmp6, i32** %ptr
  ret <2 x i32> %tmp5
}
139
; vld2lane, float elements, D registers, lane 1. Only the opcode is matched.
; The two result vectors are combined with fadd.
define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld2lanef:
;CHECK: vld2.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1
  %tmp5 = fadd <2 x float> %tmp3, %tmp4
  ret <2 x float> %tmp5
}
151
; vld2lane, 16-bit elements, Q registers, lane 5 (expected as lane 1 of the
; odd D registers d17/d19). The final intrinsic operand is 1 and the expected
; instruction has no alignment qualifier.
define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld2laneQi16:
;Check the (default) alignment.
;CHECK: vld2.16 {d17[1], d19[1]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
  %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}
164
; vld2lane, 32-bit elements, Q registers, lane 2 (expected as lane 0 of the
; odd D registers d17/d19). The final intrinsic operand is 16; the expected
; instruction uses the :64 qualifier.
define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld2laneQi32:
;Check the alignment value.  Max for this instruction is 64 bits:
;CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
  %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}
177
; vld2lane, float elements, Q registers, lane 1. Only the opcode is matched.
; The two result vectors are combined with fadd.
define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld2laneQf:
;CHECK: vld2.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1
  %tmp5 = fadd <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}
189
; NEON vld2lane intrinsics: i8* address, two input vectors, then two i32
; operands. Judging by the tests in this file, the trailing i32 operands are
; the lane index and the alignment -- confirm against the intrinsic docs.
; D-register variants:
declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind readonly

; Q-register variants:
declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind readonly
Bob Wilson30aea9d2009-10-08 18:56:10 +0000198
; Three-vector aggregate types returned by the vld3lane intrinsics.
; 64-bit (D register) element vectors:
%struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>,  <8 x i8> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }

; 128-bit (Q register) element vectors:
%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
207
; vld3lane, 8-bit elements, D registers, lane 1. Only the opcode is matched.
; All three result vectors are summed so that each one is live.
define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld3lanei8:
;CHECK: vld3.8
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
  %tmp6 = add <8 x i8> %tmp3, %tmp4
  %tmp7 = add <8 x i8> %tmp5, %tmp6
  ret <8 x i8> %tmp7
}
220
; vld3lane, 16-bit elements, D registers, lane 1. The final intrinsic operand
; is 8, but the expected instruction carries no alignment qualifier.
define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld3lanei16:
;Check the (default) alignment value.  VLD3 does not support alignment.
;CHECK: vld3.16 {d16[1], d17[1], d18[1]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
  %tmp6 = add <4 x i16> %tmp3, %tmp4
  %tmp7 = add <4 x i16> %tmp5, %tmp6
  ret <4 x i16> %tmp7
}
235
; vld3lane, 32-bit elements, D registers, lane 1. Only the opcode is matched.
define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld3lanei32:
;CHECK: vld3.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2
  %tmp6 = add <2 x i32> %tmp3, %tmp4
  %tmp7 = add <2 x i32> %tmp5, %tmp6
  ret <2 x i32> %tmp7
}
249
; vld3lane, float elements, D registers, lane 1. Only the opcode is matched.
; The three result vectors are combined with fadd.
define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld3lanef:
;CHECK: vld3.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2
  %tmp6 = fadd <2 x float> %tmp3, %tmp4
  %tmp7 = fadd <2 x float> %tmp5, %tmp6
  ret <2 x float> %tmp7
}
263
; vld3lane, 16-bit elements, Q registers, lane 1 (expected in the even
; D registers d16/d18/d20). The final intrinsic operand is 8, but the expected
; instruction carries no alignment qualifier.
define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld3laneQi16:
;Check the (default) alignment value.  VLD3 does not support alignment.
;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
  %tmp6 = add <8 x i16> %tmp3, %tmp4
  %tmp7 = add <8 x i16> %tmp5, %tmp6
  ret <8 x i16> %tmp7
}
278
;Check for a post-increment updating load with register increment.
; The pointer read from %ptr is advanced by %inc i16 elements after the load
; and stored back to %ptr; the expected instruction uses the "[rN], rM" form.
define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
;CHECK: vld3laneQi16_update:
;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r2], r1
  %A = load i16** %ptr
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
  %tmp6 = add <8 x i16> %tmp3, %tmp4
  %tmp7 = add <8 x i16> %tmp5, %tmp6
  %tmp8 = getelementptr i16* %A, i32 %inc
  store i16* %tmp8, i16** %ptr
  ret <8 x i16> %tmp7
}
296
; vld3lane, 32-bit elements, Q registers, lane 3. Only the opcode is matched.
define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld3laneQi32:
;CHECK: vld3.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2
  %tmp6 = add <4 x i32> %tmp3, %tmp4
  %tmp7 = add <4 x i32> %tmp5, %tmp6
  ret <4 x i32> %tmp7
}
310
; vld3lane, float elements, Q registers, lane 1. Only the opcode is matched.
; The three result vectors are combined with fadd.
define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld3laneQf:
;CHECK: vld3.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2
  %tmp6 = fadd <4 x float> %tmp3, %tmp4
  %tmp7 = fadd <4 x float> %tmp5, %tmp6
  ret <4 x float> %tmp7
}
324
; NEON vld3lane intrinsics: i8* address, three input vectors, then two i32
; operands. Judging by the tests in this file, the trailing i32 operands are
; the lane index and the alignment -- confirm against the intrinsic docs.
; D-register variants:
declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly

; Q-register variants:
declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly
Bob Wilson0bf7d992009-10-08 22:27:33 +0000333
; Four-vector aggregate types returned by the vld4lane intrinsics.
; 64-bit (D register) element vectors:
%struct.__neon_int8x8x4_t = type { <8 x i8>,  <8 x i8>,  <8 x i8>,  <8 x i8> }
%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }

; 128-bit (Q register) element vectors:
%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
342
; vld4lane, 8-bit elements, D registers, lane 1. The final intrinsic operand
; is 8; the expected instruction uses the :32 qualifier.
; All four result vectors are summed so that each one is live.
define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld4lanei8:
;Check the alignment value.  Max for this instruction is 32 bits:
;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
  %tmp7 = add <8 x i8> %tmp3, %tmp4
  %tmp8 = add <8 x i8> %tmp5, %tmp6
  %tmp9 = add <8 x i8> %tmp7, %tmp8
  ret <8 x i8> %tmp9
}
358
;Check for a post-increment updating load.
; The pointer read from %ptr is advanced by 4 bytes after the load and stored
; back to %ptr; the expected instruction uses :32 alignment with "!" writeback.
define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
;CHECK: vld4lanei8_update:
;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
  %A = load i8** %ptr
  %tmp1 = load <8 x i8>* %B
  %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
  %tmp7 = add <8 x i8> %tmp3, %tmp4
  %tmp8 = add <8 x i8> %tmp5, %tmp6
  %tmp9 = add <8 x i8> %tmp7, %tmp8
  %tmp10 = getelementptr i8* %A, i32 4
  store i8* %tmp10, i8** %ptr
  ret <8 x i8> %tmp9
}
377
; vld4lane, 16-bit elements, D registers, lane 1. The final intrinsic operand
; is 4 and the expected instruction carries no alignment qualifier.
define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld4lanei16:
;Check that a power-of-two alignment smaller than the total size of the memory
;being loaded is ignored.
;CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4)
  %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3
  %tmp7 = add <4 x i16> %tmp3, %tmp4
  %tmp8 = add <4 x i16> %tmp5, %tmp6
  %tmp9 = add <4 x i16> %tmp7, %tmp8
  ret <4 x i16> %tmp9
}
395
; vld4lane, 32-bit elements, D registers, lane 1. The final intrinsic operand
; is 8; the expected instruction uses the :64 qualifier.
define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld4lanei32:
;Check the alignment value.  An 8-byte alignment is allowed here even though
;it is smaller than the total size of the memory being loaded.
;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :64]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8)
  %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3
  %tmp7 = add <2 x i32> %tmp3, %tmp4
  %tmp8 = add <2 x i32> %tmp5, %tmp6
  %tmp9 = add <2 x i32> %tmp7, %tmp8
  ret <2 x i32> %tmp9
}
413
; vld4lane, float elements, D registers, lane 1. Only the opcode is matched.
; The four result vectors are combined with fadd.
define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld4lanef:
;CHECK: vld4.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3
  %tmp7 = fadd <2 x float> %tmp3, %tmp4
  %tmp8 = fadd <2 x float> %tmp5, %tmp6
  %tmp9 = fadd <2 x float> %tmp7, %tmp8
  ret <2 x float> %tmp9
}
429
; vld4lane, 16-bit elements, Q registers, lane 1 (expected in the even
; D registers d16/d18/d20/d22). The final intrinsic operand is 16; the
; expected instruction uses the :64 qualifier.
define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld4laneQi16:
;Check the alignment value.  Max for this instruction is 64 bits:
;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <8 x i16>* %B
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
  %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3
  %tmp7 = add <8 x i16> %tmp3, %tmp4
  %tmp8 = add <8 x i16> %tmp5, %tmp6
  %tmp9 = add <8 x i16> %tmp7, %tmp8
  ret <8 x i16> %tmp9
}
446
; vld4lane, 32-bit elements, Q registers, lane 2 (expected as lane 0 of the
; odd D registers d17/d19/d21/d23). The final intrinsic operand is 1 and the
; expected instruction carries no alignment qualifier.
define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld4laneQi32:
;Check the (default) alignment.
;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <4 x i32>* %B
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
  %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3
  %tmp7 = add <4 x i32> %tmp3, %tmp4
  %tmp8 = add <4 x i32> %tmp5, %tmp6
  %tmp9 = add <4 x i32> %tmp7, %tmp8
  ret <4 x i32> %tmp9
}
463
; vld4lane, float elements, Q registers, lane 1. Only the opcode is matched.
; The four result vectors are combined with fadd.
define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld4laneQf:
;CHECK: vld4.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <4 x float>* %B
  %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0
  %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1
  %tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2
  %tmp6 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 3
  %tmp7 = fadd <4 x float> %tmp3, %tmp4
  %tmp8 = fadd <4 x float> %tmp5, %tmp6
  %tmp9 = fadd <4 x float> %tmp7, %tmp8
  ret <4 x float> %tmp9
}
479
; NEON vld4lane intrinsics: i8* address, four input vectors, then two i32
; operands. Judging by the tests in this file, the trailing i32 operands are
; the lane index and the alignment -- confirm against the intrinsic docs.
; D-register variants:
declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly

; Q-register variants:
declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly
Bob Wilson495de3b2010-12-17 01:21:12 +0000488
; Radar 8776599: If one of the operands to a QQQQ REG_SEQUENCE is a register
; in the QPR_VFP2 regclass, it needs to be copied to a QPR regclass because
; we don't currently have a QQQQ_VFP2 super-regclass.  (The "0" for the low
; part of %ins67 is supposed to be loaded by a VLDRS instruction in this test.)
; The third vld3lane input is built from the last element of %b shifted into
; the upper 64 bits of an i128; the other inputs are undef. Only the presence
; of a vld3.16 in the output is verified.
define void @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind {
;CHECK: test_qqqq_regsequence_subreg:
;CHECK: vld3.16
  %tmp63 = extractvalue [6 x i64] %b, 5
  %tmp64 = zext i64 %tmp63 to i128
  %tmp65 = shl i128 %tmp64, 64
  %ins67 = or i128 %tmp65, 0
  %tmp78 = bitcast i128 %ins67 to <8 x i16>
  %vld3_lane = tail call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> %tmp78, i32 1, i32 2)
  call void @llvm.trap()
  unreachable
}
505
; Trap intrinsic called by test_qqqq_regsequence_subreg before "unreachable".
declare void @llvm.trap() nounwind