blob: 29b379465db55a950d73315c3b57058abcb6de44 [file] [log] [blame]
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
Bob Wilson9b600c62009-08-06 00:38:31 +00002
; Aggregate types returned by the llvm.arm.neon.vld2.* intrinsics used in this
; file. Each struct carries the two vectors produced by a single call.

; Pairs of 64-bit vectors.
%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
%struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> }

; Pairs of 128-bit vectors.
%struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> }
%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
13
; Loads two <8 x i8> vectors from %A with llvm.arm.neon.vld2.v8i8 and returns
; their element-wise sum, so both loaded vectors feed the result.
define <8 x i8> @vld2i8(i8* %A) nounwind {
;CHECK: vld2i8:
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vld2.8 {d16, d17}, [r0, :64]
  ; The i32 8 operand is the alignment; 8 bytes corresponds to the :64
  ; qualifier expected above.
  %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 8)
  %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0
  %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1
  %tmp4 = add <8 x i8> %tmp2, %tmp3
  ret <8 x i8> %tmp4
}
24
; Loads two <4 x i16> vectors from %A with llvm.arm.neon.vld2.v4i16 and returns
; their element-wise sum.
define <4 x i16> @vld2i16(i16* %A) nounwind {
;CHECK: vld2i16:
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vld2.16 {d16, d17}, [r0, :128]
  ; The intrinsic takes an i8* address, hence the cast.
  %tmp0 = bitcast i16* %A to i8*
  ; The alignment operand of 32 (bytes) is larger than the 128-bit maximum
  ; noted above, so the expected qualifier is :128.
  %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 32)
  %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0
  %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1
  %tmp4 = add <4 x i16> %tmp2, %tmp3
  ret <4 x i16> %tmp4
}
36
; Loads two <2 x i32> vectors from %A with llvm.arm.neon.vld2.v2i32 and returns
; their element-wise sum. Only the vld2.32 mnemonic is matched; operands and
; any alignment qualifier are not checked here.
define <2 x i32> @vld2i32(i32* %A) nounwind {
;CHECK: vld2i32:
;CHECK: vld2.32
  ; The intrinsic takes an i8* address, hence the cast.
  %tmp0 = bitcast i32* %A to i8*
  ; Alignment operand is 1.
  %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8* %tmp0, i32 1)
  %tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0
  %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1
  %tmp4 = add <2 x i32> %tmp2, %tmp3
  ret <2 x i32> %tmp4
}
47
; Loads two <2 x float> vectors from %A with llvm.arm.neon.vld2.v2f32 and
; returns their element-wise floating-point sum. Only the vld2.32 mnemonic is
; matched.
define <2 x float> @vld2f(float* %A) nounwind {
;CHECK: vld2f:
;CHECK: vld2.32
  ; The intrinsic takes an i8* address, hence the cast.
  %tmp0 = bitcast float* %A to i8*
  ; Alignment operand is 1.
  %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1)
  %tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0
  %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1
  %tmp4 = fadd <2 x float> %tmp2, %tmp3
  ret <2 x float> %tmp4
}
58
;Check for a post-increment updating load.
; The pointer stored at %ptr is loaded, used as the vld2 address, advanced by
; four floats and stored back. Four floats (16 bytes) is exactly the size of
; the two <2 x float> vectors loaded, and the expected instruction uses the
; "!" writeback form of the address operand.
define <2 x float> @vld2f_update(float** %ptr) nounwind {
;CHECK: vld2f_update:
;CHECK: vld2.32 {d16, d17}, [r1]!
  %A = load float** %ptr
  ; The intrinsic takes an i8* address, hence the cast.
  %tmp0 = bitcast float* %A to i8*
  ; Alignment operand is 1; no alignment qualifier appears in the expected
  ; instruction.
  %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1)
  %tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0
  %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1
  %tmp4 = fadd <2 x float> %tmp2, %tmp3
  ; Advance the pointer past the loaded data and write it back to %ptr.
  %tmp5 = getelementptr float* %A, i32 4
  store float* %tmp5, float** %ptr
  ret <2 x float> %tmp4
}
73
; Calls llvm.arm.neon.vld2.v1i64 on %A and returns the sum of the two
; <1 x i64> results. Note that the expected instruction for this element type
; is vld1.64, not a vld2 form.
define <1 x i64> @vld2i64(i64* %A) nounwind {
;CHECK: vld2i64:
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vld1.64 {d16, d17}, [r0, :128]
  ; The intrinsic takes an i8* address, hence the cast.
  %tmp0 = bitcast i64* %A to i8*
  ; The alignment operand of 32 (bytes) is larger than the 128-bit maximum
  ; noted above, so the expected qualifier is :128.
  %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0, i32 32)
  %tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0
  %tmp3 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 1
  %tmp4 = add <1 x i64> %tmp2, %tmp3
  ret <1 x i64> %tmp4
}
85
; Loads two <16 x i8> vectors from %A with llvm.arm.neon.vld2.v16i8 and returns
; their element-wise sum. The expected instruction names four d-registers.
define <16 x i8> @vld2Qi8(i8* %A) nounwind {
;CHECK: vld2Qi8:
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vld2.8 {d16, d17, d18, d19}, [r0, :64]
  ; The i32 8 operand is the alignment; 8 bytes corresponds to the :64
  ; qualifier expected above.
  %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 8)
  %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
  %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
  %tmp4 = add <16 x i8> %tmp2, %tmp3
  ret <16 x i8> %tmp4
}
96
;Check for a post-increment updating load with register increment.
; The pointer stored at %ptr is loaded, used as the vld2 address, advanced by
; the run-time value %inc and stored back. The expected instruction carries
; the increment as a trailing register operand.
define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
;CHECK: vld2Qi8_update:
;CHECK: vld2.8 {d16, d17, d18, d19}, [r2, :128], r1
  %A = load i8** %ptr
  ; The i32 16 operand is the alignment; 16 bytes corresponds to the :128
  ; qualifier expected above.
  %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 16)
  %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
  %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
  %tmp4 = add <16 x i8> %tmp2, %tmp3
  ; Advance the pointer by %inc bytes and write it back to %ptr.
  %tmp5 = getelementptr i8* %A, i32 %inc
  store i8* %tmp5, i8** %ptr
  ret <16 x i8> %tmp4
}
110
; Loads two <8 x i16> vectors from %A with llvm.arm.neon.vld2.v8i16 and returns
; their element-wise sum.
define <8 x i16> @vld2Qi16(i16* %A) nounwind {
;CHECK: vld2Qi16:
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vld2.16 {d16, d17, d18, d19}, [r0, :128]
  ; The intrinsic takes an i8* address, hence the cast.
  %tmp0 = bitcast i16* %A to i8*
  ; The i32 16 operand is the alignment; 16 bytes corresponds to the :128
  ; qualifier expected above.
  %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 16)
  %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0
  %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1
  %tmp4 = add <8 x i16> %tmp2, %tmp3
  ret <8 x i16> %tmp4
}
122
; Loads two <4 x i32> vectors from %A with llvm.arm.neon.vld2.v4i32 and returns
; their element-wise sum.
define <4 x i32> @vld2Qi32(i32* %A) nounwind {
;CHECK: vld2Qi32:
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vld2.32 {d16, d17, d18, d19}, [r0, :256]
  ; The intrinsic takes an i8* address, hence the cast.
  %tmp0 = bitcast i32* %A to i8*
  ; The alignment operand of 64 (bytes) is larger than the 256-bit maximum
  ; noted above, so the expected qualifier is :256.
  %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 64)
  %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0
  %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1
  %tmp4 = add <4 x i32> %tmp2, %tmp3
  ret <4 x i32> %tmp4
}
134
; Loads two <4 x float> vectors from %A with llvm.arm.neon.vld2.v4f32 and
; returns their element-wise floating-point sum. Only the vld2.32 mnemonic is
; matched.
define <4 x float> @vld2Qf(float* %A) nounwind {
;CHECK: vld2Qf:
;CHECK: vld2.32
  ; The intrinsic takes an i8* address, hence the cast.
  %tmp0 = bitcast float* %A to i8*
  ; Alignment operand is 1.
  %tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8* %tmp0, i32 1)
  %tmp2 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 0
  %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 1
  %tmp4 = fadd <4 x float> %tmp2, %tmp3
  ret <4 x float> %tmp4
}
145
; Declarations of the vld2 intrinsics called by the tests in this file. Each
; takes an i8* address and an i32 operand (used by the tests as the alignment)
; and returns a struct of two vectors.

; Variants returning pairs of 64-bit vectors.
declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*, i32) nounwind readonly
declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*, i32) nounwind readonly
declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*, i32) nounwind readonly
declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*, i32) nounwind readonly
declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*, i32) nounwind readonly

; Variants returning pairs of 128-bit vectors.
declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*, i32) nounwind readonly
declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*, i32) nounwind readonly
declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*, i32) nounwind readonly
declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly