; RUN: llc -mattr=+neon < %s | FileCheck %s
; Codegen checks for the ARM NEON vqdmulh, vqrdmulh, vqdmull, vqdmlal and
; vqdmlsl intrinsics, in both plain and by-lane (splatted operand) forms.
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
target triple = "thumbv7-elf"
4
; Loads two <4 x i16> operands and calls llvm.arm.neon.vqdmulh.v4i16;
; vqdmulh.s16 is expected in the output.
define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vqdmulhs16:
;CHECK: vqdmulh.s16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}
13
; Loads two <2 x i32> operands and calls llvm.arm.neon.vqdmulh.v2i32;
; vqdmulh.s32 is expected in the output.
define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vqdmulhs32:
;CHECK: vqdmulh.s32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}
22
; 128-bit variant: loads two <8 x i16> operands and calls
; llvm.arm.neon.vqdmulh.v8i16; vqdmulh.s16 is expected in the output.
define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vqdmulhQs16:
;CHECK: vqdmulh.s16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}
31
; 128-bit variant: loads two <4 x i32> operands and calls
; llvm.arm.neon.vqdmulh.v4i32; vqdmulh.s32 is expected in the output.
define <4 x i32> @vqdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vqdmulhQs32:
;CHECK: vqdmulh.s32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}
40
; Splats element 1 of the <4 x i16> argument across eight lanes and passes it
; to llvm.arm.neon.vqdmulh.v8i16; the by-lane operand d2[1] is expected.
define arm_aapcs_vfpcc <8 x i16> @test_vqdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vqdmulhQ_lanes16:
; CHECK: vqdmulh.s16 q0, q0, d2[1]
  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %1 = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0)
  ret <8 x i16> %1
}
49
; Splats element 1 of the <2 x i32> argument across four lanes and passes it
; to llvm.arm.neon.vqdmulh.v4i32; the by-lane operand d2[1] is expected.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vqdmulhQ_lanes32:
; CHECK: vqdmulh.s32 q0, q0, d2[1]
  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %1 = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0)
  ret <4 x i32> %1
}
58
; Splats element 1 of the second <4 x i16> argument and passes it to
; llvm.arm.neon.vqdmulh.v4i16; the by-lane operand d1[1] is expected.
define arm_aapcs_vfpcc <4 x i16> @test_vqdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vqdmulh_lanes16:
; CHECK: vqdmulh.s16 d0, d0, d1[1]
  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %1 = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0)
  ret <4 x i16> %1
}
67
; Splats element 1 of the second <2 x i32> argument and passes it to
; llvm.arm.neon.vqdmulh.v2i32; the by-lane operand d1[1] is expected.
define arm_aapcs_vfpcc <2 x i32> @test_vqdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vqdmulh_lanes32:
; CHECK: vqdmulh.s32 d0, d0, d1[1]
  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %1 = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0)
  ret <2 x i32> %1
}
76
; Declarations of the vqdmulh intrinsics used above (64-bit, then 128-bit).
declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

82
; Loads two <4 x i16> operands and calls llvm.arm.neon.vqrdmulh.v4i16;
; vqrdmulh.s16 is expected in the output.
define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vqrdmulhs16:
;CHECK: vqrdmulh.s16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}
91
; Loads two <2 x i32> operands and calls llvm.arm.neon.vqrdmulh.v2i32;
; vqrdmulh.s32 is expected in the output.
define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vqrdmulhs32:
;CHECK: vqrdmulh.s32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}
100
; 128-bit variant: loads two <8 x i16> operands and calls
; llvm.arm.neon.vqrdmulh.v8i16; vqrdmulh.s16 is expected in the output.
define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vqrdmulhQs16:
;CHECK: vqrdmulh.s16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}
109
; 128-bit variant: loads two <4 x i32> operands and calls
; llvm.arm.neon.vqrdmulh.v4i32; vqrdmulh.s32 is expected in the output.
define <4 x i32> @vqrdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vqrdmulhQs32:
;CHECK: vqrdmulh.s32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}
118
; Splats element 1 of the <4 x i16> argument across eight lanes and passes it
; to llvm.arm.neon.vqrdmulh.v8i16; the by-lane operand d2[1] is expected.
define arm_aapcs_vfpcc <8 x i16> @test_vqRdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vqRdmulhQ_lanes16:
; CHECK: vqrdmulh.s16 q0, q0, d2[1]
  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %1 = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0)
  ret <8 x i16> %1
}
127
; Splats element 1 of the <2 x i32> argument across four lanes and passes it
; to llvm.arm.neon.vqrdmulh.v4i32; the by-lane operand d2[1] is expected.
define arm_aapcs_vfpcc <4 x i32> @test_vqRdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vqRdmulhQ_lanes32:
; CHECK: vqrdmulh.s32 q0, q0, d2[1]
  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %1 = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0)
  ret <4 x i32> %1
}
136
; Splats element 1 of the second <4 x i16> argument and passes it to
; llvm.arm.neon.vqrdmulh.v4i16; the by-lane operand d1[1] is expected.
define arm_aapcs_vfpcc <4 x i16> @test_vqRdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vqRdmulh_lanes16:
; CHECK: vqrdmulh.s16 d0, d0, d1[1]
  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %1 = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0)
  ret <4 x i16> %1
}
145
; Splats element 1 of the second <2 x i32> argument and passes it to
; llvm.arm.neon.vqrdmulh.v2i32; the by-lane operand d1[1] is expected.
define arm_aapcs_vfpcc <2 x i32> @test_vqRdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vqRdmulh_lanes32:
; CHECK: vqrdmulh.s32 d0, d0, d1[1]
  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %1 = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0)
  ret <2 x i32> %1
}
154
; Declarations of the vqrdmulh intrinsics used above (64-bit, then 128-bit).
declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

160
; Loads two <4 x i16> operands and calls llvm.arm.neon.vqdmull.v4i32, which
; returns <4 x i32>; vqdmull.s16 is expected in the output.
define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vqdmulls16:
;CHECK: vqdmull.s16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i32> %tmp3
}
169
; Loads two <2 x i32> operands and calls llvm.arm.neon.vqdmull.v2i64, which
; returns <2 x i64>; vqdmull.s32 is expected in the output.
define <2 x i64> @vqdmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vqdmulls32:
;CHECK: vqdmull.s32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i64> %tmp3
}
178
; Splats element 1 of the second <4 x i16> argument and passes it to
; llvm.arm.neon.vqdmull.v4i32; the by-lane operand d1[1] is expected.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vqdmull_lanes16:
; CHECK: vqdmull.s16 q0, d0, d1[1]
  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0)
  ret <4 x i32> %1
}
187
; Splats element 1 of the second <2 x i32> argument and passes it to
; llvm.arm.neon.vqdmull.v2i64; the by-lane operand d1[1] is expected.
define arm_aapcs_vfpcc <2 x i64> @test_vqdmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vqdmull_lanes32:
; CHECK: vqdmull.s32 q0, d0, d1[1]
  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %1 = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0)
  ret <2 x i64> %1
}
196
; Declarations of the vqdmull intrinsics (result elements are twice as wide as the operands').
declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
199
; Loads a <4 x i32> accumulator and two <4 x i16> operands and calls
; llvm.arm.neon.vqdmlal.v4i32; vqdmlal.s16 is expected in the output.
define <4 x i32> @vqdmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: vqdmlals16:
;CHECK: vqdmlal.s16
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = load <4 x i16>* %C
  %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
  ret <4 x i32> %tmp4
}
209
; Loads a <2 x i64> accumulator and two <2 x i32> operands and calls
; llvm.arm.neon.vqdmlal.v2i64; vqdmlal.s32 is expected in the output.
define <2 x i64> @vqdmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: vqdmlals32:
;CHECK: vqdmlal.s32
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = load <2 x i32>* %C
  %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
  ret <2 x i64> %tmp4
}
219
; Splats element 1 of the third argument and passes it as the last operand of
; llvm.arm.neon.vqdmlal.v4i32; the by-lane operand d3[1] is expected.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vqdmlal_lanes16:
; CHECK: vqdmlal.s16 q0, d2, d3[1]
  %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0)
  ret <4 x i32> %1
}
228
; Splats element 1 of the third argument and passes it as the last operand of
; llvm.arm.neon.vqdmlal.v2i64; the by-lane operand d3[1] is expected.
define arm_aapcs_vfpcc <2 x i64> @test_vqdmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vqdmlal_lanes32:
; CHECK: vqdmlal.s32 q0, d2, d3[1]
  %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0)
  ret <2 x i64> %1
}
237
; Declarations of the vqdmlal intrinsics (accumulator first, then the two narrow operands).
declare <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
240
; Expresses the accumulate as separate vqdmull and vqadds intrinsic calls;
; a single vqdmlal.s16 is still expected in the output.
define <4 x i32> @vqdmlals16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: vqdmlals16_natural:
;CHECK: vqdmlal.s16
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = load <4 x i16>* %C
  %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp2, <4 x i16> %tmp3)
  %tmp5 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4)
  ret <4 x i32> %tmp5
}
251
; Expresses the accumulate as separate vqdmull and vqadds intrinsic calls;
; a single vqdmlal.s32 is still expected in the output.
define <2 x i64> @vqdmlals32_natural(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: vqdmlals32_natural:
;CHECK: vqdmlal.s32
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = load <2 x i32>* %C
  %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp2, <2 x i32> %tmp3)
  %tmp5 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4)
  ret <2 x i64> %tmp5
}
262
; By-lane form of the vqdmull + vqadds sequence: element 1 of the third
; argument is splatted first; vqdmlal.s16 with operand d3[1] is expected.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmlal_lanes16_natural(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vqdmlal_lanes16_natural:
; CHECK: vqdmlal.s16 q0, d2, d3[1]
  %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %arg1_int16x4_t, <4 x i16> %0)
  %2 = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %1)
  ret <4 x i32> %2
}
272
; By-lane form of the vqdmull + vqadds sequence: element 1 of the third
; argument is splatted first; vqdmlal.s32 with operand d3[1] is expected.
define arm_aapcs_vfpcc <2 x i64> @test_vqdmlal_lanes32_natural(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vqdmlal_lanes32_natural:
; CHECK: vqdmlal.s32 q0, d2, d3[1]
  %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %1 = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg1_int32x2_t, <2 x i32> %0)
  %2 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i64> %1)
  ret <2 x i64> %2
}
282
; Declarations of the vqadds intrinsics used by the *_natural vqdmlal tests.
declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
285
; Loads a <4 x i32> accumulator and two <4 x i16> operands and calls
; llvm.arm.neon.vqdmlsl.v4i32; vqdmlsl.s16 is expected in the output.
define <4 x i32> @vqdmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: vqdmlsls16:
;CHECK: vqdmlsl.s16
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = load <4 x i16>* %C
  %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
  ret <4 x i32> %tmp4
}
295
; Loads a <2 x i64> accumulator and two <2 x i32> operands and calls
; llvm.arm.neon.vqdmlsl.v2i64; vqdmlsl.s32 is expected in the output.
define <2 x i64> @vqdmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: vqdmlsls32:
;CHECK: vqdmlsl.s32
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = load <2 x i32>* %C
  %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
  ret <2 x i64> %tmp4
}
305
; Splats element 1 of the third argument and passes it as the last operand of
; llvm.arm.neon.vqdmlsl.v4i32; the by-lane operand d3[1] is expected.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vqdmlsl_lanes16:
; CHECK: vqdmlsl.s16 q0, d2, d3[1]
  %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0)
  ret <4 x i32> %1
}
314
; Splats element 1 of the third argument and passes it as the last operand of
; llvm.arm.neon.vqdmlsl.v2i64; the by-lane operand d3[1] is expected.
define arm_aapcs_vfpcc <2 x i64> @test_vqdmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vqdmlsl_lanes32:
; CHECK: vqdmlsl.s32 q0, d2, d3[1]
  %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0)
  ret <2 x i64> %1
}
323
; Declarations of the vqdmlsl intrinsics (accumulator first, then the two narrow operands).
declare <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone

; Expresses the subtract-accumulate as separate vqdmull and vqsubs intrinsic
; calls; a single vqdmlsl.s16 is still expected in the output.
define <4 x i32> @vqdmlsls16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: vqdmlsls16_natural:
;CHECK: vqdmlsl.s16
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = load <4 x i16>* %C
  %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp2, <4 x i16> %tmp3)
  %tmp5 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4)
  ret <4 x i32> %tmp5
}
337
; Expresses the subtract-accumulate as separate vqdmull and vqsubs intrinsic
; calls; a single vqdmlsl.s32 is still expected in the output.
define <2 x i64> @vqdmlsls32_natural(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: vqdmlsls32_natural:
;CHECK: vqdmlsl.s32
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = load <2 x i32>* %C
  %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp2, <2 x i32> %tmp3)
  %tmp5 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4)
  ret <2 x i64> %tmp5
}
348
; By-lane form of the vqdmull + vqsubs sequence: element 1 of the third
; argument is splatted first; vqdmlsl.s16 with operand d3[1] is expected.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsl_lanes16_natural(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vqdmlsl_lanes16_natural:
; CHECK: vqdmlsl.s16 q0, d2, d3[1]
  %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %arg1_int16x4_t, <4 x i16> %0)
  %2 = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %1)
  ret <4 x i32> %2
}
358
; By-lane form of the vqdmull + vqsubs sequence: element 1 of the third
; argument is splatted first; vqdmlsl.s32 with operand d3[1] is expected.
define arm_aapcs_vfpcc <2 x i64> @test_vqdmlsl_lanes32_natural(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
entry:
; CHECK-LABEL: test_vqdmlsl_lanes32_natural:
; CHECK: vqdmlsl.s32 q0, d2, d3[1]
  %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %1 = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg1_int32x2_t, <2 x i32> %0)
  %2 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i64> %1)
  ret <2 x i64> %2
}
368
; Declarations of the vqsubs intrinsics used by the *_natural vqdmlsl tests.
declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) nounwind readnone