blob: 0716f585baf60f8c76213648d52f24b2f3d7d4fb [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
David Green0ac4f6b2020-02-17 11:41:16 +00003
; Multiply two <4 x i32> vectors and add-reduce the products to i32.
; Expected lowering: a single vmlav.u32.
define arm_aapcs_vfpcc i32 @add_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: add_v4i32_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlav.u32 r0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %m = mul <4 x i32> %x, %y
  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
  ret i32 %z
}
14
; Zero-extend <4 x i32> operands to i64, multiply, and add-reduce to i64.
; Expected lowering: a single vmlalv.u32 producing the result in r0:r1.
define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_zext(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: add_v4i32_v4i64_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlalv.u32 r0, r1, q0, q1
; CHECK-NEXT: bx lr
entry:
  %xx = zext <4 x i32> %x to <4 x i64>
  %yy = zext <4 x i32> %y to <4 x i64>
  %m = mul <4 x i64> %xx, %yy
  %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %m)
  ret i64 %z
}
27
; Sign-extend <4 x i32> operands to i64, multiply, and add-reduce to i64.
; Expected lowering: a single vmlalv.s32 producing the result in r0:r1.
define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_sext(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: add_v4i32_v4i64_sext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlalv.s32 r0, r1, q0, q1
; CHECK-NEXT: bx lr
entry:
  %xx = sext <4 x i32> %x to <4 x i64>
  %yy = sext <4 x i32> %y to <4 x i64>
  %m = mul <4 x i64> %xx, %yy
  %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %m)
  ret i64 %z
}
40
; Zero-extend <2 x i32> operands to i64, multiply, and add-reduce to i64.
; Expected lowering: vmullb.u32 for the widening multiply, then a scalar
; adds/adcs pair to sum the two 64-bit lanes.
define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_zext(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: add_v2i32_v2i64_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullb.u32 q2, q0, q1
; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: vmov r3, s8
; CHECK-NEXT: vmov r1, s11
; CHECK-NEXT: vmov r2, s9
; CHECK-NEXT: adds r0, r0, r3
; CHECK-NEXT: adcs r1, r2
; CHECK-NEXT: bx lr
entry:
  %xx = zext <2 x i32> %x to <2 x i64>
  %yy = zext <2 x i32> %y to <2 x i64>
  %m = mul <2 x i64> %xx, %yy
  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
  ret i64 %z
}
59
; Sign-extend <2 x i32> operands to i64, multiply, and add-reduce to i64.
; Expected lowering: vmullb.s32 for the widening multiply, then a scalar
; adds/adcs pair to sum the two 64-bit lanes.
define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_sext(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: add_v2i32_v2i64_sext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullb.s32 q2, q0, q1
; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: vmov r3, s8
; CHECK-NEXT: vmov r1, s11
; CHECK-NEXT: vmov r2, s9
; CHECK-NEXT: adds r0, r0, r3
; CHECK-NEXT: adcs r1, r2
; CHECK-NEXT: bx lr
entry:
  %xx = sext <2 x i32> %x to <2 x i64>
  %yy = sext <2 x i32> %y to <2 x i64>
  %m = mul <2 x i64> %xx, %yy
  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
  ret i64 %z
}
78
; Zero-extend <8 x i16> operands to i32, multiply, and add-reduce to i32.
; Expected lowering: a single vmlav.u16.
define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_zext(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: add_v8i16_v8i32_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlav.u16 r0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %xx = zext <8 x i16> %x to <8 x i32>
  %yy = zext <8 x i16> %y to <8 x i32>
  %m = mul <8 x i32> %xx, %yy
  %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %m)
  ret i32 %z
}
91
; Sign-extend <8 x i16> operands to i32, multiply, and add-reduce to i32.
; Expected lowering: a single vmlav.s16.
define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_sext(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: add_v8i16_v8i32_sext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlav.s16 r0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %xx = sext <8 x i16> %x to <8 x i32>
  %yy = sext <8 x i16> %y to <8 x i32>
  %m = mul <8 x i32> %xx, %yy
  %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %m)
  ret i32 %z
}
104
; Zero-extend <4 x i16> operands to i32, multiply, and add-reduce to i32.
; Expected lowering: both inputs are widened in-register with vmovlb.u16 and
; then reduced with vmlav.u32.
define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_zext(<4 x i16> %x, <4 x i16> %y) {
; CHECK-LABEL: add_v4i16_v4i32_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovlb.u16 q1, q1
; CHECK-NEXT: vmovlb.u16 q0, q0
; CHECK-NEXT: vmlav.u32 r0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %xx = zext <4 x i16> %x to <4 x i32>
  %yy = zext <4 x i16> %y to <4 x i32>
  %m = mul <4 x i32> %xx, %yy
  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
  ret i32 %z
}
119
; Sign-extend <4 x i16> operands to i32, multiply, and add-reduce to i32.
; Expected lowering: both inputs are widened in-register with vmovlb.s16 and
; then reduced with vmlav.u32 (the recorded output uses the .u32 form here).
define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_sext(<4 x i16> %x, <4 x i16> %y) {
; CHECK-LABEL: add_v4i16_v4i32_sext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovlb.s16 q1, q1
; CHECK-NEXT: vmovlb.s16 q0, q0
; CHECK-NEXT: vmlav.u32 r0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %xx = sext <4 x i16> %x to <4 x i32>
  %yy = sext <4 x i16> %y to <4 x i32>
  %m = mul <4 x i32> %xx, %yy
  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
  ret i32 %z
}
134
; Multiply two <8 x i16> vectors and add-reduce to a zeroext i16 result.
; Expected lowering: separate vmul.i16 and vaddv.u16, followed by uxth for the
; zero-extended return value.
define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: add_v8i16_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmul.i16 q0, q0, q1
; CHECK-NEXT: vaddv.u16 r0, q0
; CHECK-NEXT: uxth r0, r0
; CHECK-NEXT: bx lr
entry:
  %m = mul <8 x i16> %x, %y
  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %m)
  ret i16 %z
}
147
; Zero-extend <8 x i16> operands to i64, multiply, and add-reduce to i64.
; Expected lowering: a single vmlalv.u16 producing the result in r0:r1.
define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_zext(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: add_v8i16_v8i64_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlalv.u16 r0, r1, q0, q1
; CHECK-NEXT: bx lr
entry:
  %xx = zext <8 x i16> %x to <8 x i64>
  %yy = zext <8 x i16> %y to <8 x i64>
  %m = mul <8 x i64> %xx, %yy
  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %m)
  ret i64 %z
}
160
; Sign-extend <8 x i16> operands to i64, multiply, and add-reduce to i64.
; Expected lowering: a single vmlalv.s16 producing the result in r0:r1.
define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: add_v8i16_v8i64_sext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlalv.s16 r0, r1, q0, q1
; CHECK-NEXT: bx lr
entry:
  %xx = sext <8 x i16> %x to <8 x i64>
  %yy = sext <8 x i16> %y to <8 x i64>
  %m = mul <8 x i64> %xx, %yy
  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %m)
  ret i64 %z
}
173
; Zero-extend <2 x i16> operands to i64, multiply, and add-reduce to i64.
; Expected lowering: mask both inputs with 0xffff per 64-bit lane, move the
; lanes to core registers and accumulate with umull + umlal.
define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %y) {
; CHECK-LABEL: add_v2i16_v2i64_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i64 q2, #0xffff
; CHECK-NEXT: vand q1, q1, q2
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: umull r0, r1, r1, r0
; CHECK-NEXT: umlal r0, r1, r3, r2
; CHECK-NEXT: bx lr
entry:
  %xx = zext <2 x i16> %x to <2 x i64>
  %yy = zext <2 x i16> %y to <2 x i64>
  %m = mul <2 x i64> %xx, %yy
  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
  ret i64 %z
}
194
; Sign-extend <2 x i16> operands to i64, multiply, and add-reduce to i64.
; Expected lowering: move the lanes to core registers, sign-extend each with
; sxth, and accumulate with smull + smlal.
define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_sext(<2 x i16> %x, <2 x i16> %y) {
; CHECK-LABEL: add_v2i16_v2i64_sext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: sxth r1, r1
; CHECK-NEXT: smull r0, r1, r1, r0
; CHECK-NEXT: sxth r2, r2
; CHECK-NEXT: sxth r3, r3
; CHECK-NEXT: smlal r0, r1, r3, r2
; CHECK-NEXT: bx lr
entry:
  %xx = sext <2 x i16> %x to <2 x i64>
  %yy = sext <2 x i16> %y to <2 x i64>
  %m = mul <2 x i64> %xx, %yy
  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
  ret i64 %z
}
216
; Zero-extend <16 x i8> operands to i32, multiply, and add-reduce to i32.
; Expected lowering: a single vmlav.u8.
define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_zext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: add_v16i8_v16i32_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlav.u8 r0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %xx = zext <16 x i8> %x to <16 x i32>
  %yy = zext <16 x i8> %y to <16 x i32>
  %m = mul <16 x i32> %xx, %yy
  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %m)
  ret i32 %z
}
229
; Sign-extend <16 x i8> operands to i32, multiply, and add-reduce to i32.
; Expected lowering: a single vmlav.s8.
define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_sext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: add_v16i8_v16i32_sext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmlav.s8 r0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %xx = sext <16 x i8> %x to <16 x i32>
  %yy = sext <16 x i8> %y to <16 x i32>
  %m = mul <16 x i32> %xx, %yy
  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %m)
  ret i32 %z
}
242
; Zero-extend <4 x i8> operands to i32, multiply, and add-reduce to i32.
; Expected lowering: mask both inputs with 0xff per 32-bit lane, then reduce
; with vmlav.u32.
define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_zext(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: add_v4i8_v4i32_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q2, #0xff
; CHECK-NEXT: vand q1, q1, q2
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vmlav.u32 r0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %xx = zext <4 x i8> %x to <4 x i32>
  %yy = zext <4 x i8> %y to <4 x i32>
  %m = mul <4 x i32> %xx, %yy
  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
  ret i32 %z
}
258
; Sign-extend <4 x i8> operands to i32, multiply, and add-reduce to i32.
; Expected lowering: each input is widened in two steps (vmovlb.s8 then
; vmovlb.s16) before the vmlav.u32 reduction.
define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_sext(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: add_v4i8_v4i32_sext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovlb.s8 q1, q1
; CHECK-NEXT: vmovlb.s8 q0, q0
; CHECK-NEXT: vmovlb.s16 q1, q1
; CHECK-NEXT: vmovlb.s16 q0, q0
; CHECK-NEXT: vmlav.u32 r0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %xx = sext <4 x i8> %x to <4 x i32>
  %yy = sext <4 x i8> %y to <4 x i32>
  %m = mul <4 x i32> %xx, %yy
  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
  ret i32 %z
}
275
; Zero-extend <16 x i8> operands to i16, multiply, and add-reduce to a zeroext
; i16 result.
; Expected lowering: the upper and lower eight bytes of each input are rebuilt
; lane by lane into 16-bit vectors, each half is multiplied with vmullb.u8, the
; two partial products are summed with vadd.i16 and reduced by vaddv.u16, and
; uxth produces the zero-extended return value.
define arm_aapcs_vfpcc zeroext i16 @add_v16i8_v16i16_zext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: add_v16i8_v16i16_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.u8 r0, q1[8]
; CHECK-NEXT: vmov.16 q2[0], r0
; CHECK-NEXT: vmov.u8 r0, q1[9]
; CHECK-NEXT: vmov.16 q2[1], r0
; CHECK-NEXT: vmov.u8 r0, q1[10]
; CHECK-NEXT: vmov.16 q2[2], r0
; CHECK-NEXT: vmov.u8 r0, q1[11]
; CHECK-NEXT: vmov.16 q2[3], r0
; CHECK-NEXT: vmov.u8 r0, q1[12]
; CHECK-NEXT: vmov.16 q2[4], r0
; CHECK-NEXT: vmov.u8 r0, q1[13]
; CHECK-NEXT: vmov.16 q2[5], r0
; CHECK-NEXT: vmov.u8 r0, q1[14]
; CHECK-NEXT: vmov.16 q2[6], r0
; CHECK-NEXT: vmov.u8 r0, q1[15]
; CHECK-NEXT: vmov.16 q2[7], r0
; CHECK-NEXT: vmov.u8 r0, q0[8]
; CHECK-NEXT: vmov.16 q3[0], r0
; CHECK-NEXT: vmov.u8 r0, q0[9]
; CHECK-NEXT: vmov.16 q3[1], r0
; CHECK-NEXT: vmov.u8 r0, q0[10]
; CHECK-NEXT: vmov.16 q3[2], r0
; CHECK-NEXT: vmov.u8 r0, q0[11]
; CHECK-NEXT: vmov.16 q3[3], r0
; CHECK-NEXT: vmov.u8 r0, q0[12]
; CHECK-NEXT: vmov.16 q3[4], r0
; CHECK-NEXT: vmov.u8 r0, q0[13]
; CHECK-NEXT: vmov.16 q3[5], r0
; CHECK-NEXT: vmov.u8 r0, q0[14]
; CHECK-NEXT: vmov.16 q3[6], r0
; CHECK-NEXT: vmov.u8 r0, q0[15]
; CHECK-NEXT: vmov.16 q3[7], r0
; CHECK-NEXT: vmov.u8 r0, q1[0]
; CHECK-NEXT: vmullb.u8 q2, q3, q2
; CHECK-NEXT: vmov.16 q3[0], r0
; CHECK-NEXT: vmov.u8 r0, q1[1]
; CHECK-NEXT: vmov.16 q3[1], r0
; CHECK-NEXT: vmov.u8 r0, q1[2]
; CHECK-NEXT: vmov.16 q3[2], r0
; CHECK-NEXT: vmov.u8 r0, q1[3]
; CHECK-NEXT: vmov.16 q3[3], r0
; CHECK-NEXT: vmov.u8 r0, q1[4]
; CHECK-NEXT: vmov.16 q3[4], r0
; CHECK-NEXT: vmov.u8 r0, q1[5]
; CHECK-NEXT: vmov.16 q3[5], r0
; CHECK-NEXT: vmov.u8 r0, q1[6]
; CHECK-NEXT: vmov.16 q3[6], r0
; CHECK-NEXT: vmov.u8 r0, q1[7]
; CHECK-NEXT: vmov.16 q3[7], r0
; CHECK-NEXT: vmov.u8 r0, q0[0]
; CHECK-NEXT: vmov.16 q1[0], r0
; CHECK-NEXT: vmov.u8 r0, q0[1]
; CHECK-NEXT: vmov.16 q1[1], r0
; CHECK-NEXT: vmov.u8 r0, q0[2]
; CHECK-NEXT: vmov.16 q1[2], r0
; CHECK-NEXT: vmov.u8 r0, q0[3]
; CHECK-NEXT: vmov.16 q1[3], r0
; CHECK-NEXT: vmov.u8 r0, q0[4]
; CHECK-NEXT: vmov.16 q1[4], r0
; CHECK-NEXT: vmov.u8 r0, q0[5]
; CHECK-NEXT: vmov.16 q1[5], r0
; CHECK-NEXT: vmov.u8 r0, q0[6]
; CHECK-NEXT: vmov.16 q1[6], r0
; CHECK-NEXT: vmov.u8 r0, q0[7]
; CHECK-NEXT: vmov.16 q1[7], r0
; CHECK-NEXT: vmullb.u8 q0, q1, q3
; CHECK-NEXT: vadd.i16 q0, q0, q2
; CHECK-NEXT: vaddv.u16 r0, q0
; CHECK-NEXT: uxth r0, r0
; CHECK-NEXT: bx lr
entry:
  %xx = zext <16 x i8> %x to <16 x i16>
  %yy = zext <16 x i8> %y to <16 x i16>
  %m = mul <16 x i16> %xx, %yy
  %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %m)
  ret i16 %z
}
356
; Sign-extend <16 x i8> operands to i16, multiply, and add-reduce to a signext
; i16 result.
; Expected lowering: the upper and lower eight bytes of each input are rebuilt
; lane by lane into 16-bit vectors, each half is multiplied with vmullb.s8, the
; two partial products are summed with vadd.i16 and reduced by vaddv.u16, and
; sxth produces the sign-extended return value.
define arm_aapcs_vfpcc signext i16 @add_v16i8_v16i16_sext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: add_v16i8_v16i16_sext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.u8 r0, q1[8]
; CHECK-NEXT: vmov.16 q2[0], r0
; CHECK-NEXT: vmov.u8 r0, q1[9]
; CHECK-NEXT: vmov.16 q2[1], r0
; CHECK-NEXT: vmov.u8 r0, q1[10]
; CHECK-NEXT: vmov.16 q2[2], r0
; CHECK-NEXT: vmov.u8 r0, q1[11]
; CHECK-NEXT: vmov.16 q2[3], r0
; CHECK-NEXT: vmov.u8 r0, q1[12]
; CHECK-NEXT: vmov.16 q2[4], r0
; CHECK-NEXT: vmov.u8 r0, q1[13]
; CHECK-NEXT: vmov.16 q2[5], r0
; CHECK-NEXT: vmov.u8 r0, q1[14]
; CHECK-NEXT: vmov.16 q2[6], r0
; CHECK-NEXT: vmov.u8 r0, q1[15]
; CHECK-NEXT: vmov.16 q2[7], r0
; CHECK-NEXT: vmov.u8 r0, q0[8]
; CHECK-NEXT: vmov.16 q3[0], r0
; CHECK-NEXT: vmov.u8 r0, q0[9]
; CHECK-NEXT: vmov.16 q3[1], r0
; CHECK-NEXT: vmov.u8 r0, q0[10]
; CHECK-NEXT: vmov.16 q3[2], r0
; CHECK-NEXT: vmov.u8 r0, q0[11]
; CHECK-NEXT: vmov.16 q3[3], r0
; CHECK-NEXT: vmov.u8 r0, q0[12]
; CHECK-NEXT: vmov.16 q3[4], r0
; CHECK-NEXT: vmov.u8 r0, q0[13]
; CHECK-NEXT: vmov.16 q3[5], r0
; CHECK-NEXT: vmov.u8 r0, q0[14]
; CHECK-NEXT: vmov.16 q3[6], r0
; CHECK-NEXT: vmov.u8 r0, q0[15]
; CHECK-NEXT: vmov.16 q3[7], r0
; CHECK-NEXT: vmov.u8 r0, q1[0]
; CHECK-NEXT: vmullb.s8 q2, q3, q2
; CHECK-NEXT: vmov.16 q3[0], r0
; CHECK-NEXT: vmov.u8 r0, q1[1]
; CHECK-NEXT: vmov.16 q3[1], r0
; CHECK-NEXT: vmov.u8 r0, q1[2]
; CHECK-NEXT: vmov.16 q3[2], r0
; CHECK-NEXT: vmov.u8 r0, q1[3]
; CHECK-NEXT: vmov.16 q3[3], r0
; CHECK-NEXT: vmov.u8 r0, q1[4]
; CHECK-NEXT: vmov.16 q3[4], r0
; CHECK-NEXT: vmov.u8 r0, q1[5]
; CHECK-NEXT: vmov.16 q3[5], r0
; CHECK-NEXT: vmov.u8 r0, q1[6]
; CHECK-NEXT: vmov.16 q3[6], r0
; CHECK-NEXT: vmov.u8 r0, q1[7]
; CHECK-NEXT: vmov.16 q3[7], r0
; CHECK-NEXT: vmov.u8 r0, q0[0]
; CHECK-NEXT: vmov.16 q1[0], r0
; CHECK-NEXT: vmov.u8 r0, q0[1]
; CHECK-NEXT: vmov.16 q1[1], r0
; CHECK-NEXT: vmov.u8 r0, q0[2]
; CHECK-NEXT: vmov.16 q1[2], r0
; CHECK-NEXT: vmov.u8 r0, q0[3]
; CHECK-NEXT: vmov.16 q1[3], r0
; CHECK-NEXT: vmov.u8 r0, q0[4]
; CHECK-NEXT: vmov.16 q1[4], r0
; CHECK-NEXT: vmov.u8 r0, q0[5]
; CHECK-NEXT: vmov.16 q1[5], r0
; CHECK-NEXT: vmov.u8 r0, q0[6]
; CHECK-NEXT: vmov.16 q1[6], r0
; CHECK-NEXT: vmov.u8 r0, q0[7]
; CHECK-NEXT: vmov.16 q1[7], r0
; CHECK-NEXT: vmullb.s8 q0, q1, q3
; CHECK-NEXT: vadd.i16 q0, q0, q2
; CHECK-NEXT: vaddv.u16 r0, q0
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: bx lr
entry:
  %xx = sext <16 x i8> %x to <16 x i16>
  %yy = sext <16 x i8> %y to <16 x i16>
  %m = mul <16 x i16> %xx, %yy
  %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %m)
  ret i16 %z
}
437
; Zero-extend <8 x i8> operands to i16, multiply, and add-reduce to a zeroext
; i16 result.
; Expected lowering: vmullb.u8 for the widening multiply, vaddv.u16 for the
; reduction, then uxth for the zero-extended return value.
define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_zext(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: add_v8i8_v8i16_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullb.u8 q0, q0, q1
; CHECK-NEXT: vaddv.u16 r0, q0
; CHECK-NEXT: uxth r0, r0
; CHECK-NEXT: bx lr
entry:
  %xx = zext <8 x i8> %x to <8 x i16>
  %yy = zext <8 x i8> %y to <8 x i16>
  %m = mul <8 x i16> %xx, %yy
  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %m)
  ret i16 %z
}
452
; Sign-extend <8 x i8> operands to i16, multiply, and add-reduce to a signext
; i16 result.
; Expected lowering: vmullb.s8 for the widening multiply, vaddv.u16 for the
; reduction, then sxth for the sign-extended return value.
define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_sext(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: add_v8i8_v8i16_sext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullb.s8 q0, q0, q1
; CHECK-NEXT: vaddv.u16 r0, q0
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: bx lr
entry:
  %xx = sext <8 x i8> %x to <8 x i16>
  %yy = sext <8 x i8> %y to <8 x i16>
  %m = mul <8 x i16> %xx, %yy
  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %m)
  ret i16 %z
}
467
; Multiply two <16 x i8> vectors and add-reduce to a zeroext i8 result.
; Expected lowering: separate vmul.i8 and vaddv.u8, followed by uxtb for the
; zero-extended return value.
define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: add_v16i8_v16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmul.i8 q0, q0, q1
; CHECK-NEXT: vaddv.u8 r0, q0
; CHECK-NEXT: uxtb r0, r0
; CHECK-NEXT: bx lr
entry:
  %m = mul <16 x i8> %x, %y
  %z = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %m)
  ret i8 %z
}
480
; Zero-extend <16 x i8> operands to i64, multiply, and add-reduce to i64.
; Expected lowering: no single MVE reduction is selected here. The recorded
; sequence extracts the bytes two at a time, masks them with 0xff per 64-bit
; lane, multiplies in core registers with umull/umlal and chains the partial
; sums through adds/adcs into r0:r1. r7/lr and d8-d11 are saved and restored
; around the body.
define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: add_v16i8_v16i64_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov.u8 r0, q1[0]
; CHECK-NEXT: vmov.u8 r1, q0[0]
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.u8 r0, q1[1]
; CHECK-NEXT: vmov.32 q4[0], r1
; CHECK-NEXT: vmov.u8 r1, q0[1]
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.i64 q2, #0xff
; CHECK-NEXT: vmov.32 q4[2], r1
; CHECK-NEXT: vand q3, q3, q2
; CHECK-NEXT: vand q4, q4, q2
; CHECK-NEXT: vmov r0, s14
; CHECK-NEXT: vmov r1, s18
; CHECK-NEXT: vmov r2, s12
; CHECK-NEXT: vmov r3, s16
; CHECK-NEXT: umull r12, r1, r1, r0
; CHECK-NEXT: vmov.u8 r0, q0[2]
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.u8 r0, q0[3]
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: umull r2, r3, r3, r2
; CHECK-NEXT: vand q4, q4, q2
; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: orr.w lr, r3, r1
; CHECK-NEXT: vmov.u8 r3, q1[2]
; CHECK-NEXT: vmov.32 q3[0], r3
; CHECK-NEXT: vmov.u8 r3, q1[3]
; CHECK-NEXT: vmov.32 q3[2], r3
; CHECK-NEXT: add r2, r12
; CHECK-NEXT: vand q3, q3, q2
; CHECK-NEXT: vmov r3, s12
; CHECK-NEXT: umull r0, r3, r0, r3
; CHECK-NEXT: vmov.32 q5[0], r0
; CHECK-NEXT: vmov r0, s14
; CHECK-NEXT: vmov.32 q5[1], r3
; CHECK-NEXT: vmov r3, s18
; CHECK-NEXT: umull r0, r3, r3, r0
; CHECK-NEXT: vmov.32 q5[2], r0
; CHECK-NEXT: vmov.32 q5[3], r3
; CHECK-NEXT: vmov r1, s20
; CHECK-NEXT: vmov r0, s21
; CHECK-NEXT: adds r1, r1, r2
; CHECK-NEXT: adc.w r2, lr, r0
; CHECK-NEXT: vmov r0, s22
; CHECK-NEXT: adds.w r12, r1, r0
; CHECK-NEXT: adc.w r1, r2, r3
; CHECK-NEXT: vmov.u8 r2, q1[4]
; CHECK-NEXT: vmov.u8 r3, q0[4]
; CHECK-NEXT: vmov.32 q3[0], r2
; CHECK-NEXT: vmov.u8 r2, q1[5]
; CHECK-NEXT: vmov.32 q4[0], r3
; CHECK-NEXT: vmov.u8 r3, q0[5]
; CHECK-NEXT: vmov.32 q3[2], r2
; CHECK-NEXT: vmov.32 q4[2], r3
; CHECK-NEXT: vand q3, q3, q2
; CHECK-NEXT: vand q4, q4, q2
; CHECK-NEXT: vmov r2, s12
; CHECK-NEXT: vmov r3, s16
; CHECK-NEXT: umull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q5[0], r2
; CHECK-NEXT: vmov r2, s14
; CHECK-NEXT: vmov.32 q5[1], r3
; CHECK-NEXT: vmov r3, s18
; CHECK-NEXT: umull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q5[2], r2
; CHECK-NEXT: vmov.32 q5[3], r3
; CHECK-NEXT: vmov r0, s20
; CHECK-NEXT: vmov r2, s21
; CHECK-NEXT: adds.w r0, r0, r12
; CHECK-NEXT: adcs r1, r2
; CHECK-NEXT: vmov r2, s22
; CHECK-NEXT: adds.w r12, r0, r2
; CHECK-NEXT: vmov.u8 r2, q1[6]
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: vmov.u8 r3, q0[6]
; CHECK-NEXT: vmov.32 q3[0], r2
; CHECK-NEXT: vmov.u8 r2, q1[7]
; CHECK-NEXT: vmov.32 q4[0], r3
; CHECK-NEXT: vmov.u8 r3, q0[7]
; CHECK-NEXT: vmov.32 q3[2], r2
; CHECK-NEXT: vmov.32 q4[2], r3
; CHECK-NEXT: vand q3, q3, q2
; CHECK-NEXT: vand q4, q4, q2
; CHECK-NEXT: vmov r2, s12
; CHECK-NEXT: vmov r3, s16
; CHECK-NEXT: umull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q5[0], r2
; CHECK-NEXT: vmov r2, s14
; CHECK-NEXT: vmov.32 q5[1], r3
; CHECK-NEXT: vmov r3, s18
; CHECK-NEXT: umull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q5[2], r2
; CHECK-NEXT: vmov.32 q5[3], r3
; CHECK-NEXT: vmov r0, s20
; CHECK-NEXT: vmov r2, s21
; CHECK-NEXT: adds.w r0, r0, r12
; CHECK-NEXT: adcs r1, r2
; CHECK-NEXT: vmov r2, s22
; CHECK-NEXT: adds.w r12, r0, r2
; CHECK-NEXT: vmov.u8 r2, q1[8]
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: vmov.u8 r3, q0[8]
; CHECK-NEXT: vmov.32 q3[0], r2
; CHECK-NEXT: vmov.u8 r2, q1[9]
; CHECK-NEXT: vmov.32 q4[0], r3
; CHECK-NEXT: vmov.u8 r3, q0[9]
; CHECK-NEXT: vmov.32 q3[2], r2
; CHECK-NEXT: vmov.32 q4[2], r3
; CHECK-NEXT: vand q3, q3, q2
; CHECK-NEXT: vand q4, q4, q2
; CHECK-NEXT: vmov r2, s12
; CHECK-NEXT: vmov r3, s16
; CHECK-NEXT: umull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q5[0], r2
; CHECK-NEXT: vmov r2, s14
; CHECK-NEXT: vmov.32 q5[1], r3
; CHECK-NEXT: vmov r3, s18
; CHECK-NEXT: umull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q5[2], r2
; CHECK-NEXT: vmov.32 q5[3], r3
; CHECK-NEXT: vmov r0, s20
; CHECK-NEXT: vmov r2, s21
; CHECK-NEXT: adds.w r0, r0, r12
; CHECK-NEXT: adcs r1, r2
; CHECK-NEXT: vmov r2, s22
; CHECK-NEXT: adds.w r12, r0, r2
; CHECK-NEXT: vmov.u8 r2, q1[10]
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: vmov.u8 r3, q0[10]
; CHECK-NEXT: vmov.32 q3[0], r2
; CHECK-NEXT: vmov.u8 r2, q1[11]
; CHECK-NEXT: vmov.32 q4[0], r3
; CHECK-NEXT: vmov.u8 r3, q0[11]
; CHECK-NEXT: vmov.32 q3[2], r2
; CHECK-NEXT: vmov.32 q4[2], r3
; CHECK-NEXT: vand q3, q3, q2
; CHECK-NEXT: vand q4, q4, q2
; CHECK-NEXT: vmov r2, s12
; CHECK-NEXT: vmov r3, s16
; CHECK-NEXT: umull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q5[0], r2
; CHECK-NEXT: vmov r2, s14
; CHECK-NEXT: vmov.32 q5[1], r3
; CHECK-NEXT: vmov r3, s18
; CHECK-NEXT: umull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q5[2], r2
; CHECK-NEXT: vmov.32 q5[3], r3
; CHECK-NEXT: vmov r0, s20
; CHECK-NEXT: vmov r2, s21
; CHECK-NEXT: adds.w r0, r0, r12
; CHECK-NEXT: adcs r1, r2
; CHECK-NEXT: vmov r2, s22
; CHECK-NEXT: adds.w r12, r0, r2
; CHECK-NEXT: vmov.u8 r2, q1[12]
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: vmov.u8 r3, q0[12]
; CHECK-NEXT: vmov.32 q3[0], r2
; CHECK-NEXT: vmov.u8 r2, q1[13]
; CHECK-NEXT: vmov.32 q4[0], r3
; CHECK-NEXT: vmov.u8 r3, q0[13]
; CHECK-NEXT: vmov.32 q3[2], r2
; CHECK-NEXT: vmov.32 q4[2], r3
; CHECK-NEXT: vand q3, q3, q2
; CHECK-NEXT: vand q4, q4, q2
; CHECK-NEXT: vmov r2, s12
; CHECK-NEXT: vmov r3, s16
; CHECK-NEXT: umull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q5[0], r2
; CHECK-NEXT: vmov r2, s14
; CHECK-NEXT: vmov.32 q5[1], r3
; CHECK-NEXT: vmov r3, s18
; CHECK-NEXT: umull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q5[2], r2
; CHECK-NEXT: vmov.32 q5[3], r3
; CHECK-NEXT: vmov r0, s20
; CHECK-NEXT: vmov r2, s21
; CHECK-NEXT: adds.w r0, r0, r12
; CHECK-NEXT: adcs r1, r2
; CHECK-NEXT: vmov r2, s22
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: vmov.u8 r2, q1[14]
; CHECK-NEXT: vmov.32 q3[0], r2
; CHECK-NEXT: vmov.u8 r2, q1[15]
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: vmov.32 q3[2], r2
; CHECK-NEXT: vmov.u8 r3, q0[14]
; CHECK-NEXT: vand q1, q3, q2
; CHECK-NEXT: vmov.32 q3[0], r3
; CHECK-NEXT: vmov.u8 r3, q0[15]
; CHECK-NEXT: vmov.32 q3[2], r3
; CHECK-NEXT: vmov r2, s4
; CHECK-NEXT: vand q0, q3, q2
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: umlal r0, r1, r3, r2
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: umlal r0, r1, r3, r2
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r7, pc}
entry:
  %xx = zext <16 x i8> %x to <16 x i64>
  %yy = zext <16 x i8> %y to <16 x i64>
  %m = mul <16 x i64> %xx, %yy
  %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %m)
  ret i64 %z
}
694
; Sign-extend <16 x i8> operands to i64, multiply, and add-reduce to i64.
; Expected lowering: no single MVE reduction is selected here. The recorded
; sequence extracts each byte pair to core registers, sign-extends with sxtb,
; multiplies with smull/smlal and chains the partial sums through adds/adcs
; into r0:r1.
define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: add_v16i8_v16i64_sext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.u8 r0, q1[0]
; CHECK-NEXT: vmov.u8 r1, q0[0]
; CHECK-NEXT: sxtb r0, r0
; CHECK-NEXT: sxtb r1, r1
; CHECK-NEXT: smull r0, r1, r1, r0
; CHECK-NEXT: vmov.32 q2[0], r0
; CHECK-NEXT: vmov.u8 r0, q1[1]
; CHECK-NEXT: vmov.32 q2[1], r1
; CHECK-NEXT: vmov.u8 r1, q0[1]
; CHECK-NEXT: sxtb r0, r0
; CHECK-NEXT: sxtb r1, r1
; CHECK-NEXT: smull r0, r1, r1, r0
; CHECK-NEXT: vmov.32 q2[2], r0
; CHECK-NEXT: vmov.32 q2[3], r1
; CHECK-NEXT: vmov r2, s10
; CHECK-NEXT: vmov r3, s8
; CHECK-NEXT: vmov r0, s9
; CHECK-NEXT: adds r2, r2, r3
; CHECK-NEXT: vmov.u8 r3, q0[2]
; CHECK-NEXT: adc.w r12, r0, r1
; CHECK-NEXT: vmov.u8 r1, q1[2]
; CHECK-NEXT: sxtb r1, r1
; CHECK-NEXT: sxtb r3, r3
; CHECK-NEXT: smull r1, r3, r3, r1
; CHECK-NEXT: vmov.32 q2[0], r1
; CHECK-NEXT: vmov.u8 r1, q1[3]
; CHECK-NEXT: vmov.32 q2[1], r3
; CHECK-NEXT: vmov.u8 r3, q0[3]
; CHECK-NEXT: sxtb r1, r1
; CHECK-NEXT: sxtb r3, r3
; CHECK-NEXT: smull r1, r3, r3, r1
; CHECK-NEXT: vmov.32 q2[2], r1
; CHECK-NEXT: vmov.32 q2[3], r3
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vmov r1, s9
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: vmov r2, s10
; CHECK-NEXT: adc.w r1, r1, r12
; CHECK-NEXT: adds.w r12, r0, r2
; CHECK-NEXT: vmov.u8 r2, q1[4]
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: vmov.u8 r3, q0[4]
; CHECK-NEXT: sxtb r2, r2
; CHECK-NEXT: sxtb r3, r3
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q2[0], r2
; CHECK-NEXT: vmov.u8 r2, q1[5]
; CHECK-NEXT: vmov.32 q2[1], r3
; CHECK-NEXT: vmov.u8 r3, q0[5]
; CHECK-NEXT: sxtb r2, r2
; CHECK-NEXT: sxtb r3, r3
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q2[2], r2
; CHECK-NEXT: vmov.32 q2[3], r3
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vmov r2, s9
; CHECK-NEXT: adds.w r0, r0, r12
; CHECK-NEXT: adcs r1, r2
; CHECK-NEXT: vmov r2, s10
; CHECK-NEXT: adds.w r12, r0, r2
; CHECK-NEXT: vmov.u8 r2, q1[6]
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: vmov.u8 r3, q0[6]
; CHECK-NEXT: sxtb r2, r2
; CHECK-NEXT: sxtb r3, r3
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q2[0], r2
; CHECK-NEXT: vmov.u8 r2, q1[7]
; CHECK-NEXT: vmov.32 q2[1], r3
; CHECK-NEXT: vmov.u8 r3, q0[7]
; CHECK-NEXT: sxtb r2, r2
; CHECK-NEXT: sxtb r3, r3
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q2[2], r2
; CHECK-NEXT: vmov.32 q2[3], r3
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vmov r2, s9
; CHECK-NEXT: adds.w r0, r0, r12
; CHECK-NEXT: adcs r1, r2
; CHECK-NEXT: vmov r2, s10
; CHECK-NEXT: adds.w r12, r0, r2
; CHECK-NEXT: vmov.u8 r2, q1[8]
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: vmov.u8 r3, q0[8]
; CHECK-NEXT: sxtb r2, r2
; CHECK-NEXT: sxtb r3, r3
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q2[0], r2
; CHECK-NEXT: vmov.u8 r2, q1[9]
; CHECK-NEXT: vmov.32 q2[1], r3
; CHECK-NEXT: vmov.u8 r3, q0[9]
; CHECK-NEXT: sxtb r2, r2
; CHECK-NEXT: sxtb r3, r3
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q2[2], r2
; CHECK-NEXT: vmov.32 q2[3], r3
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vmov r2, s9
; CHECK-NEXT: adds.w r0, r0, r12
; CHECK-NEXT: adcs r1, r2
; CHECK-NEXT: vmov r2, s10
; CHECK-NEXT: adds.w r12, r0, r2
; CHECK-NEXT: vmov.u8 r2, q1[10]
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: vmov.u8 r3, q0[10]
; CHECK-NEXT: sxtb r2, r2
; CHECK-NEXT: sxtb r3, r3
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q2[0], r2
; CHECK-NEXT: vmov.u8 r2, q1[11]
; CHECK-NEXT: vmov.32 q2[1], r3
; CHECK-NEXT: vmov.u8 r3, q0[11]
; CHECK-NEXT: sxtb r2, r2
; CHECK-NEXT: sxtb r3, r3
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q2[2], r2
; CHECK-NEXT: vmov.32 q2[3], r3
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vmov r2, s9
; CHECK-NEXT: adds.w r0, r0, r12
; CHECK-NEXT: adcs r1, r2
; CHECK-NEXT: vmov r2, s10
; CHECK-NEXT: adds.w r12, r0, r2
; CHECK-NEXT: vmov.u8 r2, q1[12]
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: vmov.u8 r3, q0[12]
; CHECK-NEXT: sxtb r2, r2
; CHECK-NEXT: sxtb r3, r3
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q2[0], r2
; CHECK-NEXT: vmov.u8 r2, q1[13]
; CHECK-NEXT: vmov.32 q2[1], r3
; CHECK-NEXT: vmov.u8 r3, q0[13]
; CHECK-NEXT: sxtb r2, r2
; CHECK-NEXT: sxtb r3, r3
; CHECK-NEXT: smull r2, r3, r3, r2
; CHECK-NEXT: vmov.32 q2[2], r2
; CHECK-NEXT: vmov.32 q2[3], r3
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vmov r2, s9
; CHECK-NEXT: adds.w r0, r0, r12
; CHECK-NEXT: adcs r1, r2
; CHECK-NEXT: vmov r2, s10
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: vmov.u8 r2, q1[14]
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: vmov.u8 r3, q0[14]
; CHECK-NEXT: sxtb r2, r2
; CHECK-NEXT: sxtb r3, r3
; CHECK-NEXT: smlal r0, r1, r3, r2
; CHECK-NEXT: vmov.u8 r2, q1[15]
; CHECK-NEXT: vmov.u8 r3, q0[15]
; CHECK-NEXT: sxtb r2, r2
; CHECK-NEXT: sxtb r3, r3
; CHECK-NEXT: smlal r0, r1, r3, r2
; CHECK-NEXT: bx lr
entry:
  %xx = sext <16 x i8> %x to <16 x i64>
  %yy = sext <16 x i8> %y to <16 x i64>
  %m = mul <16 x i64> %xx, %yy
  %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %m)
  ret i64 %z
}
861
; Dot product of two <2 x i8> vectors, zero-extended to <2 x i64> before the
; multiply and add-reduced to a scalar i64.
; The checks expect both operands to be masked with 0xff and the two lane
; products to be formed with scalar umull instructions, not a vector
; multiply-accumulate.
862define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %y) {
863; CHECK-LABEL: add_v2i8_v2i64_zext:
864; CHECK: @ %bb.0: @ %entry
David Greenc9eaed52020-03-28 16:22:05 +0000865; CHECK-NEXT: vmov.i64 q2, #0xff
David Green0ac4f6b2020-02-17 11:41:16 +0000866; CHECK-NEXT: vand q1, q1, q2
867; CHECK-NEXT: vand q0, q0, q2
868; CHECK-NEXT: vmov r0, s6
869; CHECK-NEXT: vmov r1, s2
870; CHECK-NEXT: vmov r2, s4
871; CHECK-NEXT: vmov r3, s0
872; CHECK-NEXT: umull r0, r1, r1, r0
873; CHECK-NEXT: umull r2, r3, r3, r2
874; CHECK-NEXT: add r0, r2
875; CHECK-NEXT: orrs r1, r3
876; CHECK-NEXT: bx lr
David Green0ac4f6b2020-02-17 11:41:16 +0000877entry:
878 %xx = zext <2 x i8> %x to <2 x i64>
879 %yy = zext <2 x i8> %y to <2 x i64>
880 %m = mul <2 x i64> %xx, %yy
881 %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
882 ret i64 %z
883}
884
; Dot product of two <2 x i8> vectors, sign-extended to <2 x i64> before the
; multiply and add-reduced to a scalar i64.
; The checks expect each lane to be moved to a core register, sign-extended
; with sxtb, and combined with one smull followed by one smlal.
885define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_sext(<2 x i8> %x, <2 x i8> %y) {
886; CHECK-LABEL: add_v2i8_v2i64_sext:
887; CHECK: @ %bb.0: @ %entry
888; CHECK-NEXT: vmov r0, s4
889; CHECK-NEXT: vmov r1, s0
890; CHECK-NEXT: vmov r2, s6
891; CHECK-NEXT: vmov r3, s2
892; CHECK-NEXT: sxtb r0, r0
893; CHECK-NEXT: sxtb r1, r1
894; CHECK-NEXT: smull r0, r1, r1, r0
895; CHECK-NEXT: sxtb r2, r2
896; CHECK-NEXT: sxtb r3, r3
897; CHECK-NEXT: smlal r0, r1, r3, r2
898; CHECK-NEXT: bx lr
899entry:
900 %xx = sext <2 x i8> %x to <2 x i64>
901 %yy = sext <2 x i8> %y to <2 x i64>
902 %m = mul <2 x i64> %xx, %yy
903 %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
904 ret i64 %z
905}
906
; Dot product of two <2 x i64> vectors with no extension step: a full 64-bit
; multiply per lane followed by an add reduction to i64.
; The checks expect each 64-bit product to be built in core registers from one
; umull plus two mla instructions, with the two products summed by adds.w/adcs.
907define arm_aapcs_vfpcc i64 @add_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) {
908; CHECK-LABEL: add_v2i64_v2i64:
909; CHECK: @ %bb.0: @ %entry
910; CHECK-NEXT: .save {r4, lr}
911; CHECK-NEXT: push {r4, lr}
912; CHECK-NEXT: vmov r0, s4
913; CHECK-NEXT: vmov r1, s0
914; CHECK-NEXT: vmov r2, s5
915; CHECK-NEXT: vmov r4, s7
916; CHECK-NEXT: umull r12, r3, r1, r0
917; CHECK-NEXT: mla r1, r1, r2, r3
918; CHECK-NEXT: vmov r2, s1
919; CHECK-NEXT: vmov r3, s2
920; CHECK-NEXT: vmov.32 q2[0], r12
921; CHECK-NEXT: mla r1, r2, r0, r1
922; CHECK-NEXT: vmov r2, s6
923; CHECK-NEXT: vmov.32 q2[1], r1
924; CHECK-NEXT: vmov r12, s8
925; CHECK-NEXT: umull lr, r0, r3, r2
926; CHECK-NEXT: mla r0, r3, r4, r0
927; CHECK-NEXT: vmov r3, s3
928; CHECK-NEXT: mla r2, r3, r2, r0
929; CHECK-NEXT: adds.w r0, r12, lr
930; CHECK-NEXT: adcs r1, r2
931; CHECK-NEXT: pop {r4, pc}
932entry:
933 %m = mul <2 x i64> %x, %y
934 %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
935 ret i64 %z
936}
937
; Accumulating form: <4 x i32> multiply, add reduction to i32, then the scalar
; accumulator %a is added to the reduced value.
; The checks expect the whole pattern, including the add of %a, to fold into a
; single vmlava.u32 that accumulates into r0.
938define arm_aapcs_vfpcc i32 @add_v4i32_v4i32_acc(<4 x i32> %x, <4 x i32> %y, i32 %a) {
939; CHECK-LABEL: add_v4i32_v4i32_acc:
940; CHECK: @ %bb.0: @ %entry
David Green33aa5df2020-02-17 12:00:17 +0000941; CHECK-NEXT: vmlava.u32 r0, q0, q1
David Green0ac4f6b2020-02-17 11:41:16 +0000942; CHECK-NEXT: bx lr
943entry:
944 %m = mul <4 x i32> %x, %y
945 %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
946 %r = add i32 %z, %a
947 ret i32 %r
948}
949
; Accumulating form: <4 x i32> operands zero-extended to <4 x i64>, multiplied,
; add-reduced to i64, then added to the i64 accumulator %a.
; The checks expect a single vmlalva.u32 accumulating into the r0/r1 pair.
950define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_acc_zext(<4 x i32> %x, <4 x i32> %y, i64 %a) {
951; CHECK-LABEL: add_v4i32_v4i64_acc_zext:
952; CHECK: @ %bb.0: @ %entry
David Green33aa5df2020-02-17 12:00:17 +0000953; CHECK-NEXT: vmlalva.u32 r0, r1, q0, q1
954; CHECK-NEXT: bx lr
David Green0ac4f6b2020-02-17 11:41:16 +0000955entry:
956 %xx = zext <4 x i32> %x to <4 x i64>
957 %yy = zext <4 x i32> %y to <4 x i64>
958 %m = mul <4 x i64> %xx, %yy
959 %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %m)
960 %r = add i64 %z, %a
961 ret i64 %r
962}
963
; Accumulating form: <4 x i32> operands sign-extended to <4 x i64>, multiplied,
; add-reduced to i64, then added to the i64 accumulator %a.
; The checks expect a single vmlalva.s32 accumulating into the r0/r1 pair.
964define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_acc_sext(<4 x i32> %x, <4 x i32> %y, i64 %a) {
965; CHECK-LABEL: add_v4i32_v4i64_acc_sext:
966; CHECK: @ %bb.0: @ %entry
David Green33aa5df2020-02-17 12:00:17 +0000967; CHECK-NEXT: vmlalva.s32 r0, r1, q0, q1
968; CHECK-NEXT: bx lr
David Green0ac4f6b2020-02-17 11:41:16 +0000969entry:
970 %xx = sext <4 x i32> %x to <4 x i64>
971 %yy = sext <4 x i32> %y to <4 x i64>
972 %m = mul <4 x i64> %xx, %yy
973 %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %m)
974 %r = add i64 %z, %a
975 ret i64 %r
976}
977
; Accumulating form: <2 x i32> operands zero-extended to <2 x i64>, multiplied,
; add-reduced to i64, then added to the i64 accumulator %a.
; The checks expect the widening multiply to be a vmullb.u32, with the two
; 64-bit lanes and the accumulator then summed in core registers using
; adds/adc pairs.
978define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_zext(<2 x i32> %x, <2 x i32> %y, i64 %a) {
979; CHECK-LABEL: add_v2i32_v2i64_acc_zext:
980; CHECK: @ %bb.0: @ %entry
981; CHECK-NEXT: .save {r7, lr}
982; CHECK-NEXT: push {r7, lr}
David Greenfbd53ff2020-04-01 13:58:42 +0100983; CHECK-NEXT: vmullb.u32 q2, q0, q1
984; CHECK-NEXT: vmov r2, s10
985; CHECK-NEXT: vmov r3, s8
986; CHECK-NEXT: vmov r12, s11
987; CHECK-NEXT: vmov lr, s9
988; CHECK-NEXT: adds r2, r2, r3
989; CHECK-NEXT: adc.w r3, lr, r12
David Green0ac4f6b2020-02-17 11:41:16 +0000990; CHECK-NEXT: adds r0, r0, r2
David Greenfbd53ff2020-04-01 13:58:42 +0100991; CHECK-NEXT: adcs r1, r3
David Green0ac4f6b2020-02-17 11:41:16 +0000992; CHECK-NEXT: pop {r7, pc}
993entry:
994 %xx = zext <2 x i32> %x to <2 x i64>
995 %yy = zext <2 x i32> %y to <2 x i64>
996 %m = mul <2 x i64> %xx, %yy
997 %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
998 %r = add i64 %z, %a
999 ret i64 %r
1000}
1001
; Accumulating form: <2 x i32> operands sign-extended to <2 x i64>, multiplied,
; add-reduced to i64, then added to the i64 accumulator %a.
; The checks expect the widening multiply to be a vmullb.s32, with the two
; 64-bit lanes and the accumulator then summed in core registers using
; adds/adc pairs.
1002define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_sext(<2 x i32> %x, <2 x i32> %y, i64 %a) {
1003; CHECK-LABEL: add_v2i32_v2i64_acc_sext:
1004; CHECK: @ %bb.0: @ %entry
1005; CHECK-NEXT: .save {r7, lr}
1006; CHECK-NEXT: push {r7, lr}
David Greenfbd53ff2020-04-01 13:58:42 +01001007; CHECK-NEXT: vmullb.s32 q2, q0, q1
1008; CHECK-NEXT: vmov r2, s10
1009; CHECK-NEXT: vmov r3, s8
1010; CHECK-NEXT: vmov r12, s11
1011; CHECK-NEXT: vmov lr, s9
1012; CHECK-NEXT: adds r2, r2, r3
1013; CHECK-NEXT: adc.w r3, lr, r12
David Green0ac4f6b2020-02-17 11:41:16 +00001014; CHECK-NEXT: adds r0, r0, r2
David Greenfbd53ff2020-04-01 13:58:42 +01001015; CHECK-NEXT: adcs r1, r3
David Green0ac4f6b2020-02-17 11:41:16 +00001016; CHECK-NEXT: pop {r7, pc}
1017entry:
1018 %xx = sext <2 x i32> %x to <2 x i64>
1019 %yy = sext <2 x i32> %y to <2 x i64>
1020 %m = mul <2 x i64> %xx, %yy
1021 %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
1022 %r = add i64 %z, %a
1023 ret i64 %r
1024}
1025
; Accumulating form: <8 x i16> operands zero-extended to <8 x i32>, multiplied,
; add-reduced to i32, then added to the i32 accumulator %a.
; The checks expect a single vmlava.u16 accumulating into r0.
1026define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_acc_zext(<8 x i16> %x, <8 x i16> %y, i32 %a) {
1027; CHECK-LABEL: add_v8i16_v8i32_acc_zext:
1028; CHECK: @ %bb.0: @ %entry
David Green33aa5df2020-02-17 12:00:17 +00001029; CHECK-NEXT: vmlava.u16 r0, q0, q1
David Green0ac4f6b2020-02-17 11:41:16 +00001030; CHECK-NEXT: bx lr
1031entry:
1032 %xx = zext <8 x i16> %x to <8 x i32>
1033 %yy = zext <8 x i16> %y to <8 x i32>
1034 %m = mul <8 x i32> %xx, %yy
1035 %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %m)
1036 %r = add i32 %z, %a
1037 ret i32 %r
1038}
1039
; Accumulating form: <8 x i16> operands sign-extended to <8 x i32>, multiplied,
; add-reduced to i32, then added to the i32 accumulator %a.
; The checks expect a single vmlava.s16 accumulating into r0.
1040define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_acc_sext(<8 x i16> %x, <8 x i16> %y, i32 %a) {
1041; CHECK-LABEL: add_v8i16_v8i32_acc_sext:
1042; CHECK: @ %bb.0: @ %entry
David Green33aa5df2020-02-17 12:00:17 +00001043; CHECK-NEXT: vmlava.s16 r0, q0, q1
David Green0ac4f6b2020-02-17 11:41:16 +00001044; CHECK-NEXT: bx lr
1045entry:
1046 %xx = sext <8 x i16> %x to <8 x i32>
1047 %yy = sext <8 x i16> %y to <8 x i32>
1048 %m = mul <8 x i32> %xx, %yy
1049 %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %m)
1050 %r = add i32 %z, %a
1051 ret i32 %r
1052}
1053
; Accumulating form: <4 x i16> operands zero-extended to <4 x i32>, multiplied,
; add-reduced to i32, then added to the i32 accumulator %a.
; The checks expect both operands to be widened with vmovlb.u16 and the
; multiply, reduction and accumulate to be a single vmlava.u32.
1054define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_acc_zext(<4 x i16> %x, <4 x i16> %y, i32 %a) {
1055; CHECK-LABEL: add_v4i16_v4i32_acc_zext:
1056; CHECK: @ %bb.0: @ %entry
1057; CHECK-NEXT: vmovlb.u16 q1, q1
1058; CHECK-NEXT: vmovlb.u16 q0, q0
David Green33aa5df2020-02-17 12:00:17 +00001059; CHECK-NEXT: vmlava.u32 r0, q0, q1
David Green0ac4f6b2020-02-17 11:41:16 +00001060; CHECK-NEXT: bx lr
1061entry:
1062 %xx = zext <4 x i16> %x to <4 x i32>
1063 %yy = zext <4 x i16> %y to <4 x i32>
1064 %m = mul <4 x i32> %xx, %yy
1065 %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
1066 %r = add i32 %z, %a
1067 ret i32 %r
1068}
1069
; Accumulating form: <4 x i16> operands sign-extended to <4 x i32>, multiplied,
; add-reduced to i32, then added to the i32 accumulator %a.
; The checks expect both operands to be widened with vmovlb.s16; the
; multiply-accumulate that follows is checked as vmlava.u32, the same mnemonic
; as in the zero-extended variant of this test.
1070define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_acc_sext(<4 x i16> %x, <4 x i16> %y, i32 %a) {
1071; CHECK-LABEL: add_v4i16_v4i32_acc_sext:
1072; CHECK: @ %bb.0: @ %entry
1073; CHECK-NEXT: vmovlb.s16 q1, q1
1074; CHECK-NEXT: vmovlb.s16 q0, q0
David Green33aa5df2020-02-17 12:00:17 +00001075; CHECK-NEXT: vmlava.u32 r0, q0, q1
David Green0ac4f6b2020-02-17 11:41:16 +00001076; CHECK-NEXT: bx lr
1077entry:
1078 %xx = sext <4 x i16> %x to <4 x i32>
1079 %yy = sext <4 x i16> %y to <4 x i32>
1080 %m = mul <4 x i32> %xx, %yy
1081 %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
1082 %r = add i32 %z, %a
1083 ret i32 %r
1084}
1085
; Accumulating form with no extension: <8 x i16> multiply, add reduction to
; i16, then the i16 accumulator %a is added. The result is returned zeroext.
; The checks expect a separate vmul.i16 followed by vaddva.u16 into r0, and a
; final uxth for the zeroext return.
1086define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16_acc(<8 x i16> %x, <8 x i16> %y, i16 %a) {
1087; CHECK-LABEL: add_v8i16_v8i16_acc:
1088; CHECK: @ %bb.0: @ %entry
1089; CHECK-NEXT: vmul.i16 q0, q0, q1
1090; CHECK-NEXT: vaddva.u16 r0, q0
1091; CHECK-NEXT: uxth r0, r0
1092; CHECK-NEXT: bx lr
1093entry:
1094 %m = mul <8 x i16> %x, %y
1095 %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %m)
1096 %r = add i16 %z, %a
1097 ret i16 %r
1098}
1099
; Accumulating form: <8 x i16> operands zero-extended to <8 x i64>, multiplied,
; add-reduced to i64, then added to the i64 accumulator %a.
; The checks expect a single vmlalva.u16 accumulating into the r0/r1 pair.
1100define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_zext(<8 x i16> %x, <8 x i16> %y, i64 %a) {
1101; CHECK-LABEL: add_v8i16_v8i64_acc_zext:
1102; CHECK: @ %bb.0: @ %entry
David Green33aa5df2020-02-17 12:00:17 +00001103; CHECK-NEXT: vmlalva.u16 r0, r1, q0, q1
1104; CHECK-NEXT: bx lr
David Green0ac4f6b2020-02-17 11:41:16 +00001105entry:
1106 %xx = zext <8 x i16> %x to <8 x i64>
1107 %yy = zext <8 x i16> %y to <8 x i64>
1108 %m = mul <8 x i64> %xx, %yy
1109 %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %m)
1110 %r = add i64 %z, %a
1111 ret i64 %r
1112}
1113
; Accumulating form: <8 x i16> operands sign-extended to <8 x i64>, multiplied,
; add-reduced to i64, then added to the i64 accumulator %a.
; The checks expect a single vmlalva.s16 accumulating into the r0/r1 pair.
1114define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %y, i64 %a) {
1115; CHECK-LABEL: add_v8i16_v8i64_acc_sext:
1116; CHECK: @ %bb.0: @ %entry
David Green33aa5df2020-02-17 12:00:17 +00001117; CHECK-NEXT: vmlalva.s16 r0, r1, q0, q1
1118; CHECK-NEXT: bx lr
David Green0ac4f6b2020-02-17 11:41:16 +00001119entry:
1120 %xx = sext <8 x i16> %x to <8 x i64>
1121 %yy = sext <8 x i16> %y to <8 x i64>
1122 %m = mul <8 x i64> %xx, %yy
1123 %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %m)
1124 %r = add i64 %z, %a
1125 ret i64 %r
1126}
1127
; Accumulating form: <2 x i16> operands zero-extended to <2 x i64>, multiplied,
; add-reduced to i64, then added to the i64 accumulator %a.
; The checks expect both operands to be masked with 0xffff, the lane products
; to be formed in core registers with umull followed by umlal, and the
; accumulator to be added with an adds/adc.w pair.
1128define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, <2 x i16> %y, i64 %a) {
1129; CHECK-LABEL: add_v2i16_v2i64_acc_zext:
1130; CHECK: @ %bb.0: @ %entry
1131; CHECK-NEXT: .save {r7, lr}
1132; CHECK-NEXT: push {r7, lr}
David Greenc9eaed52020-03-28 16:22:05 +00001133; CHECK-NEXT: vmov.i64 q2, #0xffff
David Green0ac4f6b2020-02-17 11:41:16 +00001134; CHECK-NEXT: vand q1, q1, q2
1135; CHECK-NEXT: vand q0, q0, q2
1136; CHECK-NEXT: vmov r2, s4
1137; CHECK-NEXT: vmov r3, s0
1138; CHECK-NEXT: vmov r12, s6
1139; CHECK-NEXT: umull r2, lr, r3, r2
1140; CHECK-NEXT: vmov r3, s2
1141; CHECK-NEXT: umlal r2, lr, r3, r12
1142; CHECK-NEXT: adds r0, r0, r2
1143; CHECK-NEXT: adc.w r1, r1, lr
1144; CHECK-NEXT: pop {r7, pc}
David Green0ac4f6b2020-02-17 11:41:16 +00001145entry:
1146 %xx = zext <2 x i16> %x to <2 x i64>
1147 %yy = zext <2 x i16> %y to <2 x i64>
1148 %m = mul <2 x i64> %xx, %yy
1149 %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
1150 %r = add i64 %z, %a
1151 ret i64 %r
1152}
1153
; Accumulating form: <2 x i16> operands sign-extended to <2 x i64>, multiplied,
; add-reduced to i64, then added to the i64 accumulator %a.
; The checks expect each lane to be moved to a core register and sign-extended
; with sxth, the products to be formed with smull followed by smlal, and the
; accumulator to be added with an adds/adc.w pair.
1154define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_sext(<2 x i16> %x, <2 x i16> %y, i64 %a) {
1155; CHECK-LABEL: add_v2i16_v2i64_acc_sext:
1156; CHECK: @ %bb.0: @ %entry
1157; CHECK-NEXT: .save {r7, lr}
1158; CHECK-NEXT: push {r7, lr}
1159; CHECK-NEXT: vmov r2, s4
1160; CHECK-NEXT: vmov r3, s0
1161; CHECK-NEXT: sxth r2, r2
1162; CHECK-NEXT: sxth r3, r3
1163; CHECK-NEXT: smull r2, r12, r3, r2
1164; CHECK-NEXT: vmov r3, s6
1165; CHECK-NEXT: sxth.w lr, r3
1166; CHECK-NEXT: vmov r3, s2
1167; CHECK-NEXT: sxth r3, r3
1168; CHECK-NEXT: smlal r2, r12, r3, lr
1169; CHECK-NEXT: adds r0, r0, r2
1170; CHECK-NEXT: adc.w r1, r1, r12
1171; CHECK-NEXT: pop {r7, pc}
1172entry:
1173 %xx = sext <2 x i16> %x to <2 x i64>
1174 %yy = sext <2 x i16> %y to <2 x i64>
1175 %m = mul <2 x i64> %xx, %yy
1176 %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
1177 %r = add i64 %z, %a
1178 ret i64 %r
1179}
1180
; Accumulating form: <16 x i8> operands zero-extended to <16 x i32>,
; multiplied, add-reduced to i32, then added to the i32 accumulator %a.
; The checks expect a single vmlava.u8 accumulating into r0.
1181define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_acc_zext(<16 x i8> %x, <16 x i8> %y, i32 %a) {
1182; CHECK-LABEL: add_v16i8_v16i32_acc_zext:
1183; CHECK: @ %bb.0: @ %entry
David Green33aa5df2020-02-17 12:00:17 +00001184; CHECK-NEXT: vmlava.u8 r0, q0, q1
David Green0ac4f6b2020-02-17 11:41:16 +00001185; CHECK-NEXT: bx lr
1186entry:
1187 %xx = zext <16 x i8> %x to <16 x i32>
1188 %yy = zext <16 x i8> %y to <16 x i32>
1189 %m = mul <16 x i32> %xx, %yy
1190 %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %m)
1191 %r = add i32 %z, %a
1192 ret i32 %r
1193}
1194
; Accumulating form: <16 x i8> operands sign-extended to <16 x i32>,
; multiplied, add-reduced to i32, then added to the i32 accumulator %a.
; The checks expect a single vmlava.s8 accumulating into r0.
1195define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_acc_sext(<16 x i8> %x, <16 x i8> %y, i32 %a) {
1196; CHECK-LABEL: add_v16i8_v16i32_acc_sext:
1197; CHECK: @ %bb.0: @ %entry
David Green33aa5df2020-02-17 12:00:17 +00001198; CHECK-NEXT: vmlava.s8 r0, q0, q1
David Green0ac4f6b2020-02-17 11:41:16 +00001199; CHECK-NEXT: bx lr
1200entry:
1201 %xx = sext <16 x i8> %x to <16 x i32>
1202 %yy = sext <16 x i8> %y to <16 x i32>
1203 %m = mul <16 x i32> %xx, %yy
1204 %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %m)
1205 %r = add i32 %z, %a
1206 ret i32 %r
1207}
1208
; Accumulating form: <4 x i8> operands zero-extended to <4 x i32>, multiplied,
; add-reduced to i32, then added to the i32 accumulator %a.
; The checks expect the zero extension to be done by masking both operands
; with a 0xff splat, followed by a single vmlava.u32 accumulating into r0.
1209define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_zext(<4 x i8> %x, <4 x i8> %y, i32 %a) {
1210; CHECK-LABEL: add_v4i8_v4i32_acc_zext:
1211; CHECK: @ %bb.0: @ %entry
1212; CHECK-NEXT: vmov.i32 q2, #0xff
1213; CHECK-NEXT: vand q1, q1, q2
1214; CHECK-NEXT: vand q0, q0, q2
David Green33aa5df2020-02-17 12:00:17 +00001215; CHECK-NEXT: vmlava.u32 r0, q0, q1
David Green0ac4f6b2020-02-17 11:41:16 +00001216; CHECK-NEXT: bx lr
1217entry:
1218 %xx = zext <4 x i8> %x to <4 x i32>
1219 %yy = zext <4 x i8> %y to <4 x i32>
1220 %m = mul <4 x i32> %xx, %yy
1221 %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
1222 %r = add i32 %z, %a
1223 ret i32 %r
1224}
1225
; Accumulating form: <4 x i8> operands sign-extended to <4 x i32>, multiplied,
; add-reduced to i32, then added to the i32 accumulator %a.
; The checks expect the sign extension to be done in two steps per operand
; (vmovlb.s8 then vmovlb.s16), followed by a single vmlava.u32 into r0.
1226define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_sext(<4 x i8> %x, <4 x i8> %y, i32 %a) {
1227; CHECK-LABEL: add_v4i8_v4i32_acc_sext:
1228; CHECK: @ %bb.0: @ %entry
1229; CHECK-NEXT: vmovlb.s8 q1, q1
1230; CHECK-NEXT: vmovlb.s8 q0, q0
1231; CHECK-NEXT: vmovlb.s16 q1, q1
1232; CHECK-NEXT: vmovlb.s16 q0, q0
David Green33aa5df2020-02-17 12:00:17 +00001233; CHECK-NEXT: vmlava.u32 r0, q0, q1
David Green0ac4f6b2020-02-17 11:41:16 +00001234; CHECK-NEXT: bx lr
1235entry:
1236 %xx = sext <4 x i8> %x to <4 x i32>
1237 %yy = sext <4 x i8> %y to <4 x i32>
1238 %m = mul <4 x i32> %xx, %yy
1239 %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
1240 %r = add i32 %z, %a
1241 ret i32 %r
1242}
1243
; Accumulating form: <16 x i8> operands zero-extended to <16 x i16>,
; multiplied, add-reduced to i16, then added to the i16 accumulator %a. The
; result is returned zeroext.
; The checks expect bytes 8-15 and bytes 0-7 of each operand to be copied lane
; by lane into 16-bit lanes of temporary vectors, each half to be multiplied
; with vmullb.u8, the two product vectors to be summed with vadd.i16, and the
; total to be reduced with vaddva.u16 into r0 and finished with uxth.
1244define arm_aapcs_vfpcc zeroext i16 @add_v16i8_v16i16_acc_zext(<16 x i8> %x, <16 x i8> %y, i16 %a) {
1245; CHECK-LABEL: add_v16i8_v16i16_acc_zext:
1246; CHECK: @ %bb.0: @ %entry
1247; CHECK-NEXT: vmov.u8 r1, q1[8]
1248; CHECK-NEXT: vmov.16 q2[0], r1
1249; CHECK-NEXT: vmov.u8 r1, q1[9]
1250; CHECK-NEXT: vmov.16 q2[1], r1
1251; CHECK-NEXT: vmov.u8 r1, q1[10]
1252; CHECK-NEXT: vmov.16 q2[2], r1
1253; CHECK-NEXT: vmov.u8 r1, q1[11]
1254; CHECK-NEXT: vmov.16 q2[3], r1
1255; CHECK-NEXT: vmov.u8 r1, q1[12]
1256; CHECK-NEXT: vmov.16 q2[4], r1
1257; CHECK-NEXT: vmov.u8 r1, q1[13]
1258; CHECK-NEXT: vmov.16 q2[5], r1
1259; CHECK-NEXT: vmov.u8 r1, q1[14]
1260; CHECK-NEXT: vmov.16 q2[6], r1
1261; CHECK-NEXT: vmov.u8 r1, q1[15]
1262; CHECK-NEXT: vmov.16 q2[7], r1
1263; CHECK-NEXT: vmov.u8 r1, q0[8]
1264; CHECK-NEXT: vmov.16 q3[0], r1
1265; CHECK-NEXT: vmov.u8 r1, q0[9]
1266; CHECK-NEXT: vmov.16 q3[1], r1
1267; CHECK-NEXT: vmov.u8 r1, q0[10]
1268; CHECK-NEXT: vmov.16 q3[2], r1
1269; CHECK-NEXT: vmov.u8 r1, q0[11]
1270; CHECK-NEXT: vmov.16 q3[3], r1
1271; CHECK-NEXT: vmov.u8 r1, q0[12]
1272; CHECK-NEXT: vmov.16 q3[4], r1
1273; CHECK-NEXT: vmov.u8 r1, q0[13]
1274; CHECK-NEXT: vmov.16 q3[5], r1
1275; CHECK-NEXT: vmov.u8 r1, q0[14]
1276; CHECK-NEXT: vmov.16 q3[6], r1
1277; CHECK-NEXT: vmov.u8 r1, q0[15]
1278; CHECK-NEXT: vmov.16 q3[7], r1
David Green0ac4f6b2020-02-17 11:41:16 +00001279; CHECK-NEXT: vmov.u8 r1, q1[0]
David Greenfbd53ff2020-04-01 13:58:42 +01001280; CHECK-NEXT: vmullb.u8 q2, q3, q2
David Green0ac4f6b2020-02-17 11:41:16 +00001281; CHECK-NEXT: vmov.16 q3[0], r1
1282; CHECK-NEXT: vmov.u8 r1, q1[1]
1283; CHECK-NEXT: vmov.16 q3[1], r1
1284; CHECK-NEXT: vmov.u8 r1, q1[2]
1285; CHECK-NEXT: vmov.16 q3[2], r1
1286; CHECK-NEXT: vmov.u8 r1, q1[3]
1287; CHECK-NEXT: vmov.16 q3[3], r1
1288; CHECK-NEXT: vmov.u8 r1, q1[4]
1289; CHECK-NEXT: vmov.16 q3[4], r1
1290; CHECK-NEXT: vmov.u8 r1, q1[5]
1291; CHECK-NEXT: vmov.16 q3[5], r1
1292; CHECK-NEXT: vmov.u8 r1, q1[6]
1293; CHECK-NEXT: vmov.16 q3[6], r1
1294; CHECK-NEXT: vmov.u8 r1, q1[7]
1295; CHECK-NEXT: vmov.16 q3[7], r1
1296; CHECK-NEXT: vmov.u8 r1, q0[0]
David Greenfbd53ff2020-04-01 13:58:42 +01001297; CHECK-NEXT: vmov.16 q1[0], r1
David Green0ac4f6b2020-02-17 11:41:16 +00001298; CHECK-NEXT: vmov.u8 r1, q0[1]
David Greenfbd53ff2020-04-01 13:58:42 +01001299; CHECK-NEXT: vmov.16 q1[1], r1
David Green0ac4f6b2020-02-17 11:41:16 +00001300; CHECK-NEXT: vmov.u8 r1, q0[2]
David Greenfbd53ff2020-04-01 13:58:42 +01001301; CHECK-NEXT: vmov.16 q1[2], r1
David Green0ac4f6b2020-02-17 11:41:16 +00001302; CHECK-NEXT: vmov.u8 r1, q0[3]
David Greenfbd53ff2020-04-01 13:58:42 +01001303; CHECK-NEXT: vmov.16 q1[3], r1
David Green0ac4f6b2020-02-17 11:41:16 +00001304; CHECK-NEXT: vmov.u8 r1, q0[4]
David Greenfbd53ff2020-04-01 13:58:42 +01001305; CHECK-NEXT: vmov.16 q1[4], r1
David Green0ac4f6b2020-02-17 11:41:16 +00001306; CHECK-NEXT: vmov.u8 r1, q0[5]
David Greenfbd53ff2020-04-01 13:58:42 +01001307; CHECK-NEXT: vmov.16 q1[5], r1
David Green0ac4f6b2020-02-17 11:41:16 +00001308; CHECK-NEXT: vmov.u8 r1, q0[6]
David Greenfbd53ff2020-04-01 13:58:42 +01001309; CHECK-NEXT: vmov.16 q1[6], r1
David Green0ac4f6b2020-02-17 11:41:16 +00001310; CHECK-NEXT: vmov.u8 r1, q0[7]
David Greenfbd53ff2020-04-01 13:58:42 +01001311; CHECK-NEXT: vmov.16 q1[7], r1
1312; CHECK-NEXT: vmullb.u8 q0, q1, q3
David Green0ac4f6b2020-02-17 11:41:16 +00001313; CHECK-NEXT: vadd.i16 q0, q0, q2
1314; CHECK-NEXT: vaddva.u16 r0, q0
1315; CHECK-NEXT: uxth r0, r0
1316; CHECK-NEXT: bx lr
1317entry:
1318 %xx = zext <16 x i8> %x to <16 x i16>
1319 %yy = zext <16 x i8> %y to <16 x i16>
1320 %m = mul <16 x i16> %xx, %yy
1321 %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %m)
1322 %r = add i16 %z, %a
1323 ret i16 %r
1324}
1325
; Accumulating form: <16 x i8> operands sign-extended to <16 x i16>,
; multiplied, add-reduced to i16, then added to the i16 accumulator %a. The
; result is returned signext.
; The checks expect bytes 8-15 and bytes 0-7 of each operand to be copied lane
; by lane into 16-bit lanes of temporary vectors, each half to be multiplied
; with vmullb.s8, the two product vectors to be summed with vadd.i16, and the
; total to be reduced with vaddva.u16 into r0 and finished with sxth.
1326define arm_aapcs_vfpcc signext i16 @add_v16i8_v16i16_acc_sext(<16 x i8> %x, <16 x i8> %y, i16 %a) {
1327; CHECK-LABEL: add_v16i8_v16i16_acc_sext:
1328; CHECK: @ %bb.0: @ %entry
1329; CHECK-NEXT: vmov.u8 r1, q1[8]
1330; CHECK-NEXT: vmov.16 q2[0], r1
1331; CHECK-NEXT: vmov.u8 r1, q1[9]
1332; CHECK-NEXT: vmov.16 q2[1], r1
1333; CHECK-NEXT: vmov.u8 r1, q1[10]
1334; CHECK-NEXT: vmov.16 q2[2], r1
1335; CHECK-NEXT: vmov.u8 r1, q1[11]
1336; CHECK-NEXT: vmov.16 q2[3], r1
1337; CHECK-NEXT: vmov.u8 r1, q1[12]
1338; CHECK-NEXT: vmov.16 q2[4], r1
1339; CHECK-NEXT: vmov.u8 r1, q1[13]
1340; CHECK-NEXT: vmov.16 q2[5], r1
1341; CHECK-NEXT: vmov.u8 r1, q1[14]
1342; CHECK-NEXT: vmov.16 q2[6], r1
1343; CHECK-NEXT: vmov.u8 r1, q1[15]
1344; CHECK-NEXT: vmov.16 q2[7], r1
1345; CHECK-NEXT: vmov.u8 r1, q0[8]
1346; CHECK-NEXT: vmov.16 q3[0], r1
1347; CHECK-NEXT: vmov.u8 r1, q0[9]
1348; CHECK-NEXT: vmov.16 q3[1], r1
1349; CHECK-NEXT: vmov.u8 r1, q0[10]
1350; CHECK-NEXT: vmov.16 q3[2], r1
1351; CHECK-NEXT: vmov.u8 r1, q0[11]
1352; CHECK-NEXT: vmov.16 q3[3], r1
1353; CHECK-NEXT: vmov.u8 r1, q0[12]
1354; CHECK-NEXT: vmov.16 q3[4], r1
1355; CHECK-NEXT: vmov.u8 r1, q0[13]
1356; CHECK-NEXT: vmov.16 q3[5], r1
1357; CHECK-NEXT: vmov.u8 r1, q0[14]
1358; CHECK-NEXT: vmov.16 q3[6], r1
1359; CHECK-NEXT: vmov.u8 r1, q0[15]
1360; CHECK-NEXT: vmov.16 q3[7], r1
David Green0ac4f6b2020-02-17 11:41:16 +00001361; CHECK-NEXT: vmov.u8 r1, q1[0]
David Greenfbd53ff2020-04-01 13:58:42 +01001362; CHECK-NEXT: vmullb.s8 q2, q3, q2
David Green0ac4f6b2020-02-17 11:41:16 +00001363; CHECK-NEXT: vmov.16 q3[0], r1
1364; CHECK-NEXT: vmov.u8 r1, q1[1]
1365; CHECK-NEXT: vmov.16 q3[1], r1
1366; CHECK-NEXT: vmov.u8 r1, q1[2]
1367; CHECK-NEXT: vmov.16 q3[2], r1
1368; CHECK-NEXT: vmov.u8 r1, q1[3]
1369; CHECK-NEXT: vmov.16 q3[3], r1
1370; CHECK-NEXT: vmov.u8 r1, q1[4]
1371; CHECK-NEXT: vmov.16 q3[4], r1
1372; CHECK-NEXT: vmov.u8 r1, q1[5]
1373; CHECK-NEXT: vmov.16 q3[5], r1
1374; CHECK-NEXT: vmov.u8 r1, q1[6]
1375; CHECK-NEXT: vmov.16 q3[6], r1
1376; CHECK-NEXT: vmov.u8 r1, q1[7]
1377; CHECK-NEXT: vmov.16 q3[7], r1
1378; CHECK-NEXT: vmov.u8 r1, q0[0]
David Greenfbd53ff2020-04-01 13:58:42 +01001379; CHECK-NEXT: vmov.16 q1[0], r1
David Green0ac4f6b2020-02-17 11:41:16 +00001380; CHECK-NEXT: vmov.u8 r1, q0[1]
David Greenfbd53ff2020-04-01 13:58:42 +01001381; CHECK-NEXT: vmov.16 q1[1], r1
David Green0ac4f6b2020-02-17 11:41:16 +00001382; CHECK-NEXT: vmov.u8 r1, q0[2]
David Greenfbd53ff2020-04-01 13:58:42 +01001383; CHECK-NEXT: vmov.16 q1[2], r1
David Green0ac4f6b2020-02-17 11:41:16 +00001384; CHECK-NEXT: vmov.u8 r1, q0[3]
David Greenfbd53ff2020-04-01 13:58:42 +01001385; CHECK-NEXT: vmov.16 q1[3], r1
David Green0ac4f6b2020-02-17 11:41:16 +00001386; CHECK-NEXT: vmov.u8 r1, q0[4]
David Greenfbd53ff2020-04-01 13:58:42 +01001387; CHECK-NEXT: vmov.16 q1[4], r1
David Green0ac4f6b2020-02-17 11:41:16 +00001388; CHECK-NEXT: vmov.u8 r1, q0[5]
David Greenfbd53ff2020-04-01 13:58:42 +01001389; CHECK-NEXT: vmov.16 q1[5], r1
David Green0ac4f6b2020-02-17 11:41:16 +00001390; CHECK-NEXT: vmov.u8 r1, q0[6]
David Greenfbd53ff2020-04-01 13:58:42 +01001391; CHECK-NEXT: vmov.16 q1[6], r1
David Green0ac4f6b2020-02-17 11:41:16 +00001392; CHECK-NEXT: vmov.u8 r1, q0[7]
David Greenfbd53ff2020-04-01 13:58:42 +01001393; CHECK-NEXT: vmov.16 q1[7], r1
1394; CHECK-NEXT: vmullb.s8 q0, q1, q3
David Green0ac4f6b2020-02-17 11:41:16 +00001395; CHECK-NEXT: vadd.i16 q0, q0, q2
1396; CHECK-NEXT: vaddva.u16 r0, q0
1397; CHECK-NEXT: sxth r0, r0
1398; CHECK-NEXT: bx lr
1399entry:
1400 %xx = sext <16 x i8> %x to <16 x i16>
1401 %yy = sext <16 x i8> %y to <16 x i16>
1402 %m = mul <16 x i16> %xx, %yy
1403 %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %m)
1404 %r = add i16 %z, %a
1405 ret i16 %r
1406}
1407
; Accumulating form: <8 x i8> operands zero-extended to <8 x i16>, multiplied,
; add-reduced to i16, then added to the i16 accumulator %a. The result is
; returned zeroext.
; The checks expect a vmullb.u8 widening multiply, a vaddva.u16 reduction
; accumulating into r0, and a final uxth.
1408define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_acc_zext(<8 x i8> %x, <8 x i8> %y, i16 %a) {
1409; CHECK-LABEL: add_v8i8_v8i16_acc_zext:
1410; CHECK: @ %bb.0: @ %entry
David Greenfbd53ff2020-04-01 13:58:42 +01001411; CHECK-NEXT: vmullb.u8 q0, q0, q1
David Green0ac4f6b2020-02-17 11:41:16 +00001412; CHECK-NEXT: vaddva.u16 r0, q0
1413; CHECK-NEXT: uxth r0, r0
1414; CHECK-NEXT: bx lr
1415entry:
1416 %xx = zext <8 x i8> %x to <8 x i16>
1417 %yy = zext <8 x i8> %y to <8 x i16>
1418 %m = mul <8 x i16> %xx, %yy
1419 %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %m)
1420 %r = add i16 %z, %a
1421 ret i16 %r
1422}
1423
; Accumulating form: <8 x i8> operands sign-extended to <8 x i16>, multiplied,
; add-reduced to i16, then added to the i16 accumulator %a. The result is
; returned signext.
; The checks expect a vmullb.s8 widening multiply, a vaddva.u16 reduction
; accumulating into r0, and a final sxth.
1424define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_acc_sext(<8 x i8> %x, <8 x i8> %y, i16 %a) {
1425; CHECK-LABEL: add_v8i8_v8i16_acc_sext:
1426; CHECK: @ %bb.0: @ %entry
David Greenfbd53ff2020-04-01 13:58:42 +01001427; CHECK-NEXT: vmullb.s8 q0, q0, q1
David Green0ac4f6b2020-02-17 11:41:16 +00001428; CHECK-NEXT: vaddva.u16 r0, q0
1429; CHECK-NEXT: sxth r0, r0
1430; CHECK-NEXT: bx lr
1431entry:
1432 %xx = sext <8 x i8> %x to <8 x i16>
1433 %yy = sext <8 x i8> %y to <8 x i16>
1434 %m = mul <8 x i16> %xx, %yy
1435 %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %m)
1436 %r = add i16 %z, %a
1437 ret i16 %r
1438}
1439
; Accumulating form with no extension: <16 x i8> multiply, add reduction to
; i8, then the i8 accumulator %a is added. The result is returned zeroext.
; The checks expect a separate vmul.i8 followed by vaddva.u8 into r0, and a
; final uxtb for the zeroext return.
1440define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8_acc(<16 x i8> %x, <16 x i8> %y, i8 %a) {
1441; CHECK-LABEL: add_v16i8_v16i8_acc:
1442; CHECK: @ %bb.0: @ %entry
1443; CHECK-NEXT: vmul.i8 q0, q0, q1
1444; CHECK-NEXT: vaddva.u8 r0, q0
1445; CHECK-NEXT: uxtb r0, r0
1446; CHECK-NEXT: bx lr
1447entry:
1448 %m = mul <16 x i8> %x, %y
1449 %z = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %m)
1450 %r = add i8 %z, %a
1451 ret i8 %r
1452}
1453
; Accumulating form: <16 x i8> operands zero-extended to <16 x i64>,
; multiplied, add-reduced to i64, then added to the i64 accumulator %a.
; The checks expect a fully expanded sequence rather than a single
; multiply-accumulate: bytes are extracted two at a time from each operand,
; placed in 32-bit lanes 0 and 2 of a temporary vector, masked with a 64-bit
; 0xff splat, multiplied in core registers with umull (umlal for the last
; pair), and folded into a running 64-bit sum with adds/adcs chains before the
; accumulator in r0/r1 is added. The expected prologue saves r4, r5 and
; d8-d11, which the sequence uses as temporaries.
1454define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, <16 x i8> %y, i64 %a) {
1455; CHECK-LABEL: add_v16i8_v16i64_acc_zext:
1456; CHECK: @ %bb.0: @ %entry
1457; CHECK-NEXT: .save {r4, r5, r7, lr}
1458; CHECK-NEXT: push {r4, r5, r7, lr}
1459; CHECK-NEXT: .vsave {d8, d9, d10, d11}
1460; CHECK-NEXT: vpush {d8, d9, d10, d11}
1461; CHECK-NEXT: vmov.u8 r2, q1[0]
1462; CHECK-NEXT: vmov.u8 r3, q0[0]
1463; CHECK-NEXT: vmov.32 q3[0], r2
1464; CHECK-NEXT: vmov.u8 r2, q1[1]
David Green0ac4f6b2020-02-17 11:41:16 +00001465; CHECK-NEXT: vmov.32 q4[0], r3
1466; CHECK-NEXT: vmov.u8 r3, q0[1]
David Greenc9eaed52020-03-28 16:22:05 +00001467; CHECK-NEXT: vmov.32 q3[2], r2
1468; CHECK-NEXT: vmov.i64 q2, #0xff
David Green0ac4f6b2020-02-17 11:41:16 +00001469; CHECK-NEXT: vmov.32 q4[2], r3
1470; CHECK-NEXT: vand q3, q3, q2
1471; CHECK-NEXT: vand q4, q4, q2
1472; CHECK-NEXT: vmov r2, s14
1473; CHECK-NEXT: vmov r3, s18
David Greenc9eaed52020-03-28 16:22:05 +00001474; CHECK-NEXT: vmov.u8 r4, q0[2]
David Green0ac4f6b2020-02-17 11:41:16 +00001475; CHECK-NEXT: umull r12, lr, r3, r2
1476; CHECK-NEXT: vmov r3, s16
1477; CHECK-NEXT: vmov r2, s12
1478; CHECK-NEXT: vmov.32 q4[0], r4
1479; CHECK-NEXT: vmov.u8 r4, q0[3]
1480; CHECK-NEXT: vmov.32 q4[2], r4
1481; CHECK-NEXT: vand q4, q4, q2
1482; CHECK-NEXT: vmov r4, s16
1483; CHECK-NEXT: umull r2, r3, r3, r2
1484; CHECK-NEXT: orr.w lr, lr, r3
1485; CHECK-NEXT: vmov.u8 r3, q1[2]
1486; CHECK-NEXT: vmov.32 q3[0], r3
1487; CHECK-NEXT: vmov.u8 r3, q1[3]
1488; CHECK-NEXT: vmov.32 q3[2], r3
1489; CHECK-NEXT: add r2, r12
1490; CHECK-NEXT: vand q3, q3, q2
1491; CHECK-NEXT: vmov r3, s12
1492; CHECK-NEXT: umull r3, r4, r4, r3
1493; CHECK-NEXT: vmov.32 q5[0], r3
1494; CHECK-NEXT: vmov r3, s14
1495; CHECK-NEXT: vmov.32 q5[1], r4
1496; CHECK-NEXT: vmov r4, s18
1497; CHECK-NEXT: umull r3, r4, r4, r3
1498; CHECK-NEXT: vmov.32 q5[2], r3
1499; CHECK-NEXT: vmov.32 q5[3], r4
1500; CHECK-NEXT: vmov r3, s20
1501; CHECK-NEXT: vmov r5, s21
1502; CHECK-NEXT: adds r2, r2, r3
1503; CHECK-NEXT: adc.w r3, lr, r5
1504; CHECK-NEXT: vmov r5, s22
1505; CHECK-NEXT: adds.w r12, r2, r5
1506; CHECK-NEXT: vmov.u8 r5, q1[4]
1507; CHECK-NEXT: adcs r3, r4
1508; CHECK-NEXT: vmov.u8 r4, q0[4]
1509; CHECK-NEXT: vmov.32 q3[0], r5
1510; CHECK-NEXT: vmov.u8 r5, q1[5]
1511; CHECK-NEXT: vmov.32 q4[0], r4
1512; CHECK-NEXT: vmov.u8 r4, q0[5]
1513; CHECK-NEXT: vmov.32 q3[2], r5
1514; CHECK-NEXT: vmov.32 q4[2], r4
1515; CHECK-NEXT: vand q3, q3, q2
1516; CHECK-NEXT: vand q4, q4, q2
1517; CHECK-NEXT: vmov r5, s12
1518; CHECK-NEXT: vmov r4, s16
1519; CHECK-NEXT: umull r5, r4, r4, r5
1520; CHECK-NEXT: vmov.32 q5[0], r5
1521; CHECK-NEXT: vmov r5, s14
1522; CHECK-NEXT: vmov.32 q5[1], r4
1523; CHECK-NEXT: vmov r4, s18
1524; CHECK-NEXT: umull r5, r4, r4, r5
1525; CHECK-NEXT: vmov.32 q5[2], r5
1526; CHECK-NEXT: vmov.32 q5[3], r4
1527; CHECK-NEXT: vmov r2, s20
1528; CHECK-NEXT: vmov r5, s21
1529; CHECK-NEXT: adds.w r2, r2, r12
1530; CHECK-NEXT: adcs r3, r5
1531; CHECK-NEXT: vmov r5, s22
1532; CHECK-NEXT: adds.w r12, r2, r5
1533; CHECK-NEXT: vmov.u8 r5, q1[6]
1534; CHECK-NEXT: adcs r3, r4
1535; CHECK-NEXT: vmov.u8 r4, q0[6]
1536; CHECK-NEXT: vmov.32 q3[0], r5
1537; CHECK-NEXT: vmov.u8 r5, q1[7]
1538; CHECK-NEXT: vmov.32 q4[0], r4
1539; CHECK-NEXT: vmov.u8 r4, q0[7]
1540; CHECK-NEXT: vmov.32 q3[2], r5
1541; CHECK-NEXT: vmov.32 q4[2], r4
1542; CHECK-NEXT: vand q3, q3, q2
1543; CHECK-NEXT: vand q4, q4, q2
1544; CHECK-NEXT: vmov r5, s12
1545; CHECK-NEXT: vmov r4, s16
1546; CHECK-NEXT: umull r5, r4, r4, r5
1547; CHECK-NEXT: vmov.32 q5[0], r5
1548; CHECK-NEXT: vmov r5, s14
1549; CHECK-NEXT: vmov.32 q5[1], r4
1550; CHECK-NEXT: vmov r4, s18
1551; CHECK-NEXT: umull r5, r4, r4, r5
1552; CHECK-NEXT: vmov.32 q5[2], r5
1553; CHECK-NEXT: vmov.32 q5[3], r4
1554; CHECK-NEXT: vmov r2, s20
1555; CHECK-NEXT: vmov r5, s21
1556; CHECK-NEXT: adds.w r2, r2, r12
1557; CHECK-NEXT: adcs r3, r5
1558; CHECK-NEXT: vmov r5, s22
1559; CHECK-NEXT: adds.w r12, r2, r5
1560; CHECK-NEXT: vmov.u8 r5, q1[8]
1561; CHECK-NEXT: adcs r3, r4
1562; CHECK-NEXT: vmov.u8 r4, q0[8]
1563; CHECK-NEXT: vmov.32 q3[0], r5
1564; CHECK-NEXT: vmov.u8 r5, q1[9]
1565; CHECK-NEXT: vmov.32 q4[0], r4
1566; CHECK-NEXT: vmov.u8 r4, q0[9]
1567; CHECK-NEXT: vmov.32 q3[2], r5
1568; CHECK-NEXT: vmov.32 q4[2], r4
1569; CHECK-NEXT: vand q3, q3, q2
1570; CHECK-NEXT: vand q4, q4, q2
1571; CHECK-NEXT: vmov r5, s12
1572; CHECK-NEXT: vmov r4, s16
1573; CHECK-NEXT: umull r5, r4, r4, r5
1574; CHECK-NEXT: vmov.32 q5[0], r5
1575; CHECK-NEXT: vmov r5, s14
1576; CHECK-NEXT: vmov.32 q5[1], r4
1577; CHECK-NEXT: vmov r4, s18
1578; CHECK-NEXT: umull r5, r4, r4, r5
1579; CHECK-NEXT: vmov.32 q5[2], r5
1580; CHECK-NEXT: vmov.32 q5[3], r4
1581; CHECK-NEXT: vmov r2, s20
1582; CHECK-NEXT: vmov r5, s21
1583; CHECK-NEXT: adds.w r2, r2, r12
1584; CHECK-NEXT: adcs r3, r5
1585; CHECK-NEXT: vmov r5, s22
1586; CHECK-NEXT: adds.w r12, r2, r5
1587; CHECK-NEXT: vmov.u8 r5, q1[10]
1588; CHECK-NEXT: adcs r3, r4
1589; CHECK-NEXT: vmov.u8 r4, q0[10]
1590; CHECK-NEXT: vmov.32 q3[0], r5
1591; CHECK-NEXT: vmov.u8 r5, q1[11]
1592; CHECK-NEXT: vmov.32 q4[0], r4
1593; CHECK-NEXT: vmov.u8 r4, q0[11]
1594; CHECK-NEXT: vmov.32 q3[2], r5
1595; CHECK-NEXT: vmov.32 q4[2], r4
1596; CHECK-NEXT: vand q3, q3, q2
1597; CHECK-NEXT: vand q4, q4, q2
1598; CHECK-NEXT: vmov r5, s12
1599; CHECK-NEXT: vmov r4, s16
1600; CHECK-NEXT: umull r5, r4, r4, r5
1601; CHECK-NEXT: vmov.32 q5[0], r5
1602; CHECK-NEXT: vmov r5, s14
1603; CHECK-NEXT: vmov.32 q5[1], r4
1604; CHECK-NEXT: vmov r4, s18
1605; CHECK-NEXT: umull r5, r4, r4, r5
1606; CHECK-NEXT: vmov.32 q5[2], r5
1607; CHECK-NEXT: vmov.32 q5[3], r4
1608; CHECK-NEXT: vmov r2, s20
1609; CHECK-NEXT: vmov r5, s21
1610; CHECK-NEXT: adds.w r2, r2, r12
1611; CHECK-NEXT: adcs r3, r5
1612; CHECK-NEXT: vmov r5, s22
1613; CHECK-NEXT: adds.w r12, r2, r5
1614; CHECK-NEXT: vmov.u8 r5, q1[12]
1615; CHECK-NEXT: adcs r3, r4
1616; CHECK-NEXT: vmov.u8 r4, q0[12]
1617; CHECK-NEXT: vmov.32 q3[0], r5
1618; CHECK-NEXT: vmov.u8 r5, q1[13]
1619; CHECK-NEXT: vmov.32 q4[0], r4
1620; CHECK-NEXT: vmov.u8 r4, q0[13]
1621; CHECK-NEXT: vmov.32 q3[2], r5
1622; CHECK-NEXT: vmov.32 q4[2], r4
1623; CHECK-NEXT: vand q3, q3, q2
1624; CHECK-NEXT: vand q4, q4, q2
1625; CHECK-NEXT: vmov r5, s12
1626; CHECK-NEXT: vmov r4, s16
1627; CHECK-NEXT: umull r5, r4, r4, r5
1628; CHECK-NEXT: vmov.32 q5[0], r5
1629; CHECK-NEXT: vmov r5, s14
1630; CHECK-NEXT: vmov.32 q5[1], r4
1631; CHECK-NEXT: vmov r4, s18
1632; CHECK-NEXT: umull r5, r4, r4, r5
1633; CHECK-NEXT: vmov.32 q5[2], r5
1634; CHECK-NEXT: vmov.32 q5[3], r4
1635; CHECK-NEXT: vmov r2, s20
1636; CHECK-NEXT: vmov r5, s21
1637; CHECK-NEXT: adds.w r2, r2, r12
1638; CHECK-NEXT: adcs r3, r5
1639; CHECK-NEXT: vmov r5, s22
1640; CHECK-NEXT: adds r2, r2, r5
1641; CHECK-NEXT: vmov.u8 r5, q1[14]
1642; CHECK-NEXT: vmov.32 q3[0], r5
1643; CHECK-NEXT: vmov.u8 r5, q1[15]
1644; CHECK-NEXT: adcs r3, r4
1645; CHECK-NEXT: vmov.32 q3[2], r5
1646; CHECK-NEXT: vmov.u8 r4, q0[14]
1647; CHECK-NEXT: vand q1, q3, q2
1648; CHECK-NEXT: vmov.32 q3[0], r4
1649; CHECK-NEXT: vmov.u8 r4, q0[15]
1650; CHECK-NEXT: vmov.32 q3[2], r4
1651; CHECK-NEXT: vmov r5, s4
1652; CHECK-NEXT: vand q0, q3, q2
1653; CHECK-NEXT: vmov r4, s0
1654; CHECK-NEXT: umlal r2, r3, r4, r5
1655; CHECK-NEXT: vmov r5, s6
1656; CHECK-NEXT: vmov r4, s2
1657; CHECK-NEXT: umlal r2, r3, r4, r5
1658; CHECK-NEXT: adds r0, r0, r2
1659; CHECK-NEXT: adcs r1, r3
1660; CHECK-NEXT: vpop {d8, d9, d10, d11}
1661; CHECK-NEXT: pop {r4, r5, r7, pc}
David Green0ac4f6b2020-02-17 11:41:16 +00001662entry:
1663 %xx = zext <16 x i8> %x to <16 x i64>
1664 %yy = zext <16 x i8> %y to <16 x i64>
1665 %m = mul <16 x i64> %xx, %yy
1666 %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %m)
1667 %r = add i64 %z, %a
1668 ret i64 %r
1669}
1670
; Test: sign-extend two <16 x i8> vectors to <16 x i64>, multiply them
; lane-wise, add-reduce the sixteen products to one i64 and add the incoming
; i64 accumulator %a.
; The assertions below are autogenerated (see the NOTE at the top of the file)
; and must not be edited by hand. They record a lane-by-lane lowering: each
; lane is extracted with vmov.u8, sign-extended with sxtb and multiplied with
; smull (smlal for lanes 14 and 15), with the running 64-bit sum carried in
; lr/r12 until the final adds/adcs pair accumulates into r0:r1.
1671define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %y, i64 %a) {
1672; CHECK-LABEL: add_v16i8_v16i64_acc_sext:
1673; CHECK: @ %bb.0: @ %entry
1674; CHECK-NEXT: .save {r4, lr}
1675; CHECK-NEXT: push {r4, lr}
1676; CHECK-NEXT: vmov.u8 r2, q1[0]
1677; CHECK-NEXT: vmov.u8 r3, q0[0]
1678; CHECK-NEXT: sxtb r2, r2
1679; CHECK-NEXT: sxtb r3, r3
1680; CHECK-NEXT: smull r2, r3, r3, r2
1681; CHECK-NEXT: vmov.32 q2[0], r2
1682; CHECK-NEXT: vmov.u8 r2, q1[1]
1683; CHECK-NEXT: vmov.32 q2[1], r3
1684; CHECK-NEXT: vmov.u8 r3, q0[1]
1685; CHECK-NEXT: sxtb r2, r2
1686; CHECK-NEXT: sxtb r3, r3
1687; CHECK-NEXT: smull r2, r3, r3, r2
1688; CHECK-NEXT: vmov.32 q2[2], r2
1689; CHECK-NEXT: vmov.32 q2[3], r3
1690; CHECK-NEXT: vmov lr, s10
1691; CHECK-NEXT: vmov r2, s8
1692; CHECK-NEXT: vmov r12, s9
1693; CHECK-NEXT: adds.w lr, lr, r2
1694; CHECK-NEXT: vmov.u8 r2, q1[2]
1695; CHECK-NEXT: adc.w r12, r12, r3
1696; CHECK-NEXT: vmov.u8 r3, q0[2]
1697; CHECK-NEXT: sxtb r2, r2
1698; CHECK-NEXT: sxtb r3, r3
1699; CHECK-NEXT: smull r2, r3, r3, r2
1700; CHECK-NEXT: vmov.32 q2[0], r2
1701; CHECK-NEXT: vmov.u8 r2, q1[3]
1702; CHECK-NEXT: vmov.32 q2[1], r3
1703; CHECK-NEXT: vmov.u8 r3, q0[3]
1704; CHECK-NEXT: sxtb r2, r2
1705; CHECK-NEXT: sxtb r3, r3
1706; CHECK-NEXT: smull r2, r3, r3, r2
1707; CHECK-NEXT: vmov.32 q2[2], r2
1708; CHECK-NEXT: vmov.32 q2[3], r3
1709; CHECK-NEXT: vmov r4, s8
1710; CHECK-NEXT: vmov r2, s9
1711; CHECK-NEXT: adds.w r4, r4, lr
1712; CHECK-NEXT: adc.w r12, r12, r2
1713; CHECK-NEXT: vmov r2, s10
1714; CHECK-NEXT: adds.w lr, r4, r2
1715; CHECK-NEXT: vmov.u8 r4, q1[4]
1716; CHECK-NEXT: vmov.u8 r2, q0[4]
1717; CHECK-NEXT: sxtb r4, r4
1718; CHECK-NEXT: sxtb r2, r2
1719; CHECK-NEXT: adc.w r12, r12, r3
1720; CHECK-NEXT: smull r2, r4, r2, r4
1721; CHECK-NEXT: vmov.32 q2[0], r2
1722; CHECK-NEXT: vmov.u8 r2, q1[5]
1723; CHECK-NEXT: vmov.32 q2[1], r4
1724; CHECK-NEXT: vmov.u8 r4, q0[5]
1725; CHECK-NEXT: sxtb r2, r2
1726; CHECK-NEXT: sxtb r4, r4
1727; CHECK-NEXT: smull r2, r4, r4, r2
1728; CHECK-NEXT: vmov.32 q2[2], r2
1729; CHECK-NEXT: vmov.32 q2[3], r4
1730; CHECK-NEXT: vmov r3, s8
1731; CHECK-NEXT: vmov r2, s9
1732; CHECK-NEXT: adds.w r3, r3, lr
1733; CHECK-NEXT: adc.w r12, r12, r2
1734; CHECK-NEXT: vmov r2, s10
1735; CHECK-NEXT: adds.w lr, r3, r2
1736; CHECK-NEXT: vmov.u8 r2, q0[6]
1737; CHECK-NEXT: adc.w r12, r12, r4
1738; CHECK-NEXT: vmov.u8 r4, q1[6]
1739; CHECK-NEXT: sxtb r4, r4
1740; CHECK-NEXT: sxtb r2, r2
1741; CHECK-NEXT: smull r2, r4, r2, r4
1742; CHECK-NEXT: vmov.32 q2[0], r2
1743; CHECK-NEXT: vmov.u8 r2, q1[7]
1744; CHECK-NEXT: vmov.32 q2[1], r4
1745; CHECK-NEXT: vmov.u8 r4, q0[7]
1746; CHECK-NEXT: sxtb r2, r2
1747; CHECK-NEXT: sxtb r4, r4
1748; CHECK-NEXT: smull r2, r4, r4, r2
1749; CHECK-NEXT: vmov.32 q2[2], r2
1750; CHECK-NEXT: vmov.32 q2[3], r4
1751; CHECK-NEXT: vmov r3, s8
1752; CHECK-NEXT: vmov r2, s9
1753; CHECK-NEXT: adds.w r3, r3, lr
1754; CHECK-NEXT: adc.w r12, r12, r2
1755; CHECK-NEXT: vmov r2, s10
1756; CHECK-NEXT: adds.w lr, r3, r2
1757; CHECK-NEXT: vmov.u8 r2, q0[8]
1758; CHECK-NEXT: adc.w r12, r12, r4
1759; CHECK-NEXT: vmov.u8 r4, q1[8]
1760; CHECK-NEXT: sxtb r4, r4
1761; CHECK-NEXT: sxtb r2, r2
1762; CHECK-NEXT: smull r2, r4, r2, r4
1763; CHECK-NEXT: vmov.32 q2[0], r2
1764; CHECK-NEXT: vmov.u8 r2, q1[9]
1765; CHECK-NEXT: vmov.32 q2[1], r4
1766; CHECK-NEXT: vmov.u8 r4, q0[9]
1767; CHECK-NEXT: sxtb r2, r2
1768; CHECK-NEXT: sxtb r4, r4
1769; CHECK-NEXT: smull r2, r4, r4, r2
1770; CHECK-NEXT: vmov.32 q2[2], r2
1771; CHECK-NEXT: vmov.32 q2[3], r4
1772; CHECK-NEXT: vmov r3, s8
1773; CHECK-NEXT: vmov r2, s9
1774; CHECK-NEXT: adds.w r3, r3, lr
1775; CHECK-NEXT: adc.w r12, r12, r2
1776; CHECK-NEXT: vmov r2, s10
1777; CHECK-NEXT: adds.w lr, r3, r2
1778; CHECK-NEXT: vmov.u8 r2, q0[10]
1779; CHECK-NEXT: adc.w r12, r12, r4
1780; CHECK-NEXT: vmov.u8 r4, q1[10]
1781; CHECK-NEXT: sxtb r4, r4
1782; CHECK-NEXT: sxtb r2, r2
1783; CHECK-NEXT: smull r2, r4, r2, r4
1784; CHECK-NEXT: vmov.32 q2[0], r2
1785; CHECK-NEXT: vmov.u8 r2, q1[11]
1786; CHECK-NEXT: vmov.32 q2[1], r4
1787; CHECK-NEXT: vmov.u8 r4, q0[11]
1788; CHECK-NEXT: sxtb r2, r2
1789; CHECK-NEXT: sxtb r4, r4
1790; CHECK-NEXT: smull r2, r4, r4, r2
1791; CHECK-NEXT: vmov.32 q2[2], r2
1792; CHECK-NEXT: vmov.32 q2[3], r4
1793; CHECK-NEXT: vmov r3, s8
1794; CHECK-NEXT: vmov r2, s9
1795; CHECK-NEXT: adds.w r3, r3, lr
1796; CHECK-NEXT: adc.w r12, r12, r2
1797; CHECK-NEXT: vmov r2, s10
1798; CHECK-NEXT: adds.w lr, r3, r2
1799; CHECK-NEXT: vmov.u8 r2, q0[12]
1800; CHECK-NEXT: adc.w r12, r12, r4
1801; CHECK-NEXT: vmov.u8 r4, q1[12]
1802; CHECK-NEXT: sxtb r4, r4
1803; CHECK-NEXT: sxtb r2, r2
1804; CHECK-NEXT: smull r2, r4, r2, r4
1805; CHECK-NEXT: vmov.32 q2[0], r2
1806; CHECK-NEXT: vmov.u8 r2, q1[13]
1807; CHECK-NEXT: vmov.32 q2[1], r4
1808; CHECK-NEXT: vmov.u8 r4, q0[13]
1809; CHECK-NEXT: sxtb r2, r2
1810; CHECK-NEXT: sxtb r4, r4
1811; CHECK-NEXT: smull r2, r4, r4, r2
1812; CHECK-NEXT: vmov.32 q2[2], r2
1813; CHECK-NEXT: vmov.32 q2[3], r4
1814; CHECK-NEXT: vmov r3, s8
1815; CHECK-NEXT: vmov r2, s9
1816; CHECK-NEXT: adds.w r3, r3, lr
1817; CHECK-NEXT: adc.w r12, r12, r2
1818; CHECK-NEXT: vmov r2, s10
1819; CHECK-NEXT: adds r2, r2, r3
1820; CHECK-NEXT: adc.w r3, r12, r4
1821; CHECK-NEXT: vmov.u8 r4, q1[14]
1822; CHECK-NEXT: sxtb.w r12, r4
1823; CHECK-NEXT: vmov.u8 r4, q0[14]
1824; CHECK-NEXT: sxtb r4, r4
1825; CHECK-NEXT: smlal r2, r3, r4, r12
1826; CHECK-NEXT: vmov.u8 r4, q1[15]
1827; CHECK-NEXT: sxtb.w r12, r4
1828; CHECK-NEXT: vmov.u8 r4, q0[15]
1829; CHECK-NEXT: sxtb r4, r4
1830; CHECK-NEXT: smlal r2, r3, r4, r12
1831; CHECK-NEXT: adds r0, r0, r2
1832; CHECK-NEXT: adcs r1, r3
1833; CHECK-NEXT: pop {r4, pc}
1834entry:
1835 %xx = sext <16 x i8> %x to <16 x i64>
1836 %yy = sext <16 x i8> %y to <16 x i64>
1837 %m = mul <16 x i64> %xx, %yy
1838 %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %m)
1839 %r = add i64 %z, %a
1840 ret i64 %r
1841}
1842
; Test: zero-extend two <2 x i8> vectors to <2 x i64>, multiply them
; lane-wise, add-reduce the two products to one i64 and add the incoming
; i64 accumulator %a.
; The assertions below are autogenerated (see the NOTE at the top of the file)
; and must not be edited by hand. They record both inputs being masked with a
; vmov.i64 #0xff / vand pair, one umull per lane, the two products combined
; with add (low words) and orr.w (high words), and a final adds/adcs pair
; that accumulates into r0:r1.
1843define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, <2 x i8> %y, i64 %a) {
1844; CHECK-LABEL: add_v2i8_v2i64_acc_zext:
1845; CHECK: @ %bb.0: @ %entry
1846; CHECK-NEXT: .save {r7, lr}
1847; CHECK-NEXT: push {r7, lr}
David Greenc9eaed52020-03-28 16:22:05 +00001848; CHECK-NEXT: vmov.i64 q2, #0xff
David Green0ac4f6b2020-02-17 11:41:16 +00001849; CHECK-NEXT: vand q1, q1, q2
1850; CHECK-NEXT: vand q0, q0, q2
1851; CHECK-NEXT: vmov r2, s6
1852; CHECK-NEXT: vmov r3, s2
1853; CHECK-NEXT: umull r12, lr, r3, r2
1854; CHECK-NEXT: vmov r2, s4
1855; CHECK-NEXT: vmov r3, s0
1856; CHECK-NEXT: umull r2, r3, r3, r2
1857; CHECK-NEXT: add r2, r12
1858; CHECK-NEXT: orr.w r3, r3, lr
1859; CHECK-NEXT: adds r0, r0, r2
1860; CHECK-NEXT: adcs r1, r3
1861; CHECK-NEXT: pop {r7, pc}
David Green0ac4f6b2020-02-17 11:41:16 +00001862entry:
1863 %xx = zext <2 x i8> %x to <2 x i64>
1864 %yy = zext <2 x i8> %y to <2 x i64>
1865 %m = mul <2 x i64> %xx, %yy
1866 %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
1867 %r = add i64 %z, %a
1868 ret i64 %r
1869}
1870
; Test: sign-extend two <2 x i8> vectors to <2 x i64>, multiply them
; lane-wise, add-reduce the two products to one i64 and add the incoming
; i64 accumulator %a.
; The assertions below are autogenerated (see the NOTE at the top of the file)
; and must not be edited by hand. They record each lane being moved to a core
; register and sign-extended with sxtb, the first product formed with smull,
; the second folded in with smlal, and a final adds/adc.w pair that
; accumulates into r0:r1.
1871define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_sext(<2 x i8> %x, <2 x i8> %y, i64 %a) {
1872; CHECK-LABEL: add_v2i8_v2i64_acc_sext:
1873; CHECK: @ %bb.0: @ %entry
1874; CHECK-NEXT: .save {r7, lr}
1875; CHECK-NEXT: push {r7, lr}
1876; CHECK-NEXT: vmov r2, s4
1877; CHECK-NEXT: vmov r3, s0
1878; CHECK-NEXT: sxtb r2, r2
1879; CHECK-NEXT: sxtb r3, r3
1880; CHECK-NEXT: smull r2, r12, r3, r2
1881; CHECK-NEXT: vmov r3, s6
1882; CHECK-NEXT: sxtb.w lr, r3
1883; CHECK-NEXT: vmov r3, s2
1884; CHECK-NEXT: sxtb r3, r3
1885; CHECK-NEXT: smlal r2, r12, r3, lr
1886; CHECK-NEXT: adds r0, r0, r2
1887; CHECK-NEXT: adc.w r1, r1, r12
1888; CHECK-NEXT: pop {r7, pc}
1889entry:
1890 %xx = sext <2 x i8> %x to <2 x i64>
1891 %yy = sext <2 x i8> %y to <2 x i64>
1892 %m = mul <2 x i64> %xx, %yy
1893 %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
1894 %r = add i64 %z, %a
1895 ret i64 %r
1896}
1897
; Test: multiply two <2 x i64> vectors lane-wise (no extension involved),
; add-reduce the two products to one i64 and add the incoming i64
; accumulator %a.
; The assertions below are autogenerated (see the NOTE at the top of the file)
; and must not be edited by hand. They record each 64-bit lane product being
; built from one umull plus two mla instructions on the 32-bit halves, the
; two products summed with adds.w/adcs, and a final adds/adcs pair that
; accumulates into r0:r1.
1898define arm_aapcs_vfpcc i64 @add_v2i64_v2i64_acc(<2 x i64> %x, <2 x i64> %y, i64 %a) {
1899; CHECK-LABEL: add_v2i64_v2i64_acc:
1900; CHECK: @ %bb.0: @ %entry
1901; CHECK-NEXT: .save {r4, r5, r6, lr}
1902; CHECK-NEXT: push {r4, r5, r6, lr}
1903; CHECK-NEXT: vmov r2, s4
1904; CHECK-NEXT: vmov r3, s0
1905; CHECK-NEXT: vmov r4, s5
1906; CHECK-NEXT: vmov r6, s7
1907; CHECK-NEXT: umull r12, lr, r3, r2
1908; CHECK-NEXT: mla r3, r3, r4, lr
1909; CHECK-NEXT: vmov r4, s1
1910; CHECK-NEXT: vmov.32 q2[0], r12
1911; CHECK-NEXT: mla r2, r4, r2, r3
1912; CHECK-NEXT: vmov r4, s6
1913; CHECK-NEXT: vmov r3, s2
1914; CHECK-NEXT: vmov.32 q2[1], r2
1915; CHECK-NEXT: vmov r12, s8
1916; CHECK-NEXT: umull lr, r5, r3, r4
1917; CHECK-NEXT: mla r3, r3, r6, r5
1918; CHECK-NEXT: vmov r5, s3
1919; CHECK-NEXT: adds.w r6, r12, lr
1920; CHECK-NEXT: mla r3, r5, r4, r3
1921; CHECK-NEXT: adcs r2, r3
1922; CHECK-NEXT: adds r0, r0, r6
1923; CHECK-NEXT: adcs r1, r2
1924; CHECK-NEXT: pop {r4, r5, r6, pc}
1925entry:
1926 %m = mul <2 x i64> %x, %y
1927 %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
1928 %r = add i64 %z, %a
1929 ret i64 %r
1930}
1931
; Declarations of the llvm.experimental.vector.reduce.add intrinsic overloads,
; one per vector type; each takes a vector and returns a scalar of the
; vector's element type.
1932declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>)
1933declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
1934declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
1935declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
1936declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
1937declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>)
1938declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
1939declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>)
1940declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>)
1941declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)