; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; 64-bit vector i8 multiply should select the NEON vmul.i8 instruction.
define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vmuli8:
;CHECK: vmul.i8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = mul <8 x i8> %tmp1, %tmp2
	ret <8 x i8> %tmp3
}

; 64-bit vector i16 multiply should select the NEON vmul.i16 instruction.
define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vmuli16:
;CHECK: vmul.i16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = mul <4 x i16> %tmp1, %tmp2
	ret <4 x i16> %tmp3
}

; 64-bit vector i32 multiply should select the NEON vmul.i32 instruction.
define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vmuli32:
;CHECK: vmul.i32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = mul <2 x i32> %tmp1, %tmp2
	ret <2 x i32> %tmp3
}

; 64-bit vector float multiply (fmul) should select the NEON vmul.f32 instruction.
define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vmulf32:
;CHECK: vmul.f32
	%tmp1 = load <2 x float>* %A
	%tmp2 = load <2 x float>* %B
	%tmp3 = fmul <2 x float> %tmp1, %tmp2
	ret <2 x float> %tmp3
}

; The polynomial-multiply intrinsic should select the NEON vmul.p8 instruction.
define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vmulp8:
;CHECK: vmul.p8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

; 128-bit vector i8 multiply should select the quad-register vmul.i8.
define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vmulQi8:
;CHECK: vmul.i8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = mul <16 x i8> %tmp1, %tmp2
	ret <16 x i8> %tmp3
}

; 128-bit vector i16 multiply should select the quad-register vmul.i16.
define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vmulQi16:
;CHECK: vmul.i16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = mul <8 x i16> %tmp1, %tmp2
	ret <8 x i16> %tmp3
}

; 128-bit vector i32 multiply should select the quad-register vmul.i32.
define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vmulQi32:
;CHECK: vmul.i32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = mul <4 x i32> %tmp1, %tmp2
	ret <4 x i32> %tmp3
}

; 128-bit vector float multiply (fmul) should select the quad-register vmul.f32.
define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vmulQf32:
;CHECK: vmul.f32
	%tmp1 = load <4 x float>* %A
	%tmp2 = load <4 x float>* %B
	%tmp3 = fmul <4 x float> %tmp1, %tmp2
	ret <4 x float> %tmp3
}

; 128-bit polynomial-multiply intrinsic should select the quad-register vmul.p8.
define <16 x i8> @vmulQp8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vmulQp8:
;CHECK: vmul.p8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

; Declarations for the NEON polynomial-multiply intrinsics used above.
declare <8 x i8>  @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone

; Multiplying by a lane-0 splat should use the scalar form: vmul.f32 d0, d0, d1[0].
define arm_aapcs_vfpcc <2 x float> @test_vmul_lanef32(<2 x float> %arg0_float32x2_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
entry:
; CHECK: test_vmul_lanef32:
; CHECK: vmul.f32 d0, d0, d1[0]
  %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <2 x i32> zeroinitializer ; <<2 x float>> [#uses=1]
  %1 = fmul <2 x float> %0, %arg0_float32x2_t     ; <<2 x float>> [#uses=1]
  ret <2 x float> %1
}

; Multiplying by a lane-1 splat should use the scalar form: vmul.i16 d0, d0, d1[1].
define arm_aapcs_vfpcc <4 x i16> @test_vmul_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
entry:
; CHECK: test_vmul_lanes16:
; CHECK: vmul.i16 d0, d0, d1[1]
  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
  %1 = mul <4 x i16> %0, %arg0_int16x4_t          ; <<4 x i16>> [#uses=1]
  ret <4 x i16> %1
}

; Multiplying by a lane-1 splat should use the scalar form: vmul.i32 d0, d0, d1[1].
define arm_aapcs_vfpcc <2 x i32> @test_vmul_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
entry:
; CHECK: test_vmul_lanes32:
; CHECK: vmul.i32 d0, d0, d1[1]
  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
  %1 = mul <2 x i32> %0, %arg0_int32x2_t          ; <<2 x i32>> [#uses=1]
  ret <2 x i32> %1
}

; Quad-register multiply by a lane splat: vmul.f32 q0, q0, d2[1].
define arm_aapcs_vfpcc <4 x float> @test_vmulQ_lanef32(<4 x float> %arg0_float32x4_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
entry:
; CHECK: test_vmulQ_lanef32:
; CHECK: vmul.f32 q0, q0, d2[1]
  %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
  %1 = fmul <4 x float> %0, %arg0_float32x4_t     ; <<4 x float>> [#uses=1]
  ret <4 x float> %1
}

; Quad-register multiply by a lane splat: vmul.i16 q0, q0, d2[1].
define arm_aapcs_vfpcc <8 x i16> @test_vmulQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
entry:
; CHECK: test_vmulQ_lanes16:
; CHECK: vmul.i16 q0, q0, d2[1]
  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %1 = mul <8 x i16> %0, %arg0_int16x8_t          ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %1
}

; Quad-register multiply by a lane splat: vmul.i32 q0, q0, d2[1].
define arm_aapcs_vfpcc <4 x i32> @test_vmulQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
entry:
; CHECK: test_vmulQ_lanes32:
; CHECK: vmul.i32 q0, q0, d2[1]
  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
  %1 = mul <4 x i32> %0, %arg0_int32x4_t          ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %1
}

; A widening multiply of two sign-extended i8 vectors should fuse into vmull.s8.
define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vmulls8:
;CHECK: vmull.s8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
	%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
	%tmp5 = mul <8 x i16> %tmp3, %tmp4
	ret <8 x i16> %tmp5
}

; A widening multiply of two sign-extended i16 vectors should fuse into vmull.s16.
define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vmulls16:
;CHECK: vmull.s16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
	%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
	%tmp5 = mul <4 x i32> %tmp3, %tmp4
	ret <4 x i32> %tmp5
}

; A widening multiply of two sign-extended i32 vectors should fuse into vmull.s32.
define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vmulls32:
;CHECK: vmull.s32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
	%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
	%tmp5 = mul <2 x i64> %tmp3, %tmp4
	ret <2 x i64> %tmp5
}

; A widening multiply of two zero-extended i8 vectors should fuse into vmull.u8.
define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vmullu8:
;CHECK: vmull.u8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
	%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
	%tmp5 = mul <8 x i16> %tmp3, %tmp4
	ret <8 x i16> %tmp5
}

; A widening multiply of two zero-extended i16 vectors should fuse into vmull.u16.
define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vmullu16:
;CHECK: vmull.u16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
	%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
	%tmp5 = mul <4 x i32> %tmp3, %tmp4
	ret <4 x i32> %tmp5
}

; A widening multiply of two zero-extended i32 vectors should fuse into vmull.u32.
define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vmullu32:
;CHECK: vmull.u32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
	%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
	%tmp5 = mul <2 x i64> %tmp3, %tmp4
	ret <2 x i64> %tmp5
}

; The widening polynomial-multiply intrinsic should select vmull.p8.
define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vmullp8:
;CHECK: vmull.p8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i16> %tmp3
}

; Widening multiply by a sign-extended lane splat: vmull.s16 q0, d0, d1[1].
define arm_aapcs_vfpcc <4 x i32> @test_vmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
entry:
; CHECK: test_vmull_lanes16
; CHECK: vmull.s16 q0, d0, d1[1]
  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
  %1 = sext <4 x i16> %arg0_int16x4_t to <4 x i32>
  %2 = sext <4 x i16> %0 to <4 x i32>
  %3 = mul <4 x i32> %1, %2
  ret <4 x i32> %3
}

; Widening multiply by a sign-extended lane splat: vmull.s32 q0, d0, d1[1].
define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
entry:
; CHECK: test_vmull_lanes32
; CHECK: vmull.s32 q0, d0, d1[1]
  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
  %1 = sext <2 x i32> %arg0_int32x2_t to <2 x i64>
  %2 = sext <2 x i32> %0 to <2 x i64>
  %3 = mul <2 x i64> %1, %2
  ret <2 x i64> %3
}

; Widening multiply by a zero-extended lane splat: vmull.u16 q0, d0, d1[1].
define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone {
entry:
; CHECK: test_vmull_laneu16
; CHECK: vmull.u16 q0, d0, d1[1]
  %0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
  %1 = zext <4 x i16> %arg0_uint16x4_t to <4 x i32>
  %2 = zext <4 x i16> %0 to <4 x i32>
  %3 = mul <4 x i32> %1, %2
  ret <4 x i32> %3
}

; Widening multiply by a zero-extended lane splat: vmull.u32 q0, d0, d1[1].
define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone {
entry:
; CHECK: test_vmull_laneu32
; CHECK: vmull.u32 q0, d0, d1[1]
  %0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
  %1 = zext <2 x i32> %arg0_uint32x2_t to <2 x i64>
  %2 = zext <2 x i32> %0 to <2 x i64>
  %3 = mul <2 x i64> %1, %2
  ret <2 x i64> %3
}

; Declaration for the widening polynomial-multiply intrinsic used by @vmullp8.
declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) nounwind readnone