; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
Bob Wilson5bafff32009-06-22 23:27:02 +00002
; Loads two <8 x i8> vectors from %A and %B and adds them element-wise.
; The FileCheck directives below expect llc to emit vadd.i8 for this function.
define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vaddi8:
;CHECK: vadd.i8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = add <8 x i8> %tmp1, %tmp2
  ret <8 x i8> %tmp3
}
11
; Loads two <4 x i16> vectors from %A and %B and adds them element-wise.
; The FileCheck directives below expect llc to emit vadd.i16 for this function.
define <4 x i16> @vaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vaddi16:
;CHECK: vadd.i16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = add <4 x i16> %tmp1, %tmp2
  ret <4 x i16> %tmp3
}
20
; Loads two <2 x i32> vectors from %A and %B and adds them element-wise.
; The FileCheck directives below expect llc to emit vadd.i32 for this function.
define <2 x i32> @vaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vaddi32:
;CHECK: vadd.i32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = add <2 x i32> %tmp1, %tmp2
  ret <2 x i32> %tmp3
}
29
; Loads two <1 x i64> vectors from %A and %B and adds them.
; The FileCheck directives below expect llc to emit vadd.i64 for this function.
define <1 x i64> @vaddi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vaddi64:
;CHECK: vadd.i64
  %tmp1 = load <1 x i64>* %A
  %tmp2 = load <1 x i64>* %B
  %tmp3 = add <1 x i64> %tmp1, %tmp2
  ret <1 x i64> %tmp3
}
38
; Loads two <2 x float> vectors from %A and %B and adds them with fadd
; (floating-point add, unlike the integer tests in this file that use add).
; The FileCheck directives below expect llc to emit vadd.f32 for this function.
define <2 x float> @vaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vaddf32:
;CHECK: vadd.f32
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
  %tmp3 = fadd <2 x float> %tmp1, %tmp2
  ret <2 x float> %tmp3
}
47
; Loads two <16 x i8> vectors (128 bits each) from %A and %B and adds them
; element-wise. The FileCheck directives below expect llc to emit vadd.i8.
define <16 x i8> @vaddQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vaddQi8:
;CHECK: vadd.i8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = add <16 x i8> %tmp1, %tmp2
  ret <16 x i8> %tmp3
}
56
; Loads two <8 x i16> vectors (128 bits each) from %A and %B and adds them
; element-wise. The FileCheck directives below expect llc to emit vadd.i16.
define <8 x i16> @vaddQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vaddQi16:
;CHECK: vadd.i16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = add <8 x i16> %tmp1, %tmp2
  ret <8 x i16> %tmp3
}
65
; Loads two <4 x i32> vectors (128 bits each) from %A and %B and adds them
; element-wise. The FileCheck directives below expect llc to emit vadd.i32.
define <4 x i32> @vaddQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vaddQi32:
;CHECK: vadd.i32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = add <4 x i32> %tmp1, %tmp2
  ret <4 x i32> %tmp3
}
74
; Loads two <2 x i64> vectors (128 bits each) from %A and %B and adds them
; element-wise. The FileCheck directives below expect llc to emit vadd.i64.
define <2 x i64> @vaddQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vaddQi64:
;CHECK: vadd.i64
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
  %tmp3 = add <2 x i64> %tmp1, %tmp2
  ret <2 x i64> %tmp3
}
83
; Loads two <4 x float> vectors (128 bits each) from %A and %B and adds them
; with fadd. The FileCheck directives below expect llc to emit vadd.f32.
define <4 x float> @vaddQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vaddQf32:
;CHECK: vadd.f32
  %tmp1 = load <4 x float>* %A
  %tmp2 = load <4 x float>* %B
  %tmp3 = fadd <4 x float> %tmp1, %tmp2
  ret <4 x float> %tmp3
}
Bob Wilson83815ae2009-10-09 20:20:54 +000092
; Loads two <8 x i16> vectors and passes them to the
; llvm.arm.neon.vaddhn.v8i8 intrinsic, which returns a narrower <8 x i8>.
; The FileCheck directives below expect llc to emit vaddhn.i16.
define <8 x i8> @vaddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vaddhni16:
;CHECK: vaddhn.i16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i8> %tmp3
}
101
; Loads two <4 x i32> vectors and passes them to the
; llvm.arm.neon.vaddhn.v4i16 intrinsic, which returns a narrower <4 x i16>.
; The FileCheck directives below expect llc to emit vaddhn.i32.
define <4 x i16> @vaddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vaddhni32:
;CHECK: vaddhn.i32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i16> %tmp3
}
110
; Loads two <2 x i64> vectors and passes them to the
; llvm.arm.neon.vaddhn.v2i32 intrinsic, which returns a narrower <2 x i32>.
; The FileCheck directives below expect llc to emit vaddhn.i64.
define <2 x i32> @vaddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vaddhni64:
;CHECK: vaddhn.i64
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i32> %tmp3
}
119
; Declarations of the vaddhn intrinsics called by the vaddhni* tests.
; Each takes two vectors of one element width and returns a vector with the
; same element count at half that width.
declare <8 x i8>  @llvm.arm.neon.vaddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
123
; Loads two <8 x i16> vectors and passes them to the
; llvm.arm.neon.vraddhn.v8i8 intrinsic, which returns a narrower <8 x i8>.
; The FileCheck directives below expect llc to emit vraddhn.i16.
define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vraddhni16:
;CHECK: vraddhn.i16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i8> %tmp3
}
132
; Loads two <4 x i32> vectors and passes them to the
; llvm.arm.neon.vraddhn.v4i16 intrinsic, which returns a narrower <4 x i16>.
; The FileCheck directives below expect llc to emit vraddhn.i32.
define <4 x i16> @vraddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vraddhni32:
;CHECK: vraddhn.i32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i16> %tmp3
}
141
; Loads two <2 x i64> vectors and passes them to the
; llvm.arm.neon.vraddhn.v2i32 intrinsic, which returns a narrower <2 x i32>.
; The FileCheck directives below expect llc to emit vraddhn.i64.
define <2 x i32> @vraddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vraddhni64:
;CHECK: vraddhn.i64
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i32> %tmp3
}
150
; Declarations of the vraddhn intrinsics called by the vraddhni* tests.
; Each takes two vectors of one element width and returns a vector with the
; same element count at half that width.
declare <8 x i8>  @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
154
; Loads two <8 x i8> vectors and passes them to the
; llvm.arm.neon.vaddls.v8i16 intrinsic, which returns a wider <8 x i16>.
; The FileCheck directives below expect llc to emit vaddl.s8.
define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vaddls8:
;CHECK: vaddl.s8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i16> %tmp3
}
163
; Loads two <4 x i16> vectors and passes them to the
; llvm.arm.neon.vaddls.v4i32 intrinsic, which returns a wider <4 x i32>.
; The FileCheck directives below expect llc to emit vaddl.s16.
define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vaddls16:
;CHECK: vaddl.s16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i32> %tmp3
}
172
; Loads two <2 x i32> vectors and passes them to the
; llvm.arm.neon.vaddls.v2i64 intrinsic, which returns a wider <2 x i64>.
; The FileCheck directives below expect llc to emit vaddl.s32.
define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vaddls32:
;CHECK: vaddl.s32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i64> %tmp3
}
181
; Loads two <8 x i8> vectors and passes them to the
; llvm.arm.neon.vaddlu.v8i16 intrinsic, which returns a wider <8 x i16>.
; The FileCheck directives below expect llc to emit vaddl.u8.
define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vaddlu8:
;CHECK: vaddl.u8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i16> %tmp3
}
190
; Loads two <4 x i16> vectors and passes them to the
; llvm.arm.neon.vaddlu.v4i32 intrinsic, which returns a wider <4 x i32>.
; The FileCheck directives below expect llc to emit vaddl.u16.
define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vaddlu16:
;CHECK: vaddl.u16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i32> %tmp3
}
199
; Loads two <2 x i32> vectors and passes them to the
; llvm.arm.neon.vaddlu.v2i64 intrinsic, which returns a wider <2 x i64>.
; The FileCheck directives below expect llc to emit vaddl.u32.
define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vaddlu32:
;CHECK: vaddl.u32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i64> %tmp3
}
208
; Declarations of the vaddls intrinsics called by the vaddls* tests.
; Each takes two vectors of one element width and returns a vector with the
; same element count at double that width.
declare <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
212
; Declarations of the vaddlu intrinsics called by the vaddlu* tests.
; Each takes two vectors of one element width and returns a vector with the
; same element count at double that width.
declare <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
216
; Loads an <8 x i16> vector from %A and an <8 x i8> vector from %B and passes
; both to the llvm.arm.neon.vaddws.v8i16 intrinsic, which returns <8 x i16>.
; The FileCheck directives below expect llc to emit vaddw.s8.
define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
;CHECK: vaddws8:
;CHECK: vaddw.s8
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
  ret <8 x i16> %tmp3
}
225
; Loads a <4 x i32> vector from %A and a <4 x i16> vector from %B and passes
; both to the llvm.arm.neon.vaddws.v4i32 intrinsic, which returns <4 x i32>.
; The FileCheck directives below expect llc to emit vaddw.s16.
define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
;CHECK: vaddws16:
;CHECK: vaddw.s16
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
  ret <4 x i32> %tmp3
}
234
; Loads a <2 x i64> vector from %A and a <2 x i32> vector from %B and passes
; both to the llvm.arm.neon.vaddws.v2i64 intrinsic, which returns <2 x i64>.
; The FileCheck directives below expect llc to emit vaddw.s32.
define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
;CHECK: vaddws32:
;CHECK: vaddw.s32
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
  ret <2 x i64> %tmp3
}
243
; Loads an <8 x i16> vector from %A and an <8 x i8> vector from %B and passes
; both to the llvm.arm.neon.vaddwu.v8i16 intrinsic, which returns <8 x i16>.
; The FileCheck directives below expect llc to emit vaddw.u8.
define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
;CHECK: vaddwu8:
;CHECK: vaddw.u8
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
  ret <8 x i16> %tmp3
}
252
; Loads a <4 x i32> vector from %A and a <4 x i16> vector from %B and passes
; both to the llvm.arm.neon.vaddwu.v4i32 intrinsic, which returns <4 x i32>.
; The FileCheck directives below expect llc to emit vaddw.u16.
define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
;CHECK: vaddwu16:
;CHECK: vaddw.u16
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
  ret <4 x i32> %tmp3
}
261
; Loads a <2 x i64> vector from %A and a <2 x i32> vector from %B and passes
; both to the llvm.arm.neon.vaddwu.v2i64 intrinsic, which returns <2 x i64>.
; The FileCheck directives below expect llc to emit vaddw.u32.
define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
;CHECK: vaddwu32:
;CHECK: vaddw.u32
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
  ret <2 x i64> %tmp3
}
270
; Declarations of the vaddws intrinsics called by the vaddws* tests.
; The first operand and the result share one vector type; the second operand
; has the same element count at half the element width.
declare <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
274
; Declarations of the vaddwu intrinsics called by the vaddwu* tests.
; The first operand and the result share one vector type; the second operand
; has the same element count at half the element width.
declare <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone