; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
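; vaba (vector absolute difference and accumulate): a vabd intrinsic whose
; result is added to an accumulator should be selected as a single vaba
; instruction.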
define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK: vabas8:
;CHECK: vaba.s8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = load <8 x i8>* %C
	%tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
	%tmp5 = add <8 x i8> %tmp1, %tmp4
	ret <8 x i8> %tmp5
}

define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK: vabas16:
;CHECK: vaba.s16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = load <4 x i16>* %C
	%tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
	%tmp5 = add <4 x i16> %tmp1, %tmp4
	ret <4 x i16> %tmp5
}

define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK: vabas32:
;CHECK: vaba.s32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = load <2 x i32>* %C
	%tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
	%tmp5 = add <2 x i32> %tmp1, %tmp4
	ret <2 x i32> %tmp5
}

define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK: vabau8:
;CHECK: vaba.u8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = load <8 x i8>* %C
	%tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
	%tmp5 = add <8 x i8> %tmp1, %tmp4
	ret <8 x i8> %tmp5
}

define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK: vabau16:
;CHECK: vaba.u16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = load <4 x i16>* %C
	%tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
	%tmp5 = add <4 x i16> %tmp1, %tmp4
	ret <4 x i16> %tmp5
}

define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK: vabau32:
;CHECK: vaba.u32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = load <2 x i32>* %C
	%tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
	%tmp5 = add <2 x i32> %tmp1, %tmp4
	ret <2 x i32> %tmp5
}

define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
;CHECK: vabaQs8:
;CHECK: vaba.s8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = load <16 x i8>* %C
	%tmp4 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3)
	%tmp5 = add <16 x i8> %tmp1, %tmp4
	ret <16 x i8> %tmp5
}

define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
;CHECK: vabaQs16:
;CHECK: vaba.s16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = load <8 x i16>* %C
	%tmp4 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3)
	%tmp5 = add <8 x i16> %tmp1, %tmp4
	ret <8 x i16> %tmp5
}

define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
;CHECK: vabaQs32:
;CHECK: vaba.s32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = load <4 x i32>* %C
	%tmp4 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3)
	%tmp5 = add <4 x i32> %tmp1, %tmp4
	ret <4 x i32> %tmp5
}

define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
;CHECK: vabaQu8:
;CHECK: vaba.u8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = load <16 x i8>* %C
	%tmp4 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3)
	%tmp5 = add <16 x i8> %tmp1, %tmp4
	ret <16 x i8> %tmp5
}

define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
;CHECK: vabaQu16:
;CHECK: vaba.u16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = load <8 x i16>* %C
	%tmp4 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3)
	%tmp5 = add <8 x i16> %tmp1, %tmp4
	ret <8 x i16> %tmp5
}

define <4 x i32> @vabaQu32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
;CHECK: vabaQu32:
;CHECK: vaba.u32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = load <4 x i32>* %C
	%tmp4 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3)
	%tmp5 = add <4 x i32> %tmp1, %tmp4
	ret <4 x i32> %tmp5
}
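; Declarations of the signed (vabds) and unsigned (vabdu) absolute-difference
; intrinsics used by the tests above and below.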
declare <8 x i8>  @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
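; Long variants (vabal): the absolute difference is zero-extended to twice the
; element width before being added to the accumulator; this vabd + zext + add
; pattern should be selected as a single vabal instruction.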
define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK: vabals8:
;CHECK: vabal.s8
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = load <8 x i8>* %C
	%tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
	%tmp5 = zext <8 x i8> %tmp4 to <8 x i16>
	%tmp6 = add <8 x i16> %tmp1, %tmp5
	ret <8 x i16> %tmp6
}

define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK: vabals16:
;CHECK: vabal.s16
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = load <4 x i16>* %C
	%tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
	%tmp5 = zext <4 x i16> %tmp4 to <4 x i32>
	%tmp6 = add <4 x i32> %tmp1, %tmp5
	ret <4 x i32> %tmp6
}

define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK: vabals32:
;CHECK: vabal.s32
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = load <2 x i32>* %C
	%tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
	%tmp5 = zext <2 x i32> %tmp4 to <2 x i64>
	%tmp6 = add <2 x i64> %tmp1, %tmp5
	ret <2 x i64> %tmp6
}

define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK: vabalu8:
;CHECK: vabal.u8
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = load <8 x i8>* %C
	%tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
	%tmp5 = zext <8 x i8> %tmp4 to <8 x i16>
	%tmp6 = add <8 x i16> %tmp1, %tmp5
	ret <8 x i16> %tmp6
}

define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK: vabalu16:
;CHECK: vabal.u16
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = load <4 x i16>* %C
	%tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
	%tmp5 = zext <4 x i16> %tmp4 to <4 x i32>
	%tmp6 = add <4 x i32> %tmp1, %tmp5
	ret <4 x i32> %tmp6
}

define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK: vabalu32:
;CHECK: vabal.u32
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = load <2 x i32>* %C
	%tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
	%tmp5 = zext <2 x i32> %tmp4 to <2 x i64>
	%tmp6 = add <2 x i64> %tmp1, %tmp5
	ret <2 x i64> %tmp6
}