; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; Loads three <8 x i8> vectors from %A, %B and %C, passes them to
; @llvm.arm.neon.vabas.v8i8, and returns the intrinsic's result.
; FileCheck expects the generated code to contain vaba.s8.
define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK: vabas8:
;CHECK: vaba.s8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = load <8 x i8>* %C
  %tmp4 = call <8 x i8> @llvm.arm.neon.vabas.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
  ret <8 x i8> %tmp4
}

; Loads three <4 x i16> vectors from %A, %B and %C, passes them to
; @llvm.arm.neon.vabas.v4i16, and returns the intrinsic's result.
; FileCheck expects the generated code to contain vaba.s16.
define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK: vabas16:
;CHECK: vaba.s16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = load <4 x i16>* %C
  %tmp4 = call <4 x i16> @llvm.arm.neon.vabas.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
  ret <4 x i16> %tmp4
}

; Loads three <2 x i32> vectors from %A, %B and %C, passes them to
; @llvm.arm.neon.vabas.v2i32, and returns the intrinsic's result.
; FileCheck expects the generated code to contain vaba.s32.
define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK: vabas32:
;CHECK: vaba.s32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = load <2 x i32>* %C
  %tmp4 = call <2 x i32> @llvm.arm.neon.vabas.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
  ret <2 x i32> %tmp4
}

; Loads three <8 x i8> vectors from %A, %B and %C, passes them to
; @llvm.arm.neon.vabau.v8i8, and returns the intrinsic's result.
; FileCheck expects the generated code to contain vaba.u8.
define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK: vabau8:
;CHECK: vaba.u8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = load <8 x i8>* %C
  %tmp4 = call <8 x i8> @llvm.arm.neon.vabau.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
  ret <8 x i8> %tmp4
}

; Loads three <4 x i16> vectors from %A, %B and %C, passes them to
; @llvm.arm.neon.vabau.v4i16, and returns the intrinsic's result.
; FileCheck expects the generated code to contain vaba.u16.
define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK: vabau16:
;CHECK: vaba.u16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = load <4 x i16>* %C
  %tmp4 = call <4 x i16> @llvm.arm.neon.vabau.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
  ret <4 x i16> %tmp4
}

; Loads three <2 x i32> vectors from %A, %B and %C, passes them to
; @llvm.arm.neon.vabau.v2i32, and returns the intrinsic's result.
; FileCheck expects the generated code to contain vaba.u32.
define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK: vabau32:
;CHECK: vaba.u32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = load <2 x i32>* %C
  %tmp4 = call <2 x i32> @llvm.arm.neon.vabau.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
  ret <2 x i32> %tmp4
}

; Loads three <16 x i8> vectors from %A, %B and %C, passes them to
; @llvm.arm.neon.vabas.v16i8, and returns the intrinsic's result.
; FileCheck expects the generated code to contain vaba.s8.
define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
;CHECK: vabaQs8:
;CHECK: vaba.s8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = load <16 x i8>* %C
  %tmp4 = call <16 x i8> @llvm.arm.neon.vabas.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> %tmp3)
  ret <16 x i8> %tmp4
}

; Loads three <8 x i16> vectors from %A, %B and %C, passes them to
; @llvm.arm.neon.vabas.v8i16, and returns the intrinsic's result.
; FileCheck expects the generated code to contain vaba.s16.
define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
;CHECK: vabaQs16:
;CHECK: vaba.s16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = load <8 x i16>* %C
  %tmp4 = call <8 x i16> @llvm.arm.neon.vabas.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> %tmp3)
  ret <8 x i16> %tmp4
}

; Loads three <4 x i32> vectors from %A, %B and %C, passes them to
; @llvm.arm.neon.vabas.v4i32, and returns the intrinsic's result.
; FileCheck expects the generated code to contain vaba.s32.
define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
;CHECK: vabaQs32:
;CHECK: vaba.s32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = load <4 x i32>* %C
  %tmp4 = call <4 x i32> @llvm.arm.neon.vabas.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> %tmp3)
  ret <4 x i32> %tmp4
}

; Loads three <16 x i8> vectors from %A, %B and %C, passes them to
; @llvm.arm.neon.vabau.v16i8, and returns the intrinsic's result.
; FileCheck expects the generated code to contain vaba.u8.
define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
;CHECK: vabaQu8:
;CHECK: vaba.u8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = load <16 x i8>* %C
  %tmp4 = call <16 x i8> @llvm.arm.neon.vabau.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> %tmp3)
  ret <16 x i8> %tmp4
}

; Loads three <8 x i16> vectors from %A, %B and %C, passes them to
; @llvm.arm.neon.vabau.v8i16, and returns the intrinsic's result.
; FileCheck expects the generated code to contain vaba.u16.
define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
;CHECK: vabaQu16:
;CHECK: vaba.u16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = load <8 x i16>* %C
  %tmp4 = call <8 x i16> @llvm.arm.neon.vabau.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> %tmp3)
  ret <8 x i16> %tmp4
}

; Loads three <4 x i32> vectors from %A, %B and %C, passes them to
; @llvm.arm.neon.vabau.v4i32, and returns the intrinsic's result.
; FileCheck expects the generated code to contain vaba.u32.
define <4 x i32> @vabaQu32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
;CHECK: vabaQu32:
;CHECK: vaba.u32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = load <4 x i32>* %C
  %tmp4 = call <4 x i32> @llvm.arm.neon.vabau.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> %tmp3)
  ret <4 x i32> %tmp4
}

; Declarations of the vabas/vabau intrinsics called by the vaba* test
; functions in this file. Each takes three vectors of the same type and
; returns a vector of that type; all are marked nounwind readnone.
declare <8 x i8>  @llvm.arm.neon.vabas.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vabas.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vabas.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vabau.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vabau.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vabau.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vabas.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabas.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabas.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vabau.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabau.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabau.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

; Loads an <8 x i16> vector from %A and two <8 x i8> vectors from %B and %C,
; passes them to @llvm.arm.neon.vabals.v8i16, and returns the <8 x i16> result.
; FileCheck expects the generated code to contain vabal.s8.
define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK: vabals8:
;CHECK: vabal.s8
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = load <8 x i8>* %C
  %tmp4 = call <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
  ret <8 x i16> %tmp4
}

; Loads a <4 x i32> vector from %A and two <4 x i16> vectors from %B and %C,
; passes them to @llvm.arm.neon.vabals.v4i32, and returns the <4 x i32> result.
; FileCheck expects the generated code to contain vabal.s16.
define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK: vabals16:
;CHECK: vabal.s16
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = load <4 x i16>* %C
  %tmp4 = call <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
  ret <4 x i32> %tmp4
}

; Loads a <2 x i64> vector from %A and two <2 x i32> vectors from %B and %C,
; passes them to @llvm.arm.neon.vabals.v2i64, and returns the <2 x i64> result.
; FileCheck expects the generated code to contain vabal.s32.
define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK: vabals32:
;CHECK: vabal.s32
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = load <2 x i32>* %C
  %tmp4 = call <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
  ret <2 x i64> %tmp4
}

; Loads an <8 x i16> vector from %A and two <8 x i8> vectors from %B and %C,
; passes them to @llvm.arm.neon.vabalu.v8i16, and returns the <8 x i16> result.
; FileCheck expects the generated code to contain vabal.u8.
define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK: vabalu8:
;CHECK: vabal.u8
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = load <8 x i8>* %C
  %tmp4 = call <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
  ret <8 x i16> %tmp4
}

; Loads a <4 x i32> vector from %A and two <4 x i16> vectors from %B and %C,
; passes them to @llvm.arm.neon.vabalu.v4i32, and returns the <4 x i32> result.
; FileCheck expects the generated code to contain vabal.u16.
define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK: vabalu16:
;CHECK: vabal.u16
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = load <4 x i16>* %C
  %tmp4 = call <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
  ret <4 x i32> %tmp4
}

; Loads a <2 x i64> vector from %A and two <2 x i32> vectors from %B and %C,
; passes them to @llvm.arm.neon.vabalu.v2i64, and returns the <2 x i64> result.
; FileCheck expects the generated code to contain vabal.u32.
define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK: vabalu32:
;CHECK: vabal.u32
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = load <2 x i32>* %C
  %tmp4 = call <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
  ret <2 x i64> %tmp4
}

; Declarations of the vabals/vabalu intrinsics called by the vabal* test
; functions in this file. The first operand and the result share one vector
; type; the other two operands have the same element count with elements half
; as wide. All are marked nounwind readnone.
declare <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone

declare <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone