; RUN: llc -show-mc-encoding -march=arm -mcpu=cortex-a8 -mattr=+neon < %s | FileCheck %s
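; Test MC encodings of the NEON vector subtract instructions: VSUB (integer
; and floating-point), VSUBL (widening subtract), and VSUBW (wide subtract).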

; CHECK: vsub_8xi8
define <8 x i8> @vsub_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
; CHECK: vsub.i8 d16, d17, d16 @ encoding: [0xa0,0x08,0x41,0xf3]
  %tmp3 = sub <8 x i8> %tmp1, %tmp2
  ret <8 x i8> %tmp3
}

; CHECK: vsub_4xi16
define <4 x i16> @vsub_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
; CHECK: vsub.i16 d16, d17, d16 @ encoding: [0xa0,0x08,0x51,0xf3]
  %tmp3 = sub <4 x i16> %tmp1, %tmp2
  ret <4 x i16> %tmp3
}

; CHECK: vsub_2xi32
define <2 x i32> @vsub_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
; CHECK: vsub.i32 d16, d17, d16 @ encoding: [0xa0,0x08,0x61,0xf3]
  %tmp3 = sub <2 x i32> %tmp1, %tmp2
  ret <2 x i32> %tmp3
}

; CHECK: vsub_1xi64
define <1 x i64> @vsub_1xi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
  %tmp1 = load <1 x i64>* %A
  %tmp2 = load <1 x i64>* %B
; CHECK: vsub.i64 d16, d17, d16 @ encoding: [0xa0,0x08,0x71,0xf3]
  %tmp3 = sub <1 x i64> %tmp1, %tmp2
  ret <1 x i64> %tmp3
}

; CHECK: vsub_2xfloat
define <2 x float> @vsub_2xfloat(<2 x float>* %A, <2 x float>* %B) nounwind {
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
; CHECK: vsub.f32 d16, d16, d17 @ encoding: [0xa1,0x0d,0x60,0xf2]
  %tmp3 = fsub <2 x float> %tmp1, %tmp2
  ret <2 x float> %tmp3
}

; CHECK: vsub_16xi8
define <16 x i8> @vsub_16xi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
; CHECK: vsub.i8 q8, q8, q9 @ encoding: [0xe2,0x08,0x40,0xf3]
  %tmp3 = sub <16 x i8> %tmp1, %tmp2
  ret <16 x i8> %tmp3
}

; CHECK: vsub_8xi16
define <8 x i16> @vsub_8xi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
; CHECK: vsub.i16 q8, q8, q9 @ encoding: [0xe2,0x08,0x50,0xf3]
  %tmp3 = sub <8 x i16> %tmp1, %tmp2
  ret <8 x i16> %tmp3
}

; CHECK: vsub_4xi32
define <4 x i32> @vsub_4xi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
; CHECK: vsub.i32 q8, q8, q9 @ encoding: [0xe2,0x08,0x60,0xf3]
  %tmp3 = sub <4 x i32> %tmp1, %tmp2
  ret <4 x i32> %tmp3
}

; CHECK: vsub_2xi64
define <2 x i64> @vsub_2xi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
; CHECK: vsub.i64 q8, q8, q9 @ encoding: [0xe2,0x08,0x70,0xf3]
  %tmp3 = sub <2 x i64> %tmp1, %tmp2
  ret <2 x i64> %tmp3
}

; CHECK: vsub_4xfloat
define <4 x float> @vsub_4xfloat(<4 x float>* %A, <4 x float>* %B) nounwind {
  %tmp1 = load <4 x float>* %A
  %tmp2 = load <4 x float>* %B
; CHECK: vsub.f32 q8, q8, q9 @ encoding: [0xe2,0x0d,0x60,0xf2]
  %tmp3 = fsub <4 x float> %tmp1, %tmp2
  ret <4 x float> %tmp3
}
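; VSUBL tests: the narrow operands are sign- or zero-extended in the IR, and
; the extended subtract should select the widening vsubl.s*/vsubl.u* forms.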
; CHECK: vsubls_8xi8
define <8 x i16> @vsubls_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
  %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
; CHECK: vsubl.s8 q8, d17, d16 @ encoding: [0xa0,0x02,0xc1,0xf2]
  %tmp5 = sub <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; CHECK: vsubls_4xi16
define <4 x i32> @vsubls_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
  %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
; CHECK: vsubl.s16 q8, d17, d16 @ encoding: [0xa0,0x02,0xd1,0xf2]
  %tmp5 = sub <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

; CHECK: vsubls_2xi32
define <2 x i64> @vsubls_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
  %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
; CHECK: vsubl.s32 q8, d17, d16 @ encoding: [0xa0,0x02,0xe1,0xf2]
  %tmp5 = sub <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

; CHECK: vsublu_8xi8
define <8 x i16> @vsublu_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
  %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
; CHECK: vsubl.u8 q8, d17, d16 @ encoding: [0xa0,0x02,0xc1,0xf3]
  %tmp5 = sub <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; CHECK: vsublu_4xi16
define <4 x i32> @vsublu_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
  %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
; CHECK: vsubl.u16 q8, d17, d16 @ encoding: [0xa0,0x02,0xd1,0xf3]
  %tmp5 = sub <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

; CHECK: vsublu_2xi32
define <2 x i64> @vsublu_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
  %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
; CHECK: vsubl.u32 q8, d17, d16 @ encoding: [0xa0,0x02,0xe1,0xf3]
  %tmp5 = sub <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}
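; VSUBW tests: only the second operand is extended in the IR, so the subtract
; of a wide value and an extended narrow value should select vsubw.s*/vsubw.u*.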
; CHECK: vsubws_8xi8
define <8 x i16> @vsubws_8xi8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = sext <8 x i8> %tmp2 to <8 x i16>
; CHECK: vsubw.s8 q8, q8, d18 @ encoding: [0xa2,0x03,0xc0,0xf2]
  %tmp4 = sub <8 x i16> %tmp1, %tmp3
  ret <8 x i16> %tmp4
}

; CHECK: vsubws_4xi16
define <4 x i32> @vsubws_4xi16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = sext <4 x i16> %tmp2 to <4 x i32>
; CHECK: vsubw.s16 q8, q8, d18 @ encoding: [0xa2,0x03,0xd0,0xf2]
  %tmp4 = sub <4 x i32> %tmp1, %tmp3
  ret <4 x i32> %tmp4
}

; CHECK: vsubws_2xi32
define <2 x i64> @vsubws_2xi32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = sext <2 x i32> %tmp2 to <2 x i64>
; CHECK: vsubw.s32 q8, q8, d18 @ encoding: [0xa2,0x03,0xe0,0xf2]
  %tmp4 = sub <2 x i64> %tmp1, %tmp3
  ret <2 x i64> %tmp4
}

; CHECK: vsubwu_8xi8
define <8 x i16> @vsubwu_8xi8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = zext <8 x i8> %tmp2 to <8 x i16>
; CHECK: vsubw.u8 q8, q8, d18 @ encoding: [0xa2,0x03,0xc0,0xf3]
  %tmp4 = sub <8 x i16> %tmp1, %tmp3
  ret <8 x i16> %tmp4
}

; CHECK: vsubwu_4xi16
define <4 x i32> @vsubwu_4xi16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = zext <4 x i16> %tmp2 to <4 x i32>
; CHECK: vsubw.u16 q8, q8, d18 @ encoding: [0xa2,0x03,0xd0,0xf3]
  %tmp4 = sub <4 x i32> %tmp1, %tmp3
  ret <4 x i32> %tmp4
}

; CHECK: vsubwu_2xi32
define <2 x i64> @vsubwu_2xi32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = zext <2 x i32> %tmp2 to <2 x i64>
; CHECK: vsubw.u32 q8, q8, d18 @ encoding: [0xa2,0x03,0xe0,0xf3]
  %tmp4 = sub <2 x i64> %tmp1, %tmp3
  ret <2 x i64> %tmp4
}