; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
; RUN: grep {vqshl\\.s8} %t | count 4
; RUN: grep {vqshl\\.s16} %t | count 4
; RUN: grep {vqshl\\.s32} %t | count 4
; RUN: grep {vqshl\\.s64} %t | count 4
; RUN: grep {vqshl\\.u8} %t | count 4
; RUN: grep {vqshl\\.u16} %t | count 4
; RUN: grep {vqshl\\.u32} %t | count 4
; RUN: grep {vqshl\\.u64} %t | count 4
; RUN: grep {vqshl\\.s8.*#7} %t | count 2
; RUN: grep {vqshl\\.s16.*#15} %t | count 2
; RUN: grep {vqshl\\.s32.*#31} %t | count 2
; RUN: grep {vqshl\\.s64.*#63} %t | count 2
; RUN: grep {vqshl\\.u8.*#7} %t | count 2
; RUN: grep {vqshl\\.u16.*#15} %t | count 2
; RUN: grep {vqshl\\.u32.*#31} %t | count 2
; RUN: grep {vqshl\\.u64.*#63} %t | count 2
; RUN: grep {vqshlu\\.s8} %t | count 2
; RUN: grep {vqshlu\\.s16} %t | count 2
; RUN: grep {vqshlu\\.s32} %t | count 2
; RUN: grep {vqshlu\\.s64} %t | count 2

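; Signed saturating shifts with per-lane shift amounts from a register
; (vqshl.s*), 64-bit vectors.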
define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
  %tmp1 = load <1 x i64>* %A
  %tmp2 = load <1 x i64>* %B
  %tmp3 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
  ret <1 x i64> %tmp3
}

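; Unsigned saturating shifts with per-lane shift amounts from a register
; (vqshl.u*), 64-bit vectors.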
define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
  %tmp1 = load <1 x i64>* %A
  %tmp2 = load <1 x i64>* %B
  %tmp3 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
  ret <1 x i64> %tmp3
}

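; Signed saturating shifts with per-lane shift amounts from a register
; (vqshl.s*), 128-bit vectors.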
define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

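; Unsigned saturating shifts with per-lane shift amounts from a register
; (vqshl.u*), 128-bit vectors.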
define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

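; Constant shift-amount vectors should be matched to the immediate form of
; the instruction, as checked by the {vqshl\\.s8.*#7} etc. RUN lines above.
; Signed, 64-bit vectors.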
define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind {
  %tmp1 = load <8 x i8>* %A
  %tmp2 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
  ret <8 x i8> %tmp2
}

define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind {
  %tmp1 = load <4 x i16>* %A
  %tmp2 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
  ret <4 x i16> %tmp2
}

define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind {
  %tmp1 = load <2 x i32>* %A
  %tmp2 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
  ret <2 x i32> %tmp2
}

define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind {
  %tmp1 = load <1 x i64>* %A
  %tmp2 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
  ret <1 x i64> %tmp2
}

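; Unsigned saturating shifts by a constant amount, 64-bit vectors.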
define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind {
  %tmp1 = load <8 x i8>* %A
  %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
  ret <8 x i8> %tmp2
}

define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind {
  %tmp1 = load <4 x i16>* %A
  %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
  ret <4 x i16> %tmp2
}

define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind {
  %tmp1 = load <2 x i32>* %A
  %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
  ret <2 x i32> %tmp2
}

define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind {
  %tmp1 = load <1 x i64>* %A
  %tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
  ret <1 x i64> %tmp2
}

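; Signed-to-unsigned saturating shifts (vqshlu.s*) by a constant amount,
; 64-bit vectors.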
define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind {
  %tmp1 = load <8 x i8>* %A
  %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
  ret <8 x i8> %tmp2
}

define <4 x i16> @vqshlsu_n16(<4 x i16>* %A) nounwind {
  %tmp1 = load <4 x i16>* %A
  %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
  ret <4 x i16> %tmp2
}

define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind {
  %tmp1 = load <2 x i32>* %A
  %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
  ret <2 x i32> %tmp2
}

define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind {
  %tmp1 = load <1 x i64>* %A
  %tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
  ret <1 x i64> %tmp2
}

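; Signed saturating shifts by a constant amount, 128-bit vectors.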
define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind {
  %tmp1 = load <16 x i8>* %A
  %tmp2 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
  ret <16 x i8> %tmp2
}

define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind {
  %tmp1 = load <8 x i16>* %A
  %tmp2 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
  ret <8 x i16> %tmp2
}

define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind {
  %tmp1 = load <4 x i32>* %A
  %tmp2 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
  ret <4 x i32> %tmp2
}

define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind {
  %tmp1 = load <2 x i64>* %A
  %tmp2 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
  ret <2 x i64> %tmp2
}

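; Unsigned saturating shifts by a constant amount, 128-bit vectors.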
define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind {
  %tmp1 = load <16 x i8>* %A
  %tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
  ret <16 x i8> %tmp2
}

define <8 x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind {
  %tmp1 = load <8 x i16>* %A
  %tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
  ret <8 x i16> %tmp2
}

define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind {
  %tmp1 = load <4 x i32>* %A
  %tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
  ret <4 x i32> %tmp2
}

define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind {
  %tmp1 = load <2 x i64>* %A
  %tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
  ret <2 x i64> %tmp2
}

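; Signed-to-unsigned saturating shifts (vqshlu.s*) by a constant amount,
; 128-bit vectors.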
define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind {
  %tmp1 = load <16 x i8>* %A
  %tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
  ret <16 x i8> %tmp2
}

define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind {
  %tmp1 = load <8 x i16>* %A
  %tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
  ret <8 x i16> %tmp2
}

define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind {
  %tmp1 = load <4 x i32>* %A
  %tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
  ret <4 x i32> %tmp2
}

define <2 x i64> @vqshlQsu_n64(<2 x i64>* %A) nounwind {
  %tmp1 = load <2 x i64>* %A
  %tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
  ret <2 x i64> %tmp2
}

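; Intrinsic declarations.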
declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone