; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
; RUN: grep {vrshl\\.s8} %t | count 2
; RUN: grep {vrshl\\.s16} %t | count 2
; RUN: grep {vrshl\\.s32} %t | count 2
; RUN: grep {vrshl\\.s64} %t | count 2
; RUN: grep {vrshl\\.u8} %t | count 2
; RUN: grep {vrshl\\.u16} %t | count 2
; RUN: grep {vrshl\\.u32} %t | count 2
; RUN: grep {vrshl\\.u64} %t | count 2
; RUN: grep {vrshr\\.s8} %t | count 2
; RUN: grep {vrshr\\.s16} %t | count 2
; RUN: grep {vrshr\\.s32} %t | count 2
; RUN: grep {vrshr\\.s64} %t | count 2
; RUN: grep {vrshr\\.u8} %t | count 2
; RUN: grep {vrshr\\.u16} %t | count 2
; RUN: grep {vrshr\\.u32} %t | count 2
; RUN: grep {vrshr\\.u64} %t | count 2

; Signed rounding shift left, 8 x i8 (expected to select VRSHL.s8).
define <8 x i8> @vrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

; Signed rounding shift left, 4 x i16 (expected to select VRSHL.s16).
define <4 x i16> @vrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

; Signed rounding shift left, 2 x i32 (expected to select VRSHL.s32).
define <2 x i32> @vrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

; Signed rounding shift left, 1 x i64 (expected to select VRSHL.s64).
define <1 x i64> @vrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
	%tmp1 = load <1 x i64>* %A
	%tmp2 = load <1 x i64>* %B
	%tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
	ret <1 x i64> %tmp3
}

; Unsigned rounding shift left, 8 x i8 (expected to select VRSHL.u8).
define <8 x i8> @vrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

; Unsigned rounding shift left, 4 x i16 (expected to select VRSHL.u16).
define <4 x i16> @vrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

; Unsigned rounding shift left, 2 x i32 (expected to select VRSHL.u32).
define <2 x i32> @vrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

; Unsigned rounding shift left, 1 x i64 (expected to select VRSHL.u64).
define <1 x i64> @vrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
	%tmp1 = load <1 x i64>* %A
	%tmp2 = load <1 x i64>* %B
	%tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
	ret <1 x i64> %tmp3
}

; Signed rounding shift left, 16 x i8 quad register (expected VRSHL.s8).
define <16 x i8> @vrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

; Signed rounding shift left, 8 x i16 quad register (expected VRSHL.s16).
define <8 x i16> @vrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

; Signed rounding shift left, 4 x i32 quad register (expected VRSHL.s32).
define <4 x i32> @vrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

; Signed rounding shift left, 2 x i64 quad register (expected VRSHL.s64).
define <2 x i64> @vrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i64>* %B
	%tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
	ret <2 x i64> %tmp3
}

; Unsigned rounding shift left, 16 x i8 quad register (expected VRSHL.u8).
define <16 x i8> @vrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

; Unsigned rounding shift left, 8 x i16 quad register (expected VRSHL.u16).
define <8 x i16> @vrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

; Unsigned rounding shift left, 4 x i32 quad register (expected VRSHL.u32).
define <4 x i32> @vrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

; Unsigned rounding shift left, 2 x i64 quad register (expected VRSHL.u64).
define <2 x i64> @vrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i64>* %B
	%tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
	ret <2 x i64> %tmp3
}

; Shift count is a constant -8: should fold to a rounding shift right (VRSHR.s8).
define <8 x i8> @vrshrs8(<8 x i8>* %A) nounwind {
	%tmp1 = load <8 x i8>* %A
	%tmp2 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
	ret <8 x i8> %tmp2
}

; Constant -16 shift count: should fold to VRSHR.s16.
define <4 x i16> @vrshrs16(<4 x i16>* %A) nounwind {
	%tmp1 = load <4 x i16>* %A
	%tmp2 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
	ret <4 x i16> %tmp2
}

; Constant -32 shift count: should fold to VRSHR.s32.
define <2 x i32> @vrshrs32(<2 x i32>* %A) nounwind {
	%tmp1 = load <2 x i32>* %A
	%tmp2 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
	ret <2 x i32> %tmp2
}

; Constant -64 shift count: should fold to VRSHR.s64.
define <1 x i64> @vrshrs64(<1 x i64>* %A) nounwind {
	%tmp1 = load <1 x i64>* %A
	%tmp2 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
	ret <1 x i64> %tmp2
}

; Constant -8 shift count, unsigned: should fold to VRSHR.u8.
define <8 x i8> @vrshru8(<8 x i8>* %A) nounwind {
	%tmp1 = load <8 x i8>* %A
	%tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
	ret <8 x i8> %tmp2
}

; Constant -16 shift count, unsigned: should fold to VRSHR.u16.
define <4 x i16> @vrshru16(<4 x i16>* %A) nounwind {
	%tmp1 = load <4 x i16>* %A
	%tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
	ret <4 x i16> %tmp2
}

; Constant -32 shift count, unsigned: should fold to VRSHR.u32.
define <2 x i32> @vrshru32(<2 x i32>* %A) nounwind {
	%tmp1 = load <2 x i32>* %A
	%tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
	ret <2 x i32> %tmp2
}

; Constant -64 shift count, unsigned: should fold to VRSHR.u64.
define <1 x i64> @vrshru64(<1 x i64>* %A) nounwind {
	%tmp1 = load <1 x i64>* %A
	%tmp2 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
	ret <1 x i64> %tmp2
}

; Quad-register variant, constant -8 shift count: should fold to VRSHR.s8.
define <16 x i8> @vrshrQs8(<16 x i8>* %A) nounwind {
	%tmp1 = load <16 x i8>* %A
	%tmp2 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
	ret <16 x i8> %tmp2
}

; Quad-register variant, constant -16 shift count: should fold to VRSHR.s16.
define <8 x i16> @vrshrQs16(<8 x i16>* %A) nounwind {
	%tmp1 = load <8 x i16>* %A
	%tmp2 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
	ret <8 x i16> %tmp2
}

; Quad-register variant, constant -32 shift count: should fold to VRSHR.s32.
define <4 x i32> @vrshrQs32(<4 x i32>* %A) nounwind {
	%tmp1 = load <4 x i32>* %A
	%tmp2 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
	ret <4 x i32> %tmp2
}

; Quad-register variant, constant -64 shift count: should fold to VRSHR.s64.
define <2 x i64> @vrshrQs64(<2 x i64>* %A) nounwind {
	%tmp1 = load <2 x i64>* %A
	%tmp2 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
	ret <2 x i64> %tmp2
}

; Quad-register variant, constant -8 shift count, unsigned: VRSHR.u8.
define <16 x i8> @vrshrQu8(<16 x i8>* %A) nounwind {
	%tmp1 = load <16 x i8>* %A
	%tmp2 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
	ret <16 x i8> %tmp2
}

; Quad-register variant, constant -16 shift count, unsigned: VRSHR.u16.
define <8 x i16> @vrshrQu16(<8 x i16>* %A) nounwind {
	%tmp1 = load <8 x i16>* %A
	%tmp2 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
	ret <8 x i16> %tmp2
}

; Quad-register variant, constant -32 shift count, unsigned: VRSHR.u32.
define <4 x i32> @vrshrQu32(<4 x i32>* %A) nounwind {
	%tmp1 = load <4 x i32>* %A
	%tmp2 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
	ret <4 x i32> %tmp2
}

; Quad-register variant, constant -64 shift count, unsigned: VRSHR.u64.
define <2 x i64> @vrshrQu64(<2 x i64>* %A) nounwind {
	%tmp1 = load <2 x i64>* %A
	%tmp2 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
	ret <2 x i64> %tmp2
}

; Declarations of the NEON rounding-shift intrinsics used above.
declare <8 x i8>  @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone