; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s
2
; Element-wise AND of two <16 x i8> vectors; the expected selection is MSA and.v.
; The label directive confines the patterns below to this function's output.
define void @and_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: and_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = and <16 x i8> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size and_v16i8
}
18
; Element-wise AND of two <8 x i16> vectors; the expected selection is MSA and.v.
; The label directive confines the patterns below to this function's output.
define void @and_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: and_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = and <8 x i16> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size and_v8i16
}
34
; Element-wise AND of two <4 x i32> vectors; the expected selection is MSA and.v.
; The label directive confines the patterns below to this function's output.
define void @and_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: and_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = and <4 x i32> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size and_v4i32
}
50
; Element-wise AND of two <2 x i64> vectors; the expected selection is MSA and.v.
; The label directive confines the patterns below to this function's output.
define void @and_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: and_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = and <2 x i64> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size and_v2i64
}
66
; Element-wise OR of two <16 x i8> vectors; the expected selection is MSA or.v.
; The label directive confines the patterns below to this function's output.
define void @or_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: or_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = or <16 x i8> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size or_v16i8
}
82
; Element-wise OR of two <8 x i16> vectors; the expected selection is MSA or.v.
; The label directive confines the patterns below to this function's output.
define void @or_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: or_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = or <8 x i16> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size or_v8i16
}
98
; Element-wise OR of two <4 x i32> vectors; the expected selection is MSA or.v.
; The label directive confines the patterns below to this function's output.
define void @or_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: or_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = or <4 x i32> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size or_v4i32
}
114
; Element-wise OR of two <2 x i64> vectors; the expected selection is MSA or.v.
; The label directive confines the patterns below to this function's output.
define void @or_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: or_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = or <2 x i64> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size or_v2i64
}
130
; OR of two <16 x i8> vectors followed by XOR with all-ones; the pair is
; expected to be selected as one MSA nor.v.
; The label directive confines the patterns below to this function's output.
define void @nor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: nor_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = or <16 x i8> %1, %2
  %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %4, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size nor_v16i8
}
147
; OR of two <8 x i16> vectors followed by XOR with all-ones; the pair is
; expected to be selected as one MSA nor.v.
; The label directive confines the patterns below to this function's output.
define void @nor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: nor_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = or <8 x i16> %1, %2
  %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %4, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size nor_v8i16
}
164
; OR of two <4 x i32> vectors followed by XOR with all-ones; the pair is
; expected to be selected as one MSA nor.v.
; The label directive confines the patterns below to this function's output.
define void @nor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: nor_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = or <4 x i32> %1, %2
  %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size nor_v4i32
}
181
; OR of two <2 x i64> vectors followed by XOR with all-ones; the pair is
; expected to be selected as one MSA nor.v.
; The label directive confines the patterns below to this function's output.
define void @nor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: nor_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = or <2 x i64> %1, %2
  %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size nor_v2i64
}
198
; Element-wise XOR of two <16 x i8> vectors; the expected selection is MSA xor.v.
; The label directive confines the patterns below to this function's output.
define void @xor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: xor_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <16 x i8> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size xor_v16i8
}
214
; Element-wise XOR of two <8 x i16> vectors; the expected selection is MSA xor.v.
; The label directive confines the patterns below to this function's output.
define void @xor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: xor_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <8 x i16> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size xor_v8i16
}
230
; Element-wise XOR of two <4 x i32> vectors; the expected selection is MSA xor.v.
; The label directive confines the patterns below to this function's output.
define void @xor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: xor_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <4 x i32> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size xor_v4i32
}
246
; Element-wise XOR of two <2 x i64> vectors; the expected selection is MSA xor.v.
; The label directive confines the patterns below to this function's output.
define void @xor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: xor_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <2 x i64> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size xor_v2i64
}
262
; shl of <16 x i8> by a per-element vector amount; the expected selection is MSA sll.b.
; The label directive confines the patterns below to this function's output.
define void @sll_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: sll_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <16 x i8> %1, %2
  ; CHECK-DAG: sll.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size sll_v16i8
}
278
; shl of <8 x i16> by a per-element vector amount; the expected selection is MSA sll.h.
; The label directive confines the patterns below to this function's output.
define void @sll_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: sll_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <8 x i16> %1, %2
  ; CHECK-DAG: sll.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size sll_v8i16
}
294
; shl of <4 x i32> by a per-element vector amount; the expected selection is MSA sll.w.
; The label directive confines the patterns below to this function's output.
define void @sll_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: sll_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <4 x i32> %1, %2
  ; CHECK-DAG: sll.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sll_v4i32
}
310
; shl of <2 x i64> by a per-element vector amount; the expected selection is MSA sll.d.
; The label directive confines the patterns below to this function's output.
define void @sll_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: sll_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <2 x i64> %1, %2
  ; CHECK-DAG: sll.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sll_v2i64
}
326
; shl of <16 x i8> by a constant splat of 1; the expected selection is the
; immediate form slli.b.
; The label directive confines the patterns below to this function's output.
define void @sll_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: sll_v16i8_i:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: slli.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size sll_v16i8_i
}
340
; shl of <8 x i16> by a constant splat of 1; the expected selection is the
; immediate form slli.h.
; The label directive confines the patterns below to this function's output.
define void @sll_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: sll_v8i16_i:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: slli.h [[R4:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size sll_v8i16_i
}
354
; shl of <4 x i32> by a constant splat of 1; the expected selection is the
; immediate form slli.w.
; The label directive confines the patterns below to this function's output.
define void @sll_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: sll_v4i32_i:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: slli.w [[R4:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size sll_v4i32_i
}
368
; shl of <2 x i64> by a constant splat of 1; the expected selection is the
; immediate form slli.d.
; The label directive confines the patterns below to this function's output.
define void @sll_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: sll_v2i64_i:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: slli.d [[R4:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size sll_v2i64_i
}
382
; ashr of <16 x i8> by a per-element vector amount; the expected selection is MSA sra.b.
; The label directive confines the patterns below to this function's output.
define void @sra_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: sra_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <16 x i8> %1, %2
  ; CHECK-DAG: sra.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size sra_v16i8
}
398
; ashr of <8 x i16> by a per-element vector amount; the expected selection is MSA sra.h.
; The label directive confines the patterns below to this function's output.
define void @sra_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: sra_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <8 x i16> %1, %2
  ; CHECK-DAG: sra.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size sra_v8i16
}
414
; ashr of <4 x i32> by a per-element vector amount; the expected selection is MSA sra.w.
; The label directive confines the patterns below to this function's output.
define void @sra_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: sra_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <4 x i32> %1, %2
  ; CHECK-DAG: sra.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sra_v4i32
}
430
; ashr of <2 x i64> by a per-element vector amount; the expected selection is MSA sra.d.
; The label directive confines the patterns below to this function's output.
define void @sra_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: sra_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <2 x i64> %1, %2
  ; CHECK-DAG: sra.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sra_v2i64
}
446
; ashr of <16 x i8> by a constant splat of 1; the expected selection is the
; immediate form srai.b.
; The label directive confines the patterns below to this function's output.
define void @sra_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: sra_v16i8_i:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: srai.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size sra_v16i8_i
}
460
; ashr of <8 x i16> by a constant splat of 1; the expected selection is the
; immediate form srai.h.
; The label directive confines the patterns below to this function's output.
define void @sra_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: sra_v8i16_i:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: srai.h [[R4:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size sra_v8i16_i
}
474
; ashr of <4 x i32> by a constant splat of 1; the expected selection is the
; immediate form srai.w.
; The label directive confines the patterns below to this function's output.
define void @sra_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: sra_v4i32_i:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: srai.w [[R4:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size sra_v4i32_i
}
488
; ashr of <2 x i64> by a constant splat of 1; the expected selection is the
; immediate form srai.d.
; The label directive confines the patterns below to this function's output.
define void @sra_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: sra_v2i64_i:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size sra_v2i64_i
}
502
; lshr of <16 x i8> by a per-element vector amount; the expected selection is MSA srl.b.
; The label directive confines the patterns below to this function's output.
define void @srl_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: srl_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <16 x i8> %1, %2
  ; CHECK-DAG: srl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size srl_v16i8
}
518
; lshr of <8 x i16> by a per-element vector amount; the expected selection is MSA srl.h.
; The label directive confines the patterns below to this function's output.
define void @srl_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: srl_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <8 x i16> %1, %2
  ; CHECK-DAG: srl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size srl_v8i16
}
534
; lshr of <4 x i32> by a per-element vector amount; the expected selection is MSA srl.w.
; The label directive confines the patterns below to this function's output.
define void @srl_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: srl_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <4 x i32> %1, %2
  ; CHECK-DAG: srl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size srl_v4i32
}
550
; lshr of <2 x i64> by a per-element vector amount; the expected selection is MSA srl.d.
; The label directive confines the patterns below to this function's output.
define void @srl_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: srl_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <2 x i64> %1, %2
  ; CHECK-DAG: srl.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size srl_v2i64
}
566
; lshr of <16 x i8> by a constant splat of 1; the expected selection is the
; immediate form srli.b.
; The label directive confines the patterns below to this function's output.
define void @srl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: srl_v16i8_i:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: srli.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size srl_v16i8_i
}
580
; lshr of <8 x i16> by a constant splat of 1; the expected selection is the
; immediate form srli.h.
; The label directive confines the patterns below to this function's output.
define void @srl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: srl_v8i16_i:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: srli.h [[R4:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size srl_v8i16_i
}
594
; lshr of <4 x i32> by a constant splat of 1; the expected selection is the
; immediate form srli.w.
; The label directive confines the patterns below to this function's output.
define void @srl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: srl_v4i32_i:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: srli.w [[R4:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size srl_v4i32_i
}
608
; lshr of <2 x i64> by a constant splat of 1; the expected selection is the
; immediate form srli.d.
; The label directive confines the patterns below to this function's output.
define void @srl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: srl_v2i64_i:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: srli.d [[R4:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size srl_v2i64_i
}
622
; llvm.ctpop on <16 x i8>; the expected selection is MSA pcnt.b.
; The label directive confines the patterns below to this function's output.
define void @ctpop_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: ctpop_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8 (<16 x i8> %1)
  ; CHECK-DAG: pcnt.b [[R3:\$w[0-9]+]], [[R1]]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ctpop_v16i8
}
636
; llvm.ctpop on <8 x i16>; the expected selection is MSA pcnt.h.
; The label directive confines the patterns below to this function's output.
define void @ctpop_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: ctpop_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16 (<8 x i16> %1)
  ; CHECK-DAG: pcnt.h [[R3:\$w[0-9]+]], [[R1]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ctpop_v8i16
}
650
; llvm.ctpop on <4 x i32>; the expected selection is MSA pcnt.w.
; The label directive confines the patterns below to this function's output.
define void @ctpop_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: ctpop_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32 (<4 x i32> %1)
  ; CHECK-DAG: pcnt.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ctpop_v4i32
}
664
; llvm.ctpop on <2 x i64>; the expected selection is MSA pcnt.d.
; The label directive confines the patterns below to this function's output.
define void @ctpop_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: ctpop_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64 (<2 x i64> %1)
  ; CHECK-DAG: pcnt.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ctpop_v2i64
}
678
; llvm.ctlz on <16 x i8>; the expected selection is MSA nlzc.b.
; The label directive confines the patterns below to this function's output.
define void @ctlz_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: ctlz_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <16 x i8> @llvm.ctlz.v16i8 (<16 x i8> %1)
  ; CHECK-DAG: nlzc.b [[R3:\$w[0-9]+]], [[R1]]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ctlz_v16i8
}
692
; llvm.ctlz on <8 x i16>; the expected selection is MSA nlzc.h.
; The label directive confines the patterns below to this function's output.
define void @ctlz_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: ctlz_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <8 x i16> @llvm.ctlz.v8i16 (<8 x i16> %1)
  ; CHECK-DAG: nlzc.h [[R3:\$w[0-9]+]], [[R1]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ctlz_v8i16
}
706
; llvm.ctlz on <4 x i32>; the expected selection is MSA nlzc.w.
; The label directive confines the patterns below to this function's output.
define void @ctlz_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: ctlz_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x i32> @llvm.ctlz.v4i32 (<4 x i32> %1)
  ; CHECK-DAG: nlzc.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ctlz_v4i32
}
720
; llvm.ctlz on <2 x i64>; the expected selection is MSA nlzc.d.
; The label directive confines the patterns below to this function's output.
define void @ctlz_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: ctlz_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x i64> @llvm.ctlz.v2i64 (<2 x i64> %1)
  ; CHECK-DAG: nlzc.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ctlz_v2i64
}
734
; Intrinsic declarations for the ctpop_* and ctlz_* functions in this file.
; NOTE(review): the ctlz intrinsics are declared with a single operand, matching
; the call sites here; confirm the llc under test still accepts this form.
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %val)
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val)
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val)
declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %val)
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %val)
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val)
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %val)