; RUN: llc -mtriple=arm-eabi -mattr=+v8.2a,+neon,+fullfp16 -float-abi=hard < %s | FileCheck %s

; Aggregate return types for the (currently commented-out) vzip/vuzp tests.
%struct.float16x4x2_t = type { [2 x <4 x half>] }
%struct.float16x8x2_t = type { [2 x <8 x half>] }

define dso_local <4 x half> @test_vabs_f16(<4 x half> %a) {
; CHECK-LABEL: test_vabs_f16:
; CHECK: vabs.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vabs1.i = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
  ret <4 x half> %vabs1.i
}

define dso_local <8 x half> @test_vabsq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vabsq_f16:
; CHECK: vabs.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vabs1.i = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
  ret <8 x half> %vabs1.i
}

define dso_local <4 x i16> @test_vceqz_f16(<4 x half> %a) {
; CHECK-LABEL: test_vceqz_f16:
; CHECK: vceq.f16 d0, d0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp oeq <4 x half> %a, zeroinitializer
  %vceqz.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vceqz.i
}

define dso_local <8 x i16> @test_vceqzq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vceqzq_f16:
; CHECK: vceq.f16 q0, q0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp oeq <8 x half> %a, zeroinitializer
  %vceqz.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vceqz.i
}

define dso_local <4 x i16> @test_vcgez_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcgez_f16:
; CHECK: vcge.f16 d0, d0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp oge <4 x half> %a, zeroinitializer
  %vcgez.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vcgez.i
}

define dso_local <8 x i16> @test_vcgezq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcgezq_f16:
; CHECK: vcge.f16 q0, q0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp oge <8 x half> %a, zeroinitializer
  %vcgez.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vcgez.i
}

define dso_local <4 x i16> @test_vcgtz_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcgtz_f16:
; CHECK: vcgt.f16 d0, d0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp ogt <4 x half> %a, zeroinitializer
  %vcgtz.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vcgtz.i
}

define dso_local <8 x i16> @test_vcgtzq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcgtzq_f16:
; CHECK: vcgt.f16 q0, q0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp ogt <8 x half> %a, zeroinitializer
  %vcgtz.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vcgtz.i
}

define dso_local <4 x i16> @test_vclez_f16(<4 x half> %a) {
; CHECK-LABEL: test_vclez_f16:
; CHECK: vcle.f16 d0, d0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp ole <4 x half> %a, zeroinitializer
  %vclez.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vclez.i
}

define dso_local <8 x i16> @test_vclezq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vclezq_f16:
; CHECK: vcle.f16 q0, q0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp ole <8 x half> %a, zeroinitializer
  %vclez.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vclez.i
}

define dso_local <4 x i16> @test_vcltz_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcltz_f16:
; CHECK: vclt.f16 d0, d0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp olt <4 x half> %a, zeroinitializer
  %vcltz.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vcltz.i
}

define dso_local <8 x i16> @test_vcltzq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcltzq_f16:
; CHECK: vclt.f16 q0, q0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp olt <8 x half> %a, zeroinitializer
  %vcltz.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vcltz.i
}

; FIXME (PR38404)
;
;define dso_local <4 x half> @test_vcvt_f16_s16(<4 x i16> %a) {
;entry:
;  %vcvt.i = sitofp <4 x i16> %a to <4 x half>
;  ret <4 x half> %vcvt.i
;}
;
;define dso_local <8 x half> @test_vcvtq_f16_s16(<8 x i16> %a) {
;entry:
;  %vcvt.i = sitofp <8 x i16> %a to <8 x half>
;  ret <8 x half> %vcvt.i
;}

;define dso_local <4 x half> @test_vcvt_f16_u16(<4 x i16> %a) {
;entry:
;  %vcvt.i = uitofp <4 x i16> %a to <4 x half>
;  ret <4 x half> %vcvt.i
;}

;define dso_local <8 x half> @test_vcvtq_f16_u16(<8 x i16> %a) {
;entry:
;  %vcvt.i = uitofp <8 x i16> %a to <8 x half>
;  ret <8 x half> %vcvt.i
;}

;define dso_local <4 x i16> @test_vcvt_s16_f16(<4 x half> %a) {
;entry:
;  %vcvt.i = fptosi <4 x half> %a to <4 x i16>
;  ret <4 x i16> %vcvt.i
;}

;define dso_local <8 x i16> @test_vcvtq_s16_f16(<8 x half> %a) {
;entry:
;  %vcvt.i = fptosi <8 x half> %a to <8 x i16>
;  ret <8 x i16> %vcvt.i
;}

;define dso_local <4 x i16> @test_vcvt_u16_f16(<4 x half> %a) {
;entry:
;  %vcvt.i = fptoui <4 x half> %a to <4 x i16>
;  ret <4 x i16> %vcvt.i
;}

;define dso_local <8 x i16> @test_vcvtq_u16_f16(<8 x half> %a) {
;entry:
;  %vcvt.i = fptoui <8 x half> %a to <8 x i16>
;  ret <8 x i16> %vcvt.i
;}

;define dso_local <4 x i16> @test_vcvta_s16_f16(<4 x half> %a) {
;entry:
;  %vcvta_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half> %a)
;  ret <4 x i16> %vcvta_s16_v1.i
;}

;define dso_local <4 x i16> @test_vcvta_u16_f16(<4 x half> %a) {
;entry:
;  %vcvta_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half> %a)
;  ret <4 x i16> %vcvta_u16_v1.i
;}

;define dso_local <8 x i16> @test_vcvtaq_s16_f16(<8 x half> %a) {
;entry:
;  %vcvtaq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half> %a)
;  ret <8 x i16> %vcvtaq_s16_v1.i
;}

;define dso_local <4 x i16> @test_vcvtm_s16_f16(<4 x half> %a) {
;entry:
;  %vcvtm_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtms.v4i16.v4f16(<4 x half> %a)
;  ret <4 x i16> %vcvtm_s16_v1.i
;}

;define dso_local <8 x i16> @test_vcvtmq_s16_f16(<8 x half> %a) {
;entry:
;  %vcvtmq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtms.v8i16.v8f16(<8 x half> %a)
;  ret <8 x i16> %vcvtmq_s16_v1.i
;}

;define dso_local <4 x i16> @test_vcvtm_u16_f16(<4 x half> %a) {
;entry:
;  %vcvtm_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtmu.v4i16.v4f16(<4 x half> %a)
;  ret <4 x i16> %vcvtm_u16_v1.i
;}

;define dso_local <8 x i16> @test_vcvtmq_u16_f16(<8 x half> %a) {
;entry:
;  %vcvtmq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtmu.v8i16.v8f16(<8 x half> %a)
;  ret <8 x i16> %vcvtmq_u16_v1.i
;}

;define dso_local <4 x i16> @test_vcvtn_s16_f16(<4 x half> %a) {
;entry:
;  %vcvtn_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtns.v4i16.v4f16(<4 x half> %a)
;  ret <4 x i16> %vcvtn_s16_v1.i
;}

;define dso_local <8 x i16> @test_vcvtnq_s16_f16(<8 x half> %a) {
;entry:
;  %vcvtnq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtns.v8i16.v8f16(<8 x half> %a)
;  ret <8 x i16> %vcvtnq_s16_v1.i
;}

;define dso_local <4 x i16> @test_vcvtn_u16_f16(<4 x half> %a) {
;entry:
;  %vcvtn_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtnu.v4i16.v4f16(<4 x half> %a)
;  ret <4 x i16> %vcvtn_u16_v1.i
;}

;define dso_local <8 x i16> @test_vcvtnq_u16_f16(<8 x half> %a) {
;entry:
;  %vcvtnq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtnu.v8i16.v8f16(<8 x half> %a)
;  ret <8 x i16> %vcvtnq_u16_v1.i
;}

;define dso_local <4 x i16> @test_vcvtp_s16_f16(<4 x half> %a) {
;entry:
;  %vcvtp_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtps.v4i16.v4f16(<4 x half> %a)
;  ret <4 x i16> %vcvtp_s16_v1.i
;}

;define dso_local <8 x i16> @test_vcvtpq_s16_f16(<8 x half> %a) {
;entry:
;  %vcvtpq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtps.v8i16.v8f16(<8 x half> %a)
;  ret <8 x i16> %vcvtpq_s16_v1.i
;}

;define dso_local <4 x i16> @test_vcvtp_u16_f16(<4 x half> %a) {
;entry:
;  %vcvtp_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtpu.v4i16.v4f16(<4 x half> %a)
;  ret <4 x i16> %vcvtp_u16_v1.i
;}

;define dso_local <8 x i16> @test_vcvtpq_u16_f16(<8 x half> %a) {
;entry:
;  %vcvtpq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtpu.v8i16.v8f16(<8 x half> %a)
;  ret <8 x i16> %vcvtpq_u16_v1.i
;}

define dso_local <4 x half> @test_vneg_f16(<4 x half> %a) {
; CHECK-LABEL: test_vneg_f16:
; CHECK: vneg.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  ; fsub from -0.0 (0xH8000) is the canonical fneg pattern.
  %sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
  ret <4 x half> %sub.i
}

define dso_local <8 x half> @test_vnegq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vnegq_f16:
; CHECK: vneg.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  ; fsub from -0.0 (0xH8000) is the canonical fneg pattern.
  %sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
  ret <8 x half> %sub.i
}

define dso_local <4 x half> @test_vrecpe_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrecpe_f16:
; CHECK: vrecpe.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vrecpe_v1.i = tail call <4 x half> @llvm.arm.neon.vrecpe.v4f16(<4 x half> %a)
  ret <4 x half> %vrecpe_v1.i
}

define dso_local <8 x half> @test_vrecpeq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrecpeq_f16:
; CHECK: vrecpe.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vrecpeq_v1.i = tail call <8 x half> @llvm.arm.neon.vrecpe.v8f16(<8 x half> %a)
  ret <8 x half> %vrecpeq_v1.i
}

define dso_local <4 x half> @test_vrnd_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrnd_f16:
; CHECK: vrintz.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vrnd_v1.i = tail call <4 x half> @llvm.arm.neon.vrintz.v4f16(<4 x half> %a)
  ret <4 x half> %vrnd_v1.i
}

define dso_local <8 x half> @test_vrndq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndq_f16:
; CHECK: vrintz.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vrndq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintz.v8f16(<8 x half> %a)
  ret <8 x half> %vrndq_v1.i
}

define dso_local <4 x half> @test_vrnda_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrnda_f16:
; CHECK: vrinta.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vrnda_v1.i = tail call <4 x half> @llvm.arm.neon.vrinta.v4f16(<4 x half> %a)
  ret <4 x half> %vrnda_v1.i
}

define dso_local <8 x half> @test_vrndaq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndaq_f16:
; CHECK: vrinta.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vrndaq_v1.i = tail call <8 x half> @llvm.arm.neon.vrinta.v8f16(<8 x half> %a)
  ret <8 x half> %vrndaq_v1.i
}

define dso_local <4 x half> @test_vrndm_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndm_f16:
; CHECK: vrintm.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vrndm_v1.i = tail call <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half> %a)
  ret <4 x half> %vrndm_v1.i
}

define dso_local <8 x half> @test_vrndmq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndmq_f16:
; CHECK: vrintm.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vrndmq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half> %a)
  ret <8 x half> %vrndmq_v1.i
}

define dso_local <4 x half> @test_vrndn_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndn_f16:
; CHECK: vrintn.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vrndn_v1.i = tail call <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half> %a)
  ret <4 x half> %vrndn_v1.i
}

define dso_local <8 x half> @test_vrndnq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndnq_f16:
; CHECK: vrintn.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vrndnq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half> %a)
  ret <8 x half> %vrndnq_v1.i
}

define dso_local <4 x half> @test_vrndp_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndp_f16:
; CHECK: vrintp.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vrndp_v1.i = tail call <4 x half> @llvm.arm.neon.vrintp.v4f16(<4 x half> %a)
  ret <4 x half> %vrndp_v1.i
}

define dso_local <8 x half> @test_vrndpq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndpq_f16:
; CHECK: vrintp.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vrndpq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintp.v8f16(<8 x half> %a)
  ret <8 x half> %vrndpq_v1.i
}

define dso_local <4 x half> @test_vrndx_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndx_f16:
; CHECK: vrintx.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vrndx_v1.i = tail call <4 x half> @llvm.arm.neon.vrintx.v4f16(<4 x half> %a)
  ret <4 x half> %vrndx_v1.i
}

define dso_local <8 x half> @test_vrndxq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndxq_f16:
; CHECK: vrintx.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vrndxq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintx.v8f16(<8 x half> %a)
  ret <8 x half> %vrndxq_v1.i
}

define dso_local <4 x half> @test_vrsqrte_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrsqrte_f16:
; CHECK: vrsqrte.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vrsqrte_v1.i = tail call <4 x half> @llvm.arm.neon.vrsqrte.v4f16(<4 x half> %a)
  ret <4 x half> %vrsqrte_v1.i
}

define dso_local <8 x half> @test_vrsqrteq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrsqrteq_f16:
; CHECK: vrsqrte.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vrsqrteq_v1.i = tail call <8 x half> @llvm.arm.neon.vrsqrte.v8f16(<8 x half> %a)
  ret <8 x half> %vrsqrteq_v1.i
}

define dso_local <4 x half> @test_vadd_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vadd_f16:
; CHECK: vadd.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %add.i = fadd <4 x half> %a, %b
  ret <4 x half> %add.i
}

define dso_local <8 x half> @test_vaddq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vaddq_f16:
; CHECK: vadd.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %add.i = fadd <8 x half> %a, %b
  ret <8 x half> %add.i
}

define dso_local <4 x half> @test_vabd_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vabd_f16:
; CHECK: vabd.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %vabd_v2.i = tail call <4 x half> @llvm.arm.neon.vabds.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vabd_v2.i
}

define dso_local <8 x half> @test_vabdq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vabdq_f16:
; CHECK: vabd.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %vabdq_v2.i = tail call <8 x half> @llvm.arm.neon.vabds.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vabdq_v2.i
}

define dso_local <4 x i16> @test_vcage_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcage_f16:
; CHECK: vacge.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %vcage_v2.i = tail call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x i16> %vcage_v2.i
}

define dso_local <8 x i16> @test_vcageq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcageq_f16:
; CHECK: vacge.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %vcageq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x i16> %vcageq_v2.i
}

; FIXME (PR38404)
;
;define dso_local <4 x i16> @test_vcagt_f16(<4 x half> %a, <4 x half> %b) {
;entry:
;  %vcagt_v2.i = tail call <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
;  ret <4 x i16> %vcagt_v2.i
;}
;
;define dso_local <8 x i16> @test_vcagtq_f16(<8 x half> %a, <8 x half> %b) {
;entry:
;  %vcagtq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
;  ret <8 x i16> %vcagtq_v2.i
;}

define dso_local <4 x i16> @test_vcale_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcale_f16:
; CHECK: vacge.f16 d0, d1, d0
; CHECK-NEXT: bx lr
entry:
  ; vcale(a, b) == vcage(b, a): operands swapped, same intrinsic.
  %vcale_v2.i = tail call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
  ret <4 x i16> %vcale_v2.i
}

define dso_local <8 x i16> @test_vcaleq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcaleq_f16:
; CHECK: vacge.f16 q0, q1, q0
; CHECK-NEXT: bx lr
entry:
  ; vcaleq(a, b) == vcageq(b, a): operands swapped, same intrinsic.
  %vcaleq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
  ret <8 x i16> %vcaleq_v2.i
}

; FIXME (PR38404)
;
;define dso_local <4 x i16> @test_vcalt_f16(<4 x half> %a, <4 x half> %b) {
;entry:
;  %vcalt_v2.i = tail call <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
;  ret <4 x i16> %vcalt_v2.i
;}

;define dso_local <8 x i16> @test_vcaltq_f16(<8 x half> %a, <8 x half> %b) {
;entry:
;  %vcaltq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
;  ret <8 x i16> %vcaltq_v2.i
;}

define dso_local <4 x i16> @test_vceq_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vceq_f16:
; CHECK: vceq.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %cmp.i = fcmp oeq <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vceqq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vceqq_f16:
; CHECK: vceq.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %cmp.i = fcmp oeq <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}

define dso_local <4 x i16> @test_vcge_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcge_f16:
; CHECK: vcge.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %cmp.i = fcmp oge <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vcgeq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcgeq_f16:
; CHECK: vcge.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %cmp.i = fcmp oge <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}

define dso_local <4 x i16> @test_vcgt_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcgt_f16:
; CHECK: vcgt.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %cmp.i = fcmp ogt <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vcgtq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcgtq_f16:
; CHECK: vcgt.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %cmp.i = fcmp ogt <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}

define dso_local <4 x i16> @test_vcle_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcle_f16:
; CHECK: vcge.f16 d0, d1, d0
; CHECK-NEXT: bx lr
entry:
  ; a <= b lowers as vcge with swapped operands.
  %cmp.i = fcmp ole <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vcleq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcleq_f16:
; CHECK: vcge.f16 q0, q1, q0
; CHECK-NEXT: bx lr
entry:
  ; a <= b lowers as vcge with swapped operands.
  %cmp.i = fcmp ole <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}

define dso_local <4 x i16> @test_vclt_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vclt_f16:
; CHECK: vcgt.f16 d0, d1, d0
; CHECK-NEXT: bx lr
entry:
  ; a < b lowers as vcgt with swapped operands.
  %cmp.i = fcmp olt <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vcltq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcltq_f16:
; CHECK: vcgt.f16 q0, q1, q0
; CHECK-NEXT: bx lr
entry:
  ; a < b lowers as vcgt with swapped operands.
  %cmp.i = fcmp olt <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}

define dso_local <4 x half> @test_vcvt_n_f16_s16(<4 x i16> %a) {
; CHECK-LABEL: test_vcvt_n_f16_s16:
; CHECK: vcvt.f16.s16 d0, d0, #2
; CHECK-NEXT: bx lr
entry:
  %vcvt_n1 = tail call <4 x half> @llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16> %a, i32 2)
  ret <4 x half> %vcvt_n1
}

declare <4 x half> @llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16>, i32) #2

define dso_local <8 x half> @test_vcvtq_n_f16_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_n_f16_s16:
; CHECK: vcvt.f16.s16 q0, q0, #2
; CHECK-NEXT: bx lr
entry:
  %vcvt_n1 = tail call <8 x half> @llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16> %a, i32 2)
  ret <8 x half> %vcvt_n1
}

declare <8 x half> @llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16>, i32) #2

define dso_local <4 x half> @test_vcvt_n_f16_u16(<4 x i16> %a) {
; CHECK-LABEL: test_vcvt_n_f16_u16:
; CHECK: vcvt.f16.u16 d0, d0, #2
; CHECK-NEXT: bx lr
entry:
  %vcvt_n1 = tail call <4 x half> @llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16> %a, i32 2)
  ret <4 x half> %vcvt_n1
}

declare <4 x half> @llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16>, i32) #2

define dso_local <8 x half> @test_vcvtq_n_f16_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_n_f16_u16:
; CHECK: vcvt.f16.u16 q0, q0, #2
; CHECK-NEXT: bx lr
entry:
  %vcvt_n1 = tail call <8 x half> @llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16> %a, i32 2)
  ret <8 x half> %vcvt_n1
}

declare <8 x half> @llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16>, i32) #2

define dso_local <4 x i16> @test_vcvt_n_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvt_n_s16_f16:
; CHECK: vcvt.s16.f16 d0, d0, #2
; CHECK-NEXT: bx lr
entry:
  %vcvt_n1 = tail call <4 x i16> @llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half> %a, i32 2)
  ret <4 x i16> %vcvt_n1
}

declare <4 x i16> @llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half>, i32) #2

define dso_local <8 x i16> @test_vcvtq_n_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_n_s16_f16:
; CHECK: vcvt.s16.f16 q0, q0, #2
; CHECK-NEXT: bx lr
entry:
  %vcvt_n1 = tail call <8 x i16> @llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half> %a, i32 2)
  ret <8 x i16> %vcvt_n1
}

declare <8 x i16> @llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half>, i32) #2

define dso_local <4 x i16> @test_vcvt_n_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvt_n_u16_f16:
; CHECK: vcvt.u16.f16 d0, d0, #2
; CHECK-NEXT: bx lr
entry:
  %vcvt_n1 = tail call <4 x i16> @llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half> %a, i32 2)
  ret <4 x i16> %vcvt_n1
}

declare <4 x i16> @llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half>, i32) #2

define dso_local <8 x i16> @test_vcvtq_n_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_n_u16_f16:
; CHECK: vcvt.u16.f16 q0, q0, #2
; CHECK-NEXT: bx lr
entry:
  %vcvt_n1 = tail call <8 x i16> @llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half> %a, i32 2)
  ret <8 x i16> %vcvt_n1
}

declare <8 x i16> @llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half>, i32) #2

define dso_local <4 x half> @test_vmax_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vmax_f16:
; CHECK: vmax.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %vmax_v2.i = tail call <4 x half> @llvm.arm.neon.vmaxs.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vmax_v2.i
}

define dso_local <8 x half> @test_vmaxq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vmaxq_f16:
; CHECK: vmax.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %vmaxq_v2.i = tail call <8 x half> @llvm.arm.neon.vmaxs.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vmaxq_v2.i
}

; FIXME (PR38404)
;
;define dso_local <4 x half> @test_vmaxnm_f16(<4 x half> %a, <4 x half> %b) {
;entry:
;  %vmaxnm_v2.i = tail call <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half> %a, <4 x half> %b)
;  ret <4 x half> %vmaxnm_v2.i
;}

;define dso_local <8 x half> @test_vmaxnmq_f16(<8 x half> %a, <8 x half> %b) {
;entry:
;  %vmaxnmq_v2.i = tail call <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b)
;  ret <8 x half> %vmaxnmq_v2.i
;}

;define dso_local <4 x half> @test_vmin_f16(<4 x half> %a, <4 x half> %b) {
;entry:
;  %vmin_v2.i = tail call <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half> %a, <4 x half> %b)
;  ret <4 x half> %vmin_v2.i
;}

;define dso_local <8 x half> @test_vminq_f16(<8 x half> %a, <8 x half> %b) {
;entry:
;  %vminq_v2.i = tail call <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half> %a, <8 x half> %b)
;  ret <8 x half> %vminq_v2.i
;}

;define dso_local <4 x half> @test_vminnm_f16(<4 x half> %a, <4 x half> %b) {
;entry:
;  %vminnm_v2.i = tail call <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half> %a, <4 x half> %b)
;  ret <4 x half> %vminnm_v2.i
;}

;define dso_local <8 x half> @test_vminnmq_f16(<8 x half> %a, <8 x half> %b) {
;entry:
;  %vminnmq_v2.i = tail call <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half> %a, <8 x half> %b)
;  ret <8 x half> %vminnmq_v2.i
;}

define dso_local <4 x half> @test_vmul_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vmul_f16:
; CHECK: vmul.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %mul.i = fmul <4 x half> %a, %b
  ret <4 x half> %mul.i
}

define dso_local <8 x half> @test_vmulq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vmulq_f16:
; CHECK: vmul.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %mul.i = fmul <8 x half> %a, %b
  ret <8 x half> %mul.i
}

define dso_local <4 x half> @test_vpadd_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vpadd_f16:
; CHECK: vpadd.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %vpadd_v2.i = tail call <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vpadd_v2.i
}

define dso_local <4 x half> @test_vpmax_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vpmax_f16:
; CHECK: vpmax.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %vpmax_v2.i = tail call <4 x half> @llvm.arm.neon.vpmaxs.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vpmax_v2.i
}

define dso_local <4 x half> @test_vpmin_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vpmin_f16:
; CHECK: vpmin.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %vpmin_v2.i = tail call <4 x half> @llvm.arm.neon.vpmins.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vpmin_v2.i
}

define dso_local <4 x half> @test_vrecps_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vrecps_f16:
; CHECK: vrecps.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %vrecps_v2.i = tail call <4 x half> @llvm.arm.neon.vrecps.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vrecps_v2.i
}

define dso_local <8 x half> @test_vrecpsq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vrecpsq_f16:
; CHECK: vrecps.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %vrecpsq_v2.i = tail call <8 x half> @llvm.arm.neon.vrecps.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vrecpsq_v2.i
}

define dso_local <4 x half> @test_vrsqrts_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vrsqrts_f16:
; CHECK: vrsqrts.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %vrsqrts_v2.i = tail call <4 x half> @llvm.arm.neon.vrsqrts.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vrsqrts_v2.i
}

define dso_local <8 x half> @test_vrsqrtsq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vrsqrtsq_f16:
; CHECK: vrsqrts.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %vrsqrtsq_v2.i = tail call <8 x half> @llvm.arm.neon.vrsqrts.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vrsqrtsq_v2.i
}

define dso_local <4 x half> @test_vsub_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vsub_f16:
; CHECK: vsub.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %sub.i = fsub <4 x half> %a, %b
  ret <4 x half> %sub.i
}

define dso_local <8 x half> @test_vsubq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vsubq_f16:
; CHECK: vsub.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %sub.i = fsub <8 x half> %a, %b
  ret <8 x half> %sub.i
}

; FIXME (PR38404)
;
;define dso_local <4 x half> @test_vfma_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
;entry:
;  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
;  ret <4 x half> %0
;}

;define dso_local <8 x half> @test_vfmaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
;entry:
;  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
;  ret <8 x half> %0
;}

;define dso_local <4 x half> @test_vfms_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
;entry:
;  %sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
;  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub.i, <4 x half> %c, <4 x half> %a)
;  ret <4 x half> %0
;}

;define dso_local <8 x half> @test_vfmsq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
;entry:
;  %sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
;  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub.i, <8 x half> %c, <8 x half> %a)
;  ret <8 x half> %0
;}

;define dso_local <4 x half> @test_vmul_lane_f16(<4 x half> %a, <4 x half> %b) {
;entry:
;  %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
;  %mul = fmul <4 x half> %shuffle, %a
;  ret <4 x half> %mul
;}

;define dso_local <8 x half> @test_vmulq_lane_f16(<8 x half> %a, <4 x half> %b) {
;entry:
;  %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
;  %mul = fmul <8 x half> %shuffle, %a
;  ret <8 x half> %mul
;}

;define dso_local <4 x half> @test_vmul_n_f16(<4 x half> %a, float %b.coerce) {
;entry:
;  %0 = bitcast float %b.coerce to i32
;  %tmp.0.extract.trunc = trunc i32 %0 to i16
;  %1 = bitcast i16 %tmp.0.extract.trunc to half
;  %vecinit = insertelement <4 x half> undef, half %1, i32 0
;  %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
;  %mul = fmul <4 x half> %vecinit4, %a
;  ret <4 x half> %mul
;}

;define dso_local <8 x half> @test_vmulq_n_f16(<8 x half> %a, float %b.coerce) {
;entry:
;  %0 = bitcast float %b.coerce to i32
;  %tmp.0.extract.trunc = trunc i32 %0 to i16
;  %1 = bitcast i16 %tmp.0.extract.trunc to half
;  %vecinit = insertelement <8 x half> undef, half %1, i32 0
;  %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
;  %mul = fmul <8 x half> %vecinit8, %a
;  ret <8 x half> %mul
;}

define dso_local <4 x half> @test_vbsl_f16(<4 x i16> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_vbsl_f16:
; CHECK: vbsl d0, d1, d2
; CHECK-NEXT: bx lr
entry:
  ; vbsl is a bitwise select, so all operands go through <8 x i8>.
  %0 = bitcast <4 x i16> %a to <8 x i8>
  %1 = bitcast <4 x half> %b to <8 x i8>
  %2 = bitcast <4 x half> %c to <8 x i8>
  %vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2)
  %3 = bitcast <8 x i8> %vbsl_v.i to <4 x half>
  ret <4 x half> %3
}

define dso_local <8 x half> @test_vbslq_f16(<8 x i16> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vbslq_f16:
; CHECK: vbsl q0, q1, q2
; CHECK-NEXT: bx lr
entry:
  ; vbsl is a bitwise select, so all operands go through <16 x i8>.
  %0 = bitcast <8 x i16> %a to <16 x i8>
  %1 = bitcast <8 x half> %b to <16 x i8>
  %2 = bitcast <8 x half> %c to <16 x i8>
  %vbslq_v.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
  %3 = bitcast <16 x i8> %vbslq_v.i to <8 x half>
  ret <8 x half> %3
}

; FIXME (PR38404)
;
;define dso_local %struct.float16x4x2_t @test_vzip_f16(<4 x half> %a, <4 x half> %b) {
;entry:
;  %vzip.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
;  %vzip1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
;  %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vzip.i, 0, 0
;  %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vzip1.i, 0, 1
;  ret %struct.float16x4x2_t %.fca.0.1.insert
;}
;
;define dso_local %struct.float16x8x2_t @test_vzipq_f16(<8 x half> %a, <8 x half> %b) {
;entry:
;  %vzip.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
;  %vzip1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
;  %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vzip.i, 0, 0
;  %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vzip1.i, 0, 1
;  ret %struct.float16x8x2_t %.fca.0.1.insert
;}
;
;define dso_local %struct.float16x4x2_t @test_vuzp_f16(<4 x half> %a, <4 x half> %b) {
;entry:
;  %vuzp.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
;  %vuzp1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
;  %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vuzp.i, 0, 0
;  %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vuzp1.i, 0, 1
;  ret %struct.float16x4x2_t %.fca.0.1.insert
;}
;
;define dso_local %struct.float16x8x2_t @test_vuzpq_f16(<8 x half> %a, <8 x half> %b) {
;entry:
;  %vuzp.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
;  %vuzp1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
;  %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vuzp.i, 0, 0
993; %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vuzp1.i, 0, 1
994; ret %struct.float16x8x2_t %.fca.0.1.insert
995;}
996;
997;define dso_local %struct.float16x4x2_t @test_vtrn_f16(<4 x half> %a, <4 x half> %b) {
998;entry:
999; %vtrn.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1000; %vtrn1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1001; %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vtrn.i, 0, 0
1002; %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vtrn1.i, 0, 1
1003; ret %struct.float16x4x2_t %.fca.0.1.insert
1004;}
1005;
1006;define dso_local %struct.float16x8x2_t @test_vtrnq_f16(<8 x half> %a, <8 x half> %b) {
1007;entry:
1008; %vtrn.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1009; %vtrn1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1010; %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vtrn.i, 0, 0
1011; %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vtrn1.i, 0, 1
1012; ret %struct.float16x8x2_t %.fca.0.1.insert
1013;}
1014;
1015;define dso_local <4 x half> @test_vmov_n_f16(float %a.coerce) {
1016;entry:
1017; %0 = bitcast float %a.coerce to i32
1018; %tmp.0.extract.trunc = trunc i32 %0 to i16
1019; %1 = bitcast i16 %tmp.0.extract.trunc to half
1020; %vecinit = insertelement <4 x half> undef, half %1, i32 0
1021; %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
1022; ret <4 x half> %vecinit4
1023;}
1024;
1025;define dso_local <8 x half> @test_vmovq_n_f16(float %a.coerce) {
1026;entry:
1027; %0 = bitcast float %a.coerce to i32
1028; %tmp.0.extract.trunc = trunc i32 %0 to i16
1029; %1 = bitcast i16 %tmp.0.extract.trunc to half
1030; %vecinit = insertelement <8 x half> undef, half %1, i32 0
1031; %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
1032; ret <8 x half> %vecinit8
1033;}
1034;
1035;define dso_local <4 x half> @test_vdup_n_f16(float %a.coerce) {
1036;entry:
1037; %0 = bitcast float %a.coerce to i32
1038; %tmp.0.extract.trunc = trunc i32 %0 to i16
1039; %1 = bitcast i16 %tmp.0.extract.trunc to half
1040; %vecinit = insertelement <4 x half> undef, half %1, i32 0
1041; %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
1042; ret <4 x half> %vecinit4
1043;}
1044;
1045;define dso_local <8 x half> @test_vdupq_n_f16(float %a.coerce) {
1046;entry:
1047; %0 = bitcast float %a.coerce to i32
1048; %tmp.0.extract.trunc = trunc i32 %0 to i16
1049; %1 = bitcast i16 %tmp.0.extract.trunc to half
1050; %vecinit = insertelement <8 x half> undef, half %1, i32 0
1051; %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
1052; ret <8 x half> %vecinit8
1053;}
1054;
1055;define dso_local <4 x half> @test_vdup_lane_f16(<4 x half> %a) {
1056;entry:
1057; %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1058; ret <4 x half> %shuffle
1059;}
1060;
1061;define dso_local <8 x half> @test_vdupq_lane_f16(<4 x half> %a) {
1062;entry:
1063; %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
1064; ret <8 x half> %shuffle
1065;}
1066;
1067;define dso_local <4 x half> @test_vext_f16(<4 x half> %a, <4 x half> %b) {
1068;entry:
1069; %vext = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
1070; ret <4 x half> %vext
1071;}
1072;
1073;define dso_local <8 x half> @test_vextq_f16(<8 x half> %a, <8 x half> %b) {
1074;entry:
1075; %vext = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
1076; ret <8 x half> %vext
1077;}
1078;
1079;define dso_local <4 x half> @test_vrev64_f16(<4 x half> %a) {
1080;entry:
1081; %shuffle.i = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1082; ret <4 x half> %shuffle.i
1083;}
1084;
1085;define dso_local <8 x half> @test_vrev64q_f16(<8 x half> %a) {
1086;entry:
1087; %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
1088; ret <8 x half> %shuffle.i
1089;}
1090
1091declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
1092declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
1093declare <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half>)
1094declare <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half>)
1095declare <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half>)
1096declare <4 x i16> @llvm.arm.neon.vcvtms.v4i16.v4f16(<4 x half>)
1097declare <8 x i16> @llvm.arm.neon.vcvtms.v8i16.v8f16(<8 x half>)
1098declare <4 x i16> @llvm.arm.neon.vcvtmu.v4i16.v4f16(<4 x half>)
1099declare <8 x i16> @llvm.arm.neon.vcvtmu.v8i16.v8f16(<8 x half>)
1100declare <4 x i16> @llvm.arm.neon.vcvtns.v4i16.v4f16(<4 x half>)
1101declare <8 x i16> @llvm.arm.neon.vcvtns.v8i16.v8f16(<8 x half>)
1102declare <4 x i16> @llvm.arm.neon.vcvtnu.v4i16.v4f16(<4 x half>)
1103declare <8 x i16> @llvm.arm.neon.vcvtnu.v8i16.v8f16(<8 x half>)
1104declare <4 x i16> @llvm.arm.neon.vcvtps.v4i16.v4f16(<4 x half>)
1105declare <8 x i16> @llvm.arm.neon.vcvtps.v8i16.v8f16(<8 x half>)
1106declare <4 x i16> @llvm.arm.neon.vcvtpu.v4i16.v4f16(<4 x half>)
1107declare <8 x i16> @llvm.arm.neon.vcvtpu.v8i16.v8f16(<8 x half>)
1108declare <4 x half> @llvm.arm.neon.vrecpe.v4f16(<4 x half>)
1109declare <8 x half> @llvm.arm.neon.vrecpe.v8f16(<8 x half>)
1110declare <4 x half> @llvm.arm.neon.vrintz.v4f16(<4 x half>)
1111declare <8 x half> @llvm.arm.neon.vrintz.v8f16(<8 x half>)
1112declare <4 x half> @llvm.arm.neon.vrinta.v4f16(<4 x half>)
1113declare <8 x half> @llvm.arm.neon.vrinta.v8f16(<8 x half>)
1114declare <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half>)
1115declare <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half>)
1116declare <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half>)
1117declare <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half>)
1118declare <4 x half> @llvm.arm.neon.vrintp.v4f16(<4 x half>)
1119declare <8 x half> @llvm.arm.neon.vrintp.v8f16(<8 x half>)
1120declare <4 x half> @llvm.arm.neon.vrintx.v4f16(<4 x half>)
1121declare <8 x half> @llvm.arm.neon.vrintx.v8f16(<8 x half>)
1122declare <4 x half> @llvm.arm.neon.vrsqrte.v4f16(<4 x half>)
1123declare <8 x half> @llvm.arm.neon.vrsqrte.v8f16(<8 x half>)
1124declare <4 x half> @llvm.arm.neon.vabds.v4f16(<4 x half>, <4 x half>)
1125declare <8 x half> @llvm.arm.neon.vabds.v8f16(<8 x half>, <8 x half>)
1126declare <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half>, <4 x half>)
1127declare <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half>, <8 x half>)
1128declare <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half>, <4 x half>)
1129declare <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half>, <8 x half>)
1130declare <4 x half> @llvm.arm.neon.vmaxs.v4f16(<4 x half>, <4 x half>)
1131declare <8 x half> @llvm.arm.neon.vmaxs.v8f16(<8 x half>, <8 x half>)
1132declare <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half>, <4 x half>)
1133declare <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half>, <8 x half>)
1134declare <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half>, <4 x half>)
1135declare <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half>, <8 x half>)
1136declare <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half>, <4 x half>)
1137declare <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half>, <8 x half>)
1138declare <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half>, <4 x half>)
1139declare <4 x half> @llvm.arm.neon.vpmaxs.v4f16(<4 x half>, <4 x half>)
1140declare <4 x half> @llvm.arm.neon.vpmins.v4f16(<4 x half>, <4 x half>)
1141declare <4 x half> @llvm.arm.neon.vrecps.v4f16(<4 x half>, <4 x half>)
1142declare <8 x half> @llvm.arm.neon.vrecps.v8f16(<8 x half>, <8 x half>)
1143declare <4 x half> @llvm.arm.neon.vrsqrts.v4f16(<4 x half>, <4 x half>)
1144declare <8 x half> @llvm.arm.neon.vrsqrts.v8f16(<8 x half>, <8 x half>)
1145declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
1146declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
1147declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>)
1148declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)