blob: 2b532a82b0eedd6c416031d3d5efd742ae6afc2f [file] [log] [blame]
; RUN: llc -mtriple=arm-eabi -mattr=+v8.2a,+neon,+fullfp16 -float-abi=hard < %s | FileCheck %s

%struct.float16x4x2_t = type { [2 x <4 x half>] }
%struct.float16x8x2_t = type { [2 x <8 x half>] }

; vabs on a 64-bit (4 x f16) vector should select a single vabs.f16 on d0.
define dso_local <4 x half> @test_vabs_f16(<4 x half> %a) {
; CHECK-LABEL: test_vabs_f16:
; CHECK: vabs.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vabs1.i = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
  ret <4 x half> %vabs1.i
}

; 128-bit (8 x f16) variant: vabs.f16 on the q0 register.
define dso_local <8 x half> @test_vabsq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vabsq_f16:
; CHECK: vabs.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vabs1.i = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
  ret <8 x half> %vabs1.i
}
23
; Compare-against-zero tests: an ordered fcmp with zeroinitializer followed by
; a sign-extend of the i1 mask should select the immediate-#0 form of the
; f16 vector compare instructions.

define dso_local <4 x i16> @test_vceqz_f16(<4 x half> %a) {
; CHECK-LABEL: test_vceqz_f16:
; CHECK: vceq.f16 d0, d0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp oeq <4 x half> %a, zeroinitializer
  %vceqz.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vceqz.i
}

define dso_local <8 x i16> @test_vceqzq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vceqzq_f16:
; CHECK: vceq.f16 q0, q0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp oeq <8 x half> %a, zeroinitializer
  %vceqz.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vceqz.i
}

define dso_local <4 x i16> @test_vcgez_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcgez_f16:
; CHECK: vcge.f16 d0, d0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp oge <4 x half> %a, zeroinitializer
  %vcgez.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vcgez.i
}

define dso_local <8 x i16> @test_vcgezq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcgezq_f16:
; CHECK: vcge.f16 q0, q0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp oge <8 x half> %a, zeroinitializer
  %vcgez.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vcgez.i
}

define dso_local <4 x i16> @test_vcgtz_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcgtz_f16:
; CHECK: vcgt.f16 d0, d0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp ogt <4 x half> %a, zeroinitializer
  %vcgtz.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vcgtz.i
}

define dso_local <8 x i16> @test_vcgtzq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcgtzq_f16:
; CHECK: vcgt.f16 q0, q0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp ogt <8 x half> %a, zeroinitializer
  %vcgtz.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vcgtz.i
}

define dso_local <4 x i16> @test_vclez_f16(<4 x half> %a) {
; CHECK-LABEL: test_vclez_f16:
; CHECK: vcle.f16 d0, d0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp ole <4 x half> %a, zeroinitializer
  %vclez.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vclez.i
}

define dso_local <8 x i16> @test_vclezq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vclezq_f16:
; CHECK: vcle.f16 q0, q0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp ole <8 x half> %a, zeroinitializer
  %vclez.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vclez.i
}

define dso_local <4 x i16> @test_vcltz_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcltz_f16:
; CHECK: vclt.f16 d0, d0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp olt <4 x half> %a, zeroinitializer
  %vcltz.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vcltz.i
}

define dso_local <8 x i16> @test_vcltzq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcltzq_f16:
; CHECK: vclt.f16 q0, q0, #0
; CHECK-NEXT: bx lr
entry:
  %0 = fcmp olt <8 x half> %a, zeroinitializer
  %vcltz.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vcltz.i
}
123
; FIXME (PR38404): int<->fp16 vector conversions not yet selectable; re-enable
; these tests once the PR is fixed.
;
;define dso_local <4 x half> @test_vcvt_f16_s16(<4 x i16> %a) {
;entry:
; %vcvt.i = sitofp <4 x i16> %a to <4 x half>
; ret <4 x half> %vcvt.i
;}
;
;define dso_local <8 x half> @test_vcvtq_f16_s16(<8 x i16> %a) {
;entry:
; %vcvt.i = sitofp <8 x i16> %a to <8 x half>
; ret <8 x half> %vcvt.i
;}

;define dso_local <4 x half> @test_vcvt_f16_u16(<4 x i16> %a) {
;entry:
; %vcvt.i = uitofp <4 x i16> %a to <4 x half>
; ret <4 x half> %vcvt.i
;}

;define dso_local <8 x half> @test_vcvtq_f16_u16(<8 x i16> %a) {
;entry:
; %vcvt.i = uitofp <8 x i16> %a to <8 x half>
; ret <8 x half> %vcvt.i
;}

;define dso_local <4 x i16> @test_vcvt_s16_f16(<4 x half> %a) {
;entry:
; %vcvt.i = fptosi <4 x half> %a to <4 x i16>
; ret <4 x i16> %vcvt.i
;}

;define dso_local <8 x i16> @test_vcvtq_s16_f16(<8 x half> %a) {
;entry:
; %vcvt.i = fptosi <8 x half> %a to <8 x i16>
; ret <8 x i16> %vcvt.i
;}

;define dso_local <4 x i16> @test_vcvt_u16_f16(<4 x half> %a) {
;entry:
; %vcvt.i = fptoui <4 x half> %a to <4 x i16>
; ret <4 x i16> %vcvt.i
;}

;define dso_local <8 x i16> @test_vcvtq_u16_f16(<8 x half> %a) {
;entry:
; %vcvt.i = fptoui <8 x half> %a to <8 x i16>
; ret <8 x i16> %vcvt.i
;}
173
; Rounding fp16->int conversions (vcvta/vcvtm/vcvtn/vcvtp: to-nearest-away,
; toward -inf, to-nearest-even, toward +inf) via the ARM NEON intrinsics.

define dso_local <4 x i16> @test_vcvta_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvta_s16_f16:
; CHECK: vcvta.s16.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vcvta_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvta_s16_v1.i
}

define dso_local <4 x i16> @test_vcvta_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvta_u16_f16:
; CHECK: vcvta.u16.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vcvta_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvta_u16_v1.i
}

define dso_local <8 x i16> @test_vcvtaq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtaq_s16_f16:
; CHECK: vcvta.s16.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vcvtaq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtaq_s16_v1.i
}

define dso_local <4 x i16> @test_vcvtm_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtm_s16_f16:
; CHECK: vcvtm.s16.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vcvtm_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtms.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtm_s16_v1.i
}

define dso_local <8 x i16> @test_vcvtmq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtmq_s16_f16:
; CHECK: vcvtm.s16.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vcvtmq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtms.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtmq_s16_v1.i
}

define dso_local <4 x i16> @test_vcvtm_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtm_u16_f16:
; CHECK: vcvtm.u16.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vcvtm_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtmu.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtm_u16_v1.i
}

define dso_local <8 x i16> @test_vcvtmq_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtmq_u16_f16:
; CHECK: vcvtm.u16.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vcvtmq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtmu.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtmq_u16_v1.i
}

define dso_local <4 x i16> @test_vcvtn_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtn_s16_f16:
; CHECK: vcvtn.s16.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vcvtn_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtns.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtn_s16_v1.i
}

define dso_local <8 x i16> @test_vcvtnq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtnq_s16_f16:
; CHECK: vcvtn.s16.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vcvtnq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtns.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtnq_s16_v1.i
}

define dso_local <4 x i16> @test_vcvtn_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtn_u16_f16:
; CHECK: vcvtn.u16.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vcvtn_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtnu.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtn_u16_v1.i
}

define dso_local <8 x i16> @test_vcvtnq_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtnq_u16_f16:
; CHECK: vcvtn.u16.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vcvtnq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtnu.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtnq_u16_v1.i
}

define dso_local <4 x i16> @test_vcvtp_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtp_s16_f16:
; CHECK: vcvtp.s16.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vcvtp_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtps.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtp_s16_v1.i
}

define dso_local <8 x i16> @test_vcvtpq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtpq_s16_f16:
; CHECK: vcvtp.s16.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vcvtpq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtps.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtpq_s16_v1.i
}

define dso_local <4 x i16> @test_vcvtp_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtp_u16_f16:
; CHECK: vcvtp.u16.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vcvtp_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtpu.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtp_u16_v1.i
}

define dso_local <8 x i16> @test_vcvtpq_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtpq_u16_f16:
; CHECK: vcvtp.u16.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vcvtpq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtpu.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtpq_u16_v1.i
}
Sjoerd Meijer590e4e82018-08-01 14:43:59 +0000308
; fneg is expressed as (fsub -0.0, x); 0xH8000 is f16 negative zero, so this
; must select vneg.f16 rather than a subtract.
define dso_local <4 x half> @test_vneg_f16(<4 x half> %a) {
; CHECK-LABEL: test_vneg_f16:
; CHECK: vneg.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
  ret <4 x half> %sub.i
}

define dso_local <8 x half> @test_vnegq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vnegq_f16:
; CHECK: vneg.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
  ret <8 x half> %sub.i
}

define dso_local <4 x half> @test_vrecpe_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrecpe_f16:
; CHECK: vrecpe.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vrecpe_v1.i = tail call <4 x half> @llvm.arm.neon.vrecpe.v4f16(<4 x half> %a)
  ret <4 x half> %vrecpe_v1.i
}

define dso_local <8 x half> @test_vrecpeq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrecpeq_f16:
; CHECK: vrecpe.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vrecpeq_v1.i = tail call <8 x half> @llvm.arm.neon.vrecpe.v8f16(<8 x half> %a)
  ret <8 x half> %vrecpeq_v1.i
}

; Rounding tests: vrnd* intrinsics map onto the vrint{z,a,m,n,p,x}.f16
; round-to-integral instructions.
define dso_local <4 x half> @test_vrnd_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrnd_f16:
; CHECK: vrintz.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vrnd_v1.i = tail call <4 x half> @llvm.arm.neon.vrintz.v4f16(<4 x half> %a)
  ret <4 x half> %vrnd_v1.i
}

define dso_local <8 x half> @test_vrndq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndq_f16:
; CHECK: vrintz.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vrndq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintz.v8f16(<8 x half> %a)
  ret <8 x half> %vrndq_v1.i
}

define dso_local <4 x half> @test_vrnda_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrnda_f16:
; CHECK: vrinta.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vrnda_v1.i = tail call <4 x half> @llvm.arm.neon.vrinta.v4f16(<4 x half> %a)
  ret <4 x half> %vrnda_v1.i
}

define dso_local <8 x half> @test_vrndaq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndaq_f16:
; CHECK: vrinta.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vrndaq_v1.i = tail call <8 x half> @llvm.arm.neon.vrinta.v8f16(<8 x half> %a)
  ret <8 x half> %vrndaq_v1.i
}

define dso_local <4 x half> @test_vrndm_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndm_f16:
; CHECK: vrintm.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vrndm_v1.i = tail call <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half> %a)
  ret <4 x half> %vrndm_v1.i
}

define dso_local <8 x half> @test_vrndmq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndmq_f16:
; CHECK: vrintm.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vrndmq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half> %a)
  ret <8 x half> %vrndmq_v1.i
}

define dso_local <4 x half> @test_vrndn_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndn_f16:
; CHECK: vrintn.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vrndn_v1.i = tail call <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half> %a)
  ret <4 x half> %vrndn_v1.i
}

define dso_local <8 x half> @test_vrndnq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndnq_f16:
; CHECK: vrintn.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vrndnq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half> %a)
  ret <8 x half> %vrndnq_v1.i
}

define dso_local <4 x half> @test_vrndp_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndp_f16:
; CHECK: vrintp.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vrndp_v1.i = tail call <4 x half> @llvm.arm.neon.vrintp.v4f16(<4 x half> %a)
  ret <4 x half> %vrndp_v1.i
}

define dso_local <8 x half> @test_vrndpq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndpq_f16:
; CHECK: vrintp.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vrndpq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintp.v8f16(<8 x half> %a)
  ret <8 x half> %vrndpq_v1.i
}

define dso_local <4 x half> @test_vrndx_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndx_f16:
; CHECK: vrintx.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vrndx_v1.i = tail call <4 x half> @llvm.arm.neon.vrintx.v4f16(<4 x half> %a)
  ret <4 x half> %vrndx_v1.i
}

define dso_local <8 x half> @test_vrndxq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndxq_f16:
; CHECK: vrintx.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vrndxq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintx.v8f16(<8 x half> %a)
  ret <8 x half> %vrndxq_v1.i
}

define dso_local <4 x half> @test_vrsqrte_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrsqrte_f16:
; CHECK: vrsqrte.f16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vrsqrte_v1.i = tail call <4 x half> @llvm.arm.neon.vrsqrte.v4f16(<4 x half> %a)
  ret <4 x half> %vrsqrte_v1.i
}

define dso_local <8 x half> @test_vrsqrteq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrsqrteq_f16:
; CHECK: vrsqrte.f16 q0, q0
; CHECK-NEXT: bx lr
entry:
  %vrsqrteq_v1.i = tail call <8 x half> @llvm.arm.neon.vrsqrte.v8f16(<8 x half> %a)
  ret <8 x half> %vrsqrteq_v1.i
}
470
; Binary arithmetic and absolute-compare tests.

define dso_local <4 x half> @test_vadd_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vadd_f16:
; CHECK: vadd.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %add.i = fadd <4 x half> %a, %b
  ret <4 x half> %add.i
}

define dso_local <8 x half> @test_vaddq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vaddq_f16:
; CHECK: vadd.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %add.i = fadd <8 x half> %a, %b
  ret <8 x half> %add.i
}

define dso_local <4 x half> @test_vabd_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vabd_f16:
; CHECK: vabd.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %vabd_v2.i = tail call <4 x half> @llvm.arm.neon.vabds.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vabd_v2.i
}

define dso_local <8 x half> @test_vabdq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vabdq_f16:
; CHECK: vabd.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %vabdq_v2.i = tail call <8 x half> @llvm.arm.neon.vabds.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vabdq_v2.i
}

; vcage (absolute compare |a| >= |b|) maps directly onto vacge.f16.
define dso_local <4 x i16> @test_vcage_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcage_f16:
; CHECK: vacge.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %vcage_v2.i = tail call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x i16> %vcage_v2.i
}

define dso_local <8 x i16> @test_vcageq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcageq_f16:
; CHECK: vacge.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %vcageq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x i16> %vcageq_v2.i
}
524
; FIXME (PR38404): vacgt.f16 selection not working yet; re-enable once fixed.
;
;define dso_local <4 x i16> @test_vcagt_f16(<4 x half> %a, <4 x half> %b) {
;entry:
; %vcagt_v2.i = tail call <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
; ret <4 x i16> %vcagt_v2.i
;}
;
;define dso_local <8 x i16> @test_vcagtq_f16(<8 x half> %a, <8 x half> %b) {
;entry:
; %vcagtq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
; ret <8 x i16> %vcagtq_v2.i
;}
; vcale (|a| <= |b|) is implemented as vacge with the operands swapped, so the
; expected instruction reads the registers in reverse order.
define dso_local <4 x i16> @test_vcale_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcale_f16:
; CHECK: vacge.f16 d0, d1, d0
; CHECK-NEXT: bx lr
entry:
  %vcale_v2.i = tail call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
  ret <4 x i16> %vcale_v2.i
}

define dso_local <8 x i16> @test_vcaleq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcaleq_f16:
; CHECK: vacge.f16 q0, q1, q0
; CHECK-NEXT: bx lr
entry:
  %vcaleq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
  ret <8 x i16> %vcaleq_v2.i
}
556
; FIXME (PR38404): vacgt.f16 (swapped-operand vcalt form) not selectable yet.
;
;define dso_local <4 x i16> @test_vcalt_f16(<4 x half> %a, <4 x half> %b) {
;entry:
; %vcalt_v2.i = tail call <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
; ret <4 x i16> %vcalt_v2.i
;}

;define dso_local <8 x i16> @test_vcaltq_f16(<8 x half> %a, <8 x half> %b) {
;entry:
; %vcaltq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
; ret <8 x i16> %vcaltq_v2.i
;}
570
; Ordered register-register compares; le/lt forms select the ge/gt
; instruction with operands swapped.

define dso_local <4 x i16> @test_vceq_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vceq_f16:
; CHECK: vceq.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %cmp.i = fcmp oeq <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vceqq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vceqq_f16:
; CHECK: vceq.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %cmp.i = fcmp oeq <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}

define dso_local <4 x i16> @test_vcge_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcge_f16:
; CHECK: vcge.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %cmp.i = fcmp oge <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vcgeq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcgeq_f16:
; CHECK: vcge.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %cmp.i = fcmp oge <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}

define dso_local <4 x i16> @test_vcgt_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcgt_f16:
; CHECK: vcgt.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %cmp.i = fcmp ogt <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vcgtq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcgtq_f16:
; CHECK: vcgt.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %cmp.i = fcmp ogt <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}

define dso_local <4 x i16> @test_vcle_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcle_f16:
; CHECK: vcge.f16 d0, d1, d0
; CHECK-NEXT: bx lr
entry:
  %cmp.i = fcmp ole <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vcleq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcleq_f16:
; CHECK: vcge.f16 q0, q1, q0
; CHECK-NEXT: bx lr
entry:
  %cmp.i = fcmp ole <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}

define dso_local <4 x i16> @test_vclt_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vclt_f16:
; CHECK: vcgt.f16 d0, d1, d0
; CHECK-NEXT: bx lr
entry:
  %cmp.i = fcmp olt <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vcltq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcltq_f16:
; CHECK: vcgt.f16 q0, q1, q0
; CHECK-NEXT: bx lr
entry:
  %cmp.i = fcmp olt <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}
670
; Fixed-point conversions with an explicit fraction-bit count (#2), via the
; vcvtfxs2fp/vcvtfxu2fp/vcvtfp2fxs/vcvtfp2fxu intrinsics.

define dso_local <4 x half> @test_vcvt_n_f16_s16(<4 x i16> %a) {
; CHECK-LABEL: test_vcvt_n_f16_s16:
; CHECK: vcvt.f16.s16 d0, d0, #2
; CHECK-NEXT: bx lr
entry:
  %vcvt_n1 = tail call <4 x half> @llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16> %a, i32 2)
  ret <4 x half> %vcvt_n1
}

declare <4 x half> @llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16>, i32) #2

define dso_local <8 x half> @test_vcvtq_n_f16_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_n_f16_s16:
; CHECK: vcvt.f16.s16 q0, q0, #2
; CHECK-NEXT: bx lr
entry:
  %vcvt_n1 = tail call <8 x half> @llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16> %a, i32 2)
  ret <8 x half> %vcvt_n1
}

declare <8 x half> @llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16>, i32) #2

define dso_local <4 x half> @test_vcvt_n_f16_u16(<4 x i16> %a) {
; CHECK-LABEL: test_vcvt_n_f16_u16:
; CHECK: vcvt.f16.u16 d0, d0, #2
; CHECK-NEXT: bx lr
entry:
  %vcvt_n1 = tail call <4 x half> @llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16> %a, i32 2)
  ret <4 x half> %vcvt_n1
}

declare <4 x half> @llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16>, i32) #2

define dso_local <8 x half> @test_vcvtq_n_f16_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_n_f16_u16:
; CHECK: vcvt.f16.u16 q0, q0, #2
; CHECK-NEXT: bx lr
entry:
  %vcvt_n1 = tail call <8 x half> @llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16> %a, i32 2)
  ret <8 x half> %vcvt_n1
}

declare <8 x half> @llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16>, i32) #2

define dso_local <4 x i16> @test_vcvt_n_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvt_n_s16_f16:
; CHECK: vcvt.s16.f16 d0, d0, #2
; CHECK-NEXT: bx lr
entry:
  %vcvt_n1 = tail call <4 x i16> @llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half> %a, i32 2)
  ret <4 x i16> %vcvt_n1
}

declare <4 x i16> @llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half>, i32) #2

define dso_local <8 x i16> @test_vcvtq_n_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_n_s16_f16:
; CHECK: vcvt.s16.f16 q0, q0, #2
; CHECK-NEXT: bx lr
entry:
  %vcvt_n1 = tail call <8 x i16> @llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half> %a, i32 2)
  ret <8 x i16> %vcvt_n1
}

declare <8 x i16> @llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half>, i32) #2

define dso_local <4 x i16> @test_vcvt_n_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvt_n_u16_f16:
; CHECK: vcvt.u16.f16 d0, d0, #2
; CHECK-NEXT: bx lr
entry:
  %vcvt_n1 = tail call <4 x i16> @llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half> %a, i32 2)
  ret <4 x i16> %vcvt_n1
}

declare <4 x i16> @llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half>, i32) #2

define dso_local <8 x i16> @test_vcvtq_n_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_n_u16_f16:
; CHECK: vcvt.u16.f16 q0, q0, #2
; CHECK-NEXT: bx lr
entry:
  %vcvt_n1 = tail call <8 x i16> @llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half> %a, i32 2)
  ret <8 x i16> %vcvt_n1
}

declare <8 x i16> @llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half>, i32) #2
758
define dso_local <4 x half> @test_vmax_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vmax_f16:
; CHECK: vmax.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %vmax_v2.i = tail call <4 x half> @llvm.arm.neon.vmaxs.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vmax_v2.i
}

define dso_local <8 x half> @test_vmaxq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vmaxq_f16:
; CHECK: vmax.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %vmaxq_v2.i = tail call <8 x half> @llvm.arm.neon.vmaxs.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vmaxq_v2.i
}
776
; FIXME (PR38404): vmaxnm/vmin/vminnm f16 selection not working yet; re-enable
; these tests once the PR is fixed.
;
;define dso_local <4 x half> @test_vmaxnm_f16(<4 x half> %a, <4 x half> %b) {
;entry:
; %vmaxnm_v2.i = tail call <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half> %a, <4 x half> %b)
; ret <4 x half> %vmaxnm_v2.i
;}

;define dso_local <8 x half> @test_vmaxnmq_f16(<8 x half> %a, <8 x half> %b) {
;entry:
; %vmaxnmq_v2.i = tail call <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b)
; ret <8 x half> %vmaxnmq_v2.i
;}

;define dso_local <4 x half> @test_vmin_f16(<4 x half> %a, <4 x half> %b) {
;entry:
; %vmin_v2.i = tail call <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half> %a, <4 x half> %b)
; ret <4 x half> %vmin_v2.i
;}

;define dso_local <8 x half> @test_vminq_f16(<8 x half> %a, <8 x half> %b) {
;entry:
; %vminq_v2.i = tail call <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half> %a, <8 x half> %b)
; ret <8 x half> %vminq_v2.i
;}

;define dso_local <4 x half> @test_vminnm_f16(<4 x half> %a, <4 x half> %b) {
;entry:
; %vminnm_v2.i = tail call <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half> %a, <4 x half> %b)
; ret <4 x half> %vminnm_v2.i
;}

;define dso_local <8 x half> @test_vminnmq_f16(<8 x half> %a, <8 x half> %b) {
;entry:
; %vminnmq_v2.i = tail call <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half> %a, <8 x half> %b)
; ret <8 x half> %vminnmq_v2.i
;}
814
define dso_local <4 x half> @test_vmul_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vmul_f16:
; CHECK: vmul.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %mul.i = fmul <4 x half> %a, %b
  ret <4 x half> %mul.i
}

define dso_local <8 x half> @test_vmulq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vmulq_f16:
; CHECK: vmul.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %mul.i = fmul <8 x half> %a, %b
  ret <8 x half> %mul.i
}

; Pairwise operations only exist in 64-bit (d-register) form.
define dso_local <4 x half> @test_vpadd_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vpadd_f16:
; CHECK: vpadd.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %vpadd_v2.i = tail call <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vpadd_v2.i
}

define dso_local <4 x half> @test_vpmax_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vpmax_f16:
; CHECK: vpmax.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %vpmax_v2.i = tail call <4 x half> @llvm.arm.neon.vpmaxs.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vpmax_v2.i
}

define dso_local <4 x half> @test_vpmin_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vpmin_f16:
; CHECK: vpmin.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %vpmin_v2.i = tail call <4 x half> @llvm.arm.neon.vpmins.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vpmin_v2.i
}

; Newton-Raphson step intrinsics for reciprocal and reciprocal-sqrt.
define dso_local <4 x half> @test_vrecps_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vrecps_f16:
; CHECK: vrecps.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %vrecps_v2.i = tail call <4 x half> @llvm.arm.neon.vrecps.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vrecps_v2.i
}

define dso_local <8 x half> @test_vrecpsq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vrecpsq_f16:
; CHECK: vrecps.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %vrecpsq_v2.i = tail call <8 x half> @llvm.arm.neon.vrecps.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vrecpsq_v2.i
}

define dso_local <4 x half> @test_vrsqrts_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vrsqrts_f16:
; CHECK: vrsqrts.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %vrsqrts_v2.i = tail call <4 x half> @llvm.arm.neon.vrsqrts.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vrsqrts_v2.i
}

define dso_local <8 x half> @test_vrsqrtsq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vrsqrtsq_f16:
; CHECK: vrsqrts.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %vrsqrtsq_v2.i = tail call <8 x half> @llvm.arm.neon.vrsqrts.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vrsqrtsq_v2.i
}

define dso_local <4 x half> @test_vsub_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vsub_f16:
; CHECK: vsub.f16 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
  %sub.i = fsub <4 x half> %a, %b
  ret <4 x half> %sub.i
}

define dso_local <8 x half> @test_vsubq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vsubq_f16:
; CHECK: vsub.f16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %sub.i = fsub <8 x half> %a, %b
  ret <8 x half> %sub.i
}
913
Sjoerd Meijer9b302132018-08-03 09:12:56 +0000914define dso_local <4 x half> @test_vfma_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
915; CHECK-LABEL: test_vfma_f16:
916; CHECK: vfma.f16 d0, d1, d2
917; CHECK-NEXT: bx lr
918entry:
919 %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
920 ret <4 x half> %0
921}
922
923define dso_local <8 x half> @test_vfmaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
924; CHECK-LABEL: test_vfmaq_f16:
925; CHECK: vfma.f16 q0, q1, q2
926; CHECK-NEXT: bx lr
927entry:
928 %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
929 ret <8 x half> %0
930}
931
; Fused multiply-subtract: %b is negated by subtracting it from -0.0
; (0xH8000 is half -0.0), then fed to llvm.fma. Codegen currently emits an
; explicit vneg.f16 followed by vfma.f16 rather than a single vfms.
define dso_local <4 x half> @test_vfms_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_vfms_f16:
; CHECK: vneg.f16 [[D16:d[0-9]+]], d1
; CHECK-NEXT: vfma.f16 d0, [[D16]], d2
; CHECK-NEXT: bx lr
entry:
 %sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
 %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub.i, <4 x half> %c, <4 x half> %a)
 ret <4 x half> %0
}
942
; 128-bit fused multiply-subtract: negate %b via fsub from a -0.0 splat
; (0xH8000 is half -0.0), then llvm.fma. Codegen currently emits
; vneg.f16 + vfma.f16 on q-registers rather than a single vfms.
define dso_local <8 x half> @test_vfmsq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vfmsq_f16:
; CHECK: vneg.f16 [[Q8:q[0-9]+]], q1
; CHECK-NEXT: vfma.f16 q0, [[Q8]], q2
; CHECK-NEXT: bx lr
entry:
 %sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
 %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub.i, <8 x half> %c, <8 x half> %a)
 ret <8 x half> %0
}
953
Sjoerd Meijer590e4e82018-08-01 14:43:59 +0000954; FIXME (PR38404)
955;
Sjoerd Meijer590e4e82018-08-01 14:43:59 +0000956;define dso_local <4 x half> @test_vmul_lane_f16(<4 x half> %a, <4 x half> %b) {
957;entry:
958; %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
959; %mul = fmul <4 x half> %shuffle, %a
960; ret <4 x half> %mul
961;}
962
963;define dso_local <8 x half> @test_vmulq_lane_f16(<8 x half> %a, <4 x half> %b) {
964;entry:
965; %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
966; %mul = fmul <8 x half> %shuffle, %a
967; ret <8 x half> %mul
968;}
969
970;define dso_local <4 x half> @test_vmul_n_f16(<4 x half> %a, float %b.coerce) {
971;entry:
972; %0 = bitcast float %b.coerce to i32
973; %tmp.0.extract.trunc = trunc i32 %0 to i16
974; %1 = bitcast i16 %tmp.0.extract.trunc to half
975; %vecinit = insertelement <4 x half> undef, half %1, i32 0
976; %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
977; %mul = fmul <4 x half> %vecinit4, %a
978; ret <4 x half> %mul
979;}
980
981;define dso_local <8 x half> @test_vmulq_n_f16(<8 x half> %a, float %b.coerce) {
982;entry:
983; %0 = bitcast float %b.coerce to i32
984; %tmp.0.extract.trunc = trunc i32 %0 to i16
985; %1 = bitcast i16 %tmp.0.extract.trunc to half
986; %vecinit = insertelement <8 x half> undef, half %1, i32 0
987; %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
988; %mul = fmul <8 x half> %vecinit8, %a
989; ret <8 x half> %mul
990;}
991
; Bitwise select on 64-bit vectors: the vbsl intrinsic (operating on the i8
; bitcasts) must select a single vbsl instruction.
; Fix: "CHECKLABEL" is not a FileCheck directive; use "CHECK-LABEL" so the
; checks are anchored to this function.
define dso_local <4 x half> @test_vbsl_f16(<4 x i16> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_vbsl_f16:
; CHECK: vbsl d0, d1, d2
; CHECK-NEXT: bx lr
entry:
 %0 = bitcast <4 x i16> %a to <8 x i8>
 %1 = bitcast <4 x half> %b to <8 x i8>
 %2 = bitcast <4 x half> %c to <8 x i8>
 %vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2)
 %3 = bitcast <8 x i8> %vbsl_v.i to <4 x half>
 ret <4 x half> %3
}
1004
; Bitwise select on 128-bit vectors: the vbsl intrinsic (operating on the i8
; bitcasts) must select a single vbsl instruction on q-registers.
; Fix: "CHECKLABEL" is not a FileCheck directive; use "CHECK-LABEL" so the
; checks are anchored to this function.
define dso_local <8 x half> @test_vbslq_f16(<8 x i16> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vbslq_f16:
; CHECK: vbsl q0, q1, q2
; CHECK-NEXT: bx lr
entry:
 %0 = bitcast <8 x i16> %a to <16 x i8>
 %1 = bitcast <8 x half> %b to <16 x i8>
 %2 = bitcast <8 x half> %c to <16 x i8>
 %vbslq_v.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
 %3 = bitcast <16 x i8> %vbslq_v.i to <8 x half>
 ret <8 x half> %3
}
1017
; Interleave: the two shuffles take elements 0..3 of %a and %b alternately
; (lo half then hi half). Both halves together must select a single
; two-register vzip.16, with the struct return carried in d0/d1.
define dso_local %struct.float16x4x2_t @test_vzip_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vzip_f16:
; CHECK: vzip.16 d0, d1
; CHECK-NEXT: bx lr
entry:
 %vzip.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 %vzip1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vzip.i, 0, 0
 %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vzip1.i, 0, 1
 ret %struct.float16x4x2_t %.fca.0.1.insert
}
1029
; 128-bit interleave: both alternating shuffles of %a/%b must fold into a
; single vzip.16 on q-registers, with the struct return carried in q0/q1.
define dso_local %struct.float16x8x2_t @test_vzipq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vzipq_f16:
; CHECK: vzip.16 q0, q1
; CHECK-NEXT: bx lr
entry:
 %vzip.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 %vzip1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vzip.i, 0, 0
 %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vzip1.i, 0, 1
 ret %struct.float16x8x2_t %.fca.0.1.insert
}
1041
; De-interleave: one shuffle collects the even-indexed elements of the %a/%b
; concatenation, the other the odd-indexed ones. Both must fold into a single
; vuzp.16, with the struct return carried in d0/d1.
define dso_local %struct.float16x4x2_t @test_vuzp_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vuzp_f16:
; CHECK: vuzp.16 d0, d1
; CHECK-NEXT: bx lr
entry:
 %vuzp.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 %vuzp1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vuzp.i, 0, 0
 %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vuzp1.i, 0, 1
 ret %struct.float16x4x2_t %.fca.0.1.insert
}
1053
; 128-bit de-interleave: the even-index and odd-index shuffles of %a/%b must
; fold into a single vuzp.16 on q-registers, with the struct return in q0/q1.
define dso_local %struct.float16x8x2_t @test_vuzpq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vuzpq_f16:
; CHECK: vuzp.16 q0, q1
; CHECK-NEXT: bx lr
entry:
 %vuzp.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 %vuzp1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vuzp.i, 0, 0
 %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vuzp1.i, 0, 1
 ret %struct.float16x8x2_t %.fca.0.1.insert
}
1065
Sjoerd Meijer590e4e82018-08-01 14:43:59 +00001066; FIXME (PR38404)
1067;
Sjoerd Meijer590e4e82018-08-01 14:43:59 +00001068;define dso_local %struct.float16x4x2_t @test_vtrn_f16(<4 x half> %a, <4 x half> %b) {
1069;entry:
1070; %vtrn.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1071; %vtrn1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1072; %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vtrn.i, 0, 0
1073; %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vtrn1.i, 0, 1
1074; ret %struct.float16x4x2_t %.fca.0.1.insert
1075;}
1076;
1077;define dso_local %struct.float16x8x2_t @test_vtrnq_f16(<8 x half> %a, <8 x half> %b) {
1078;entry:
1079; %vtrn.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1080; %vtrn1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1081; %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vtrn.i, 0, 0
1082; %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vtrn1.i, 0, 1
1083; ret %struct.float16x8x2_t %.fca.0.1.insert
1084;}
1085;
1086;define dso_local <4 x half> @test_vmov_n_f16(float %a.coerce) {
1087;entry:
1088; %0 = bitcast float %a.coerce to i32
1089; %tmp.0.extract.trunc = trunc i32 %0 to i16
1090; %1 = bitcast i16 %tmp.0.extract.trunc to half
1091; %vecinit = insertelement <4 x half> undef, half %1, i32 0
1092; %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
1093; ret <4 x half> %vecinit4
1094;}
1095;
1096;define dso_local <8 x half> @test_vmovq_n_f16(float %a.coerce) {
1097;entry:
1098; %0 = bitcast float %a.coerce to i32
1099; %tmp.0.extract.trunc = trunc i32 %0 to i16
1100; %1 = bitcast i16 %tmp.0.extract.trunc to half
1101; %vecinit = insertelement <8 x half> undef, half %1, i32 0
1102; %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
1103; ret <8 x half> %vecinit8
1104;}
1105;
1106;define dso_local <4 x half> @test_vdup_n_f16(float %a.coerce) {
1107;entry:
1108; %0 = bitcast float %a.coerce to i32
1109; %tmp.0.extract.trunc = trunc i32 %0 to i16
1110; %1 = bitcast i16 %tmp.0.extract.trunc to half
1111; %vecinit = insertelement <4 x half> undef, half %1, i32 0
1112; %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
1113; ret <4 x half> %vecinit4
1114;}
1115;
1116;define dso_local <8 x half> @test_vdupq_n_f16(float %a.coerce) {
1117;entry:
1118; %0 = bitcast float %a.coerce to i32
1119; %tmp.0.extract.trunc = trunc i32 %0 to i16
1120; %1 = bitcast i16 %tmp.0.extract.trunc to half
1121; %vecinit = insertelement <8 x half> undef, half %1, i32 0
1122; %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
1123; ret <8 x half> %vecinit8
1124;}
1125;
1126;define dso_local <4 x half> @test_vdup_lane_f16(<4 x half> %a) {
1127;entry:
1128; %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1129; ret <4 x half> %shuffle
1130;}
1131;
1132;define dso_local <8 x half> @test_vdupq_lane_f16(<4 x half> %a) {
1133;entry:
1134; %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
1135; ret <8 x half> %shuffle
1136;}
1137;
1138;define dso_local <4 x half> @test_vext_f16(<4 x half> %a, <4 x half> %b) {
1139;entry:
1140; %vext = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
1141; ret <4 x half> %vext
1142;}
1143;
1144;define dso_local <8 x half> @test_vextq_f16(<8 x half> %a, <8 x half> %b) {
1145;entry:
1146; %vext = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
1147; ret <8 x half> %vext
1148;}
1149;
1150;define dso_local <4 x half> @test_vrev64_f16(<4 x half> %a) {
1151;entry:
1152; %shuffle.i = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1153; ret <4 x half> %shuffle.i
1154;}
1155;
1156;define dso_local <8 x half> @test_vrev64q_f16(<8 x half> %a) {
1157;entry:
1158; %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
1159; ret <8 x half> %shuffle.i
1160;}
1161
1162declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
1163declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
1164declare <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half>)
1165declare <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half>)
1166declare <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half>)
1167declare <4 x i16> @llvm.arm.neon.vcvtms.v4i16.v4f16(<4 x half>)
1168declare <8 x i16> @llvm.arm.neon.vcvtms.v8i16.v8f16(<8 x half>)
1169declare <4 x i16> @llvm.arm.neon.vcvtmu.v4i16.v4f16(<4 x half>)
1170declare <8 x i16> @llvm.arm.neon.vcvtmu.v8i16.v8f16(<8 x half>)
1171declare <4 x i16> @llvm.arm.neon.vcvtns.v4i16.v4f16(<4 x half>)
1172declare <8 x i16> @llvm.arm.neon.vcvtns.v8i16.v8f16(<8 x half>)
1173declare <4 x i16> @llvm.arm.neon.vcvtnu.v4i16.v4f16(<4 x half>)
1174declare <8 x i16> @llvm.arm.neon.vcvtnu.v8i16.v8f16(<8 x half>)
1175declare <4 x i16> @llvm.arm.neon.vcvtps.v4i16.v4f16(<4 x half>)
1176declare <8 x i16> @llvm.arm.neon.vcvtps.v8i16.v8f16(<8 x half>)
1177declare <4 x i16> @llvm.arm.neon.vcvtpu.v4i16.v4f16(<4 x half>)
1178declare <8 x i16> @llvm.arm.neon.vcvtpu.v8i16.v8f16(<8 x half>)
1179declare <4 x half> @llvm.arm.neon.vrecpe.v4f16(<4 x half>)
1180declare <8 x half> @llvm.arm.neon.vrecpe.v8f16(<8 x half>)
1181declare <4 x half> @llvm.arm.neon.vrintz.v4f16(<4 x half>)
1182declare <8 x half> @llvm.arm.neon.vrintz.v8f16(<8 x half>)
1183declare <4 x half> @llvm.arm.neon.vrinta.v4f16(<4 x half>)
1184declare <8 x half> @llvm.arm.neon.vrinta.v8f16(<8 x half>)
1185declare <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half>)
1186declare <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half>)
1187declare <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half>)
1188declare <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half>)
1189declare <4 x half> @llvm.arm.neon.vrintp.v4f16(<4 x half>)
1190declare <8 x half> @llvm.arm.neon.vrintp.v8f16(<8 x half>)
1191declare <4 x half> @llvm.arm.neon.vrintx.v4f16(<4 x half>)
1192declare <8 x half> @llvm.arm.neon.vrintx.v8f16(<8 x half>)
1193declare <4 x half> @llvm.arm.neon.vrsqrte.v4f16(<4 x half>)
1194declare <8 x half> @llvm.arm.neon.vrsqrte.v8f16(<8 x half>)
1195declare <4 x half> @llvm.arm.neon.vabds.v4f16(<4 x half>, <4 x half>)
1196declare <8 x half> @llvm.arm.neon.vabds.v8f16(<8 x half>, <8 x half>)
1197declare <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half>, <4 x half>)
1198declare <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half>, <8 x half>)
1199declare <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half>, <4 x half>)
1200declare <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half>, <8 x half>)
1201declare <4 x half> @llvm.arm.neon.vmaxs.v4f16(<4 x half>, <4 x half>)
1202declare <8 x half> @llvm.arm.neon.vmaxs.v8f16(<8 x half>, <8 x half>)
1203declare <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half>, <4 x half>)
1204declare <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half>, <8 x half>)
1205declare <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half>, <4 x half>)
1206declare <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half>, <8 x half>)
1207declare <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half>, <4 x half>)
1208declare <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half>, <8 x half>)
1209declare <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half>, <4 x half>)
1210declare <4 x half> @llvm.arm.neon.vpmaxs.v4f16(<4 x half>, <4 x half>)
1211declare <4 x half> @llvm.arm.neon.vpmins.v4f16(<4 x half>, <4 x half>)
1212declare <4 x half> @llvm.arm.neon.vrecps.v4f16(<4 x half>, <4 x half>)
1213declare <8 x half> @llvm.arm.neon.vrecps.v8f16(<8 x half>, <8 x half>)
1214declare <4 x half> @llvm.arm.neon.vrsqrts.v4f16(<4 x half>, <4 x half>)
1215declare <8 x half> @llvm.arm.neon.vrsqrts.v8f16(<8 x half>, <8 x half>)
1216declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
1217declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
1218declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>)
1219declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)