; SOFT:
; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT

; SOFTFP:
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; HARD:
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; FP-CONTRACT=FAST
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST

define float @RetValBug(float %A.coerce) {
entry:
  ret float undef
; Check that LowerReturn can handle undef nodes (i.e. nodes which do not have
; any operands) when FullFP16 is enabled.
;
; CHECK-LABEL: RetValBug:
; CHECK-HARDFP-FULLFP16: {{.*}} lr
}

; 1. VABS: TODO

; 2. VADD
define float @Add(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fadd half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Add:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fadd
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vadd.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vadd.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vadd.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vadd.f16 s0, s0, s1
}

; 3. VCMP
define zeroext i1 @VCMP(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %cmp = fcmp ogt half %1, %3
  ret i1 %cmp

; CHECK-LABEL: VCMP:

; CHECK-SOFT: bl __aeabi_fcmpgt

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vcmpe.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcmpe.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vcmpe.f16 [[S2]], [[S0]]

; CHECK-SOFTFP-FULLFP16-NOT: vmov.f16 s{{.}}, r0
; CHECK-SOFTFP-FULLFP16-NOT: vmov.f16 s{{.}}, r1
; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s1
}

; 4. VCMPE

; FIXME: enable when constant pool is fixed
;
;define i32 @VCMPE_IMM(float %F.coerce) {
;entry:
;  %0 = bitcast float %F.coerce to i32
;  %tmp.0.extract.trunc = trunc i32 %0 to i16
;  %1 = bitcast i16 %tmp.0.extract.trunc to half
;  %tmp = fcmp olt half %1, 1.000000e+00
;  %tmp1 = zext i1 %tmp to i32
;  ret i32 %tmp1
;}
define i32 @VCMPE(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp.1.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp.1.extract.trunc to half
  %tmp = fcmp olt half %1, %3
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL: VCMPE:
}

; 5. VCVT (between floating-point and fixed-point)
; Only assembly/disassembly support

; 6. VCVT (between floating-point and integer, both directions)
define i32 @fptosi(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptosi half %0 to i32
  ret i32 %conv

; CHECK-LABEL: fptosi:

; CHECK-HARDFP-FULLFP16: vmov.f16 s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.s32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}
define i32 @fptoui(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptoui half %0 to i32
  ret i32 %conv

; CHECK-LABEL: fptoui:

; CHECK-HARDFP-FULLFP16: vcvt.u32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}
define float @UintToH(i32 %a, i32 %b) {
entry:
  %0 = uitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: UintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.u32 s0, s0
}
define float @SintToH(i32 %a, i32 %b) {
entry:
  %0 = sitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: SintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.s32 s0, s0
}

; TODO:
; 7. VCVTA
; 8. VCVTM
; 9. VCVTN
; 10. VCVTP
; 11. VCVTR

; 12. VDIV
define float @Div(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fdiv half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Div:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fdiv
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vdiv.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vdiv.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vdiv.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vdiv.f16 s0, s0, s1
}

; 13. VFMA
define float @VFMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %mul, %5
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMA:
; CHECK-HARDFP-FULLFP16-FAST: vfma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 14. VFMS
define float @VFMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half %5, %mul
  %6 = bitcast half %sub to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMS:
; CHECK-HARDFP-FULLFP16-FAST: vfms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 15. VFNMA
define float @VFNMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half -0.0, %mul
  %sub2 = fsub half %sub, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMA:
; CHECK-HARDFP-FULLFP16-FAST: vfnma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 16. VFNMS
define float @VFNMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub2 = fsub half %mul, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMS:
; CHECK-HARDFP-FULLFP16-FAST: vfnms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; TODO:
; 17. VMAXNM
; 18. VMINNM

; 19. VMLA
define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLA:
; CHECK-HARDFP-FULLFP16: vmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; 20. VMLS
define float @VMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fsub half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLS:
; CHECK-HARDFP-FULLFP16: vmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; TODO: fix immediates.
; 21. VMOV (between general-purpose register and half-precision register)
; 22. VMOV (immediate)

; 23. VMUL
define float @Mul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Mul:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fmul
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vmul.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmul.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vmul.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vmul.f16 s0, s0, s1
}

; 24. VNEG
define float @Neg(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = fsub half -0.000000e+00, %1
  %3 = bitcast half %2 to i16
  %tmp4.0.insert.ext = zext i16 %3 to i32
  %4 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %4

; CHECK-LABEL: Neg:
; CHECK-HARDFP-FULLFP16: vneg.f16 s0, s0
}

; 25. VNMLA
define float @VNMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.000000e+00, %add
  %add3 = fsub half %add2, %5
  %6 = bitcast half %add3 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLA:
; CHECK-HARDFP-FULLFP16: vnmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 26. VNMLS
define float @VNMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half %add, %5
  %6 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLS:
; CHECK-HARDFP-FULLFP16: vnmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 27. VNMUL
define float @NMul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.0, %add
  %4 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: NMul:
; CHECK-HARDFP-FULLFP16: vnmul.f16 s0, s0, s1
}

; 28. VRINTA
; 29. VRINTM
; 30. VRINTN
; 31. VRINTP
; 32. VRINTR
; 33. VRINTX
; 34. VRINTZ
; 35. VSELEQ
; 36. VSELGE
; 37. VSELGT
; 38. VSELVS
; 39. VSQRT

; 40. VSUB
define float @Sub(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fsub half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Sub:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fsub
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vsub.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vsub.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vsub.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vsub.f16 s0, s0, s1
}