blob: 75a11a531991fdaed29c740813db28f646865565 [file] [log] [blame]
; SOFT:
; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT

; SOFTFP:
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; Test fast-isel
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD

; HARD:
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; FP-CONTRACT=FAST
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST


; Regression test: returning undef must not crash LowerReturn under FullFP16.
define float @RetValBug(float %A.coerce) {
entry:
  ret float undef
; Check that LowerReturn can handle undef nodes (i.e. nodes which do not have
; any operands) when FullFP16 is enabled.
;
; CHECK-LABEL: RetValBug:
; CHECK-HARDFP-FULLFP16: {{.*}} lr
}

; 1. VABS: TODO

; 2. VADD
; fadd on half; half values travel coerced through float/i32 on soft-float ABIs.
define float @Add(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fadd half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Add:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fadd
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vadd.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vadd.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vadd.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vadd.f16 s0, s0, s1
}

; 3. VCMP
; fcmp une on two half values; FullFP16 hard-float compares directly in s0/s1.
define zeroext i1 @VCMP1(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %cmp = fcmp une half %1, %3
  ret i1 %cmp

; CHECK-LABEL: VCMP1:

; CHECK-SOFT: bl __aeabi_fcmpeq

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vcmp.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcmp.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vcmp.f16 [[S2]], [[S0]]

; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r0
; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r1
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s1
}

; Check VCMPZH
; fcmp against +0.0 should select the compare-with-zero form (vcmp.f16 sN, #0).
define zeroext i1 @VCMP2(float %F.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %cmp = fcmp une half %1, 0.000000e+00
  ret i1 %cmp

; CHECK-LABEL: VCMP2:

; CHECK-SOFT: bl __aeabi_fcmpeq
; CHECK-SOFTFP-FP16: vcmp.f32 s0, #0
; CHECK-SOFTFP-FULLFP16: vcmp.f16 s0, #0
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, #0
}

; 4. VCMPE
; Ordered fcmp (olt) against zero selects the signalling compare vcmpe with #0.
define i32 @VCMPE1(float %F.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %tmp = fcmp olt half %1, 0.000000e+00
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL: VCMPE1:

; CHECK-SOFT: bl __aeabi_fcmplt
; CHECK-SOFTFP-FP16: vcmpe.f32 s0, #0
; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s0, #0
; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, #0
}

; Ordered fcmp (olt) of two half values selects register-register vcmpe.
define i32 @VCMPE2(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp.1.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp.1.extract.trunc to half
  %tmp = fcmp olt half %1, %3
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL: VCMPE2:

; CHECK-SOFT: bl __aeabi_fcmplt
; CHECK-SOFTFP-FP16: vcmpe.f32 s{{.}}, s{{.}}
; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
; CHECK-HARDFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
}

; Test lowering of BR_CC
; Half-precision compare feeding a conditional branch (BR_CC lowering).
define hidden i32 @VCMPBRCC() {
entry:
  %f = alloca half, align 2
  br label %for.cond

for.cond:
  %0 = load half, half* %f, align 2
  %cmp = fcmp nnan ninf nsz ole half %0, 0xH6800
  br i1 %cmp, label %for.body, label %for.end

for.body:
  ret i32 1

for.end:
  ret i32 0

; CHECK-LABEL: VCMPBRCC:

; CHECK-SOFT: bl __aeabi_fcmple
; CHECK-SOFT: cmp r0, #0

; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]]
; CHECK-SOFTFP-FP16: vcmpe.f32 [[S2]], s0
; CHECK-SOFTFP-FP16: vmrs APSR_nzcv, fpscr

; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FULLFP16: vmrs APSR_nzcv, fpscr
}

; 5. VCVT (between floating-point and fixed-point)
; Only assembly/disassembly support

; 6. VCVT (between floating-point and integer, both directions)
; half -> signed i32 conversion: single vcvt.s32.f16 under FullFP16.
define i32 @fptosi(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptosi half %0 to i32
  ret i32 %conv

; CHECK-LABEL: fptosi:

; CHECK-HARDFP-FULLFP16: vmov.f16 s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.s32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}

; half -> unsigned i32 conversion: single vcvt.u32.f16 under FullFP16.
define i32 @fptoui(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptoui half %0 to i32
  ret i32 %conv

; CHECK-HARDFP-FULLFP16: vcvt.u32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}

; unsigned i32 -> half conversion: vcvt.f16.u32 under FullFP16.
define float @UintToH(i32 %a, i32 %b) {
entry:
  %0 = uitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: UintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.u32 s0, s0
}

; signed i32 -> half conversion: vcvt.f16.s32 under FullFP16.
define float @SintToH(i32 %a, i32 %b) {
entry:
  %0 = sitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: SintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.s32 s0, s0
}

; float -> half truncation: vcvtb.f16.f32.
define i32 @f2h(float %f) {
entry:
  %conv = fptrunc float %f to half
  %0 = bitcast half %conv to i16
  %tmp.0.insert.ext = zext i16 %0 to i32
  ret i32 %tmp.0.insert.ext

; CHECK-LABEL: f2h:
; CHECK-HARDFP-FULLFP16: vcvtb.f16.f32 s0, s0
}

; half -> float extension: vcvtb.f32.f16.
define float @h2f(i32 %h.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fpext half %0 to float
  ret float %conv

; CHECK-LABEL: h2f:
; CHECK-HARDFP-FULLFP16: vcvtb.f32.f16 s0, s0
}


; half -> double extension: vcvtb.f64.f16.
define double @h2d(i32 %h.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fpext half %0 to double
  ret double %conv

; CHECK-LABEL: h2d:
; CHECK-HARDFP-FULLFP16: vcvtb.f64.f16 d{{.*}}, s{{.}}
}

; double -> half truncation: vcvtb.f16.f64.
define i32 @d2h(double %d) {
entry:
  %conv = fptrunc double %d to half
  %0 = bitcast half %conv to i16
  %tmp.0.insert.ext = zext i16 %0 to i32
  ret i32 %tmp.0.insert.ext

; CHECK-LABEL: d2h:
; CHECK-HARDFP-FULLFP16: vcvtb.f16.f64 s0, d{{.*}}
}

; TODO:
; 7. VCVTA
; 8. VCVTM
; 9. VCVTN
; 10. VCVTP
; 11. VCVTR

; 12. VDIV
; fdiv on half; half values travel coerced through float/i32 on soft-float ABIs.
define float @Div(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fdiv half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Div:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fdiv
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vdiv.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vdiv.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vdiv.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vdiv.f16 s0, s0, s1
}

; 13. VFMA
; fmul+fadd contracted to vfma.f16 when -fp-contract=fast is given.
define float @VFMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %mul, %5
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMA:
; CHECK-HARDFP-FULLFP16-FAST: vfma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 14. VFMS
; c - a*b contracted to vfms.f16 when -fp-contract=fast is given.
define float @VFMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half %5, %mul
  %6 = bitcast half %sub to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMS:
; CHECK-HARDFP-FULLFP16-FAST: vfms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 15. VFNMA
; -(a*b) - c contracted to vfnma.f16 when -fp-contract=fast is given.
define float @VFNMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half -0.0, %mul
  %sub2 = fsub half %sub, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMA:
; CHECK-HARDFP-FULLFP16-FAST: vfnma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 16. VFNMS
; a*b - c contracted to vfnms.f16 when -fp-contract=fast is given.
define float @VFNMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub2 = fsub half %mul, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMS:
; CHECK-HARDFP-FULLFP16-FAST: vfnms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; TODO:
; 17. VMAXNM
; 18. VMINNM

; 19. VMLA
; c + a*b selects the multiply-accumulate vmla.f16.
define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLA:
; CHECK-HARDFP-FULLFP16: vmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; 20. VMLS
; c - a*b selects the multiply-subtract vmls.f16.
define float @VMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fsub half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLS:
; CHECK-HARDFP-FULLFP16: vmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; TODO: fix immediates.
; 21. VMOV (between general-purpose register and half-precision register)

; 22. VMOV (immediate)
Sjoerd Meijer8c073932018-02-07 08:37:17 +0000536define i32 @movi(i32 %a.coerce) {
537entry:
538 %tmp.0.extract.trunc = trunc i32 %a.coerce to i16
539 %0 = bitcast i16 %tmp.0.extract.trunc to half
540 %add = fadd half %0, 0xHC000
541 %1 = bitcast half %add to i16
542 %tmp2.0.insert.ext = zext i16 %1 to i32
543 ret i32 %tmp2.0.insert.ext
544
545; CHECK-LABEL: movi:
546; CHECK-HARDFP-FULLFP16: vmov.f16 s0, #-2.000000e+00
547}
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000548
549; 23. VMUL
; fmul on half; half values travel coerced through float/i32 on soft-float ABIs.
define float @Mul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Mul:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fmul
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vmul.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmul.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vmul.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vmul.f16 s0, s0, s1
}

; 24. VNEG
; fsub -0.0, x selects half-precision negate vneg.f16.
define float @Neg(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = fsub half -0.000000e+00, %1
  %3 = bitcast half %2 to i16
  %tmp4.0.insert.ext = zext i16 %3 to i32
  %4 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %4

; CHECK-LABEL: Neg:
; CHECK-HARDFP-FULLFP16: vneg.f16 s0, s0
}

; 25. VNMLA
; -(a*b) - c selects the negated multiply-accumulate vnmla.f16.
define float @VNMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.000000e+00, %add
  %add3 = fsub half %add2, %5
  %6 = bitcast half %add3 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLA:
; CHECK-HARDFP-FULLFP16: vnmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 26. VNMLS
; a*b - c selects the negated multiply-subtract vnmls.f16.
define float @VNMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half %add, %5
  %6 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLS:
; CHECK-HARDFP-FULLFP16: vnmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 27. VNMUL
; -(a*b) selects the negated multiply vnmul.f16.
define float @NMul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.0, %add
  %4 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: NMul:
; CHECK-HARDFP-FULLFP16: vnmul.f16 s0, s0, s1
}

; 28. VRINTA
; 29. VRINTM
; 30. VRINTN
; 31. VRINTP
; 32. VRINTR
; 33. VRINTX
; 34. VRINTZ
; 35. VSELEQ
; 36. VSELGE
; 37. VSELGT
; 38. VSELVS
; 39. VSQRT

; 40. VSUB
; fsub on half; half values travel coerced through float/i32 on soft-float ABIs.
define float @Sub(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fsub half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Sub:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fsub
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vsub.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vsub.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vsub.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vsub.f16 s0, s0, s1
}
Sjoerd Meijer101ee432018-02-13 10:29:03 +0000758
759; Check for VSTRH with a FCONSTH, this checks that addressing mode
760; AddrMode5FP16 is supported.
; Half store/load through a stack slot exercises vstr.16/vldr.16 (AddrMode5FP16).
define i32 @ThumbAddrMode5FP16(i32 %A.coerce) {
entry:
  %S = alloca half, align 2
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %S.0.S.0..sroa_cast = bitcast half* %S to i8*
  store volatile half 0xH3C00, half* %S, align 2
  %S.0.S.0. = load volatile half, half* %S, align 2
  %add = fadd half %S.0.S.0., %0
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL: ThumbAddrMode5FP16

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], #1.000000e+00
; CHECK-SOFTFP-FULLFP16: vstr.16 [[S0]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0_2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vldr.16 [[S2:s[0-9]]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16: vadd.f16 s{{.}}, [[S2]], [[S0_2]]
}

; Test function calls to check store/load reg to/from stack
; Calls force an fp16 value live across them: check 2-byte spill and reload at -O0.
define i32 @fn1() {
entry:
  %coerce = alloca half, align 2
  %tmp2 = alloca i32, align 4
  store half 0xH7C00, half* %coerce, align 2
  %0 = load i32, i32* %tmp2, align 4
  %call = call i32 bitcast (i32 (...)* @fn2 to i32 (i32)*)(i32 %0)
  store half 0xH7C00, half* %coerce, align 2
  %1 = load i32, i32* %tmp2, align 4
  %call3 = call i32 bitcast (i32 (...)* @fn3 to i32 (i32)*)(i32 %1)
  ret i32 %call3

; CHECK-SPILL-RELOAD-LABEL: fn1:
; CHECK-SPILL-RELOAD: vstr.16 s0, [sp, #{{.}}] @ 2-byte Spill
; CHECK-SPILL-RELOAD-NEXT: bl fn2
; CHECK-SPILL-RELOAD-NEXT: vldr.16 s0, [sp, #{{.}}] @ 2-byte Reload
}

; External callees used by fn1 above.
declare dso_local i32 @fn2(...)
declare dso_local i32 @fn3(...)