; SOFT:
; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT

; SOFTFP:
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-A32
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-T32
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; Test fast-isel
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD

; HARD:
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; FP-CONTRACT=FAST
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST

; TODO: we can't pass half-precision arguments as "half" types yet. We do
; that for the time being by passing "float %f.coerce" and the necessary
; bitconverts/truncates. But when we can pass half types, we do want to use
; and test that here.
define float @RetValBug(float %A.coerce) {
entry:
  ret float undef
; Check that LowerReturn can handle undef nodes (i.e. nodes which do not have
; any operands) when FullFP16 is enabled.
;
; CHECK-LABEL: RetValBug:
; CHECK-HARDFP-FULLFP16: {{.*}} lr
}

; 1. VABS: TODO

; 2. VADD
define float @Add(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fadd half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Add:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fadd
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vadd.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vadd.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vadd.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vadd.f16 s0, s0, s1
}

; 3. VCMP
define zeroext i1 @VCMP1(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %cmp = fcmp une half %1, %3
  ret i1 %cmp

; CHECK-LABEL: VCMP1:

; CHECK-SOFT: bl __aeabi_fcmpeq

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vcmp.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcmp.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vcmp.f16 [[S2]], [[S0]]

; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r0
; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r1
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s1
}

; Check VCMPZH
define zeroext i1 @VCMP2(float %F.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %cmp = fcmp une half %1, 0.000000e+00
  ret i1 %cmp

; CHECK-LABEL: VCMP2:

; CHECK-SOFT: bl __aeabi_fcmpeq
; CHECK-SOFTFP-FP16: vcmp.f32 s0, #0
; CHECK-SOFTFP-FULLFP16: vcmp.f16 s0, #0
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, #0
}

; 4. VCMPE
define i32 @VCMPE1(float %F.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %tmp = fcmp olt half %1, 0.000000e+00
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL: VCMPE1:

; CHECK-SOFT: bl __aeabi_fcmplt
; CHECK-SOFTFP-FP16: vcmpe.f32 s0, #0
; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s0, #0
; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, #0
}

define i32 @VCMPE2(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp.1.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp.1.extract.trunc to half
  %tmp = fcmp olt half %1, %3
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL: VCMPE2:

; CHECK-SOFT: bl __aeabi_fcmplt
; CHECK-SOFTFP-FP16: vcmpe.f32 s{{.}}, s{{.}}
; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
; CHECK-HARDFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
}

; Test lowering of BR_CC
define hidden i32 @VCMPBRCC() {
entry:
  %f = alloca half, align 2
  br label %for.cond

for.cond:
  %0 = load half, half* %f, align 2
  %cmp = fcmp nnan ninf nsz ole half %0, 0xH6800
  br i1 %cmp, label %for.body, label %for.end

for.body:
  ret i32 1

for.end:
  ret i32 0

; CHECK-LABEL: VCMPBRCC:

; CHECK-SOFT: bl __aeabi_fcmple
; CHECK-SOFT: cmp r0, #0

; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]]
; CHECK-SOFTFP-FP16: vcmpe.f32 [[S2]], s0
; CHECK-SOFTFP-FP16: vmrs APSR_nzcv, fpscr

; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FULLFP16: vmrs APSR_nzcv, fpscr
}

; 5. VCVT (between floating-point and fixed-point)
; Only assembly/disassembly support

; 6. VCVT (between floating-point and integer, both directions)
define i32 @fptosi(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptosi half %0 to i32
  ret i32 %conv

; CHECK-LABEL: fptosi:

; CHECK-HARDFP-FULLFP16: vmov.f16 s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.s32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}

define i32 @fptoui(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptoui half %0 to i32
  ret i32 %conv

; Anchor the checks to this function, consistent with the sibling tests.
; CHECK-LABEL: fptoui:

; CHECK-HARDFP-FULLFP16: vcvt.u32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}

define float @UintToH(i32 %a, i32 %b) {
entry:
  %0 = uitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: UintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.u32 s0, s0
}

define float @SintToH(i32 %a, i32 %b) {
entry:
  %0 = sitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: SintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.s32 s0, s0
}

define i32 @f2h(float %f) {
entry:
  %conv = fptrunc float %f to half
  %0 = bitcast half %conv to i16
  %tmp.0.insert.ext = zext i16 %0 to i32
  ret i32 %tmp.0.insert.ext

; CHECK-LABEL: f2h:
; CHECK-HARDFP-FULLFP16: vcvtb.f16.f32 s0, s0
}

define float @h2f(i32 %h.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fpext half %0 to float
  ret float %conv

; CHECK-LABEL: h2f:
; CHECK-HARDFP-FULLFP16: vcvtb.f32.f16 s0, s0
}


define double @h2d(i32 %h.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fpext half %0 to double
  ret double %conv

; CHECK-LABEL: h2d:
; CHECK-HARDFP-FULLFP16: vcvtb.f64.f16 d{{.*}}, s{{.}}
}

define i32 @d2h(double %d) {
entry:
  %conv = fptrunc double %d to half
  %0 = bitcast half %conv to i16
  %tmp.0.insert.ext = zext i16 %0 to i32
  ret i32 %tmp.0.insert.ext

; CHECK-LABEL: d2h:
; CHECK-HARDFP-FULLFP16: vcvtb.f16.f64 s0, d{{.*}}
}

; TODO:
; 7. VCVTA
; 8. VCVTM
; 9. VCVTN
; 10. VCVTP
; 11. VCVTR

; 12. VDIV
define float @Div(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fdiv half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Div:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fdiv
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vdiv.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vdiv.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vdiv.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vdiv.f16 s0, s0, s1
}

; 13. VFMA
define float @VFMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %mul, %5
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMA:
; CHECK-HARDFP-FULLFP16-FAST: vfma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 14. VFMS
define float @VFMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half %5, %mul
  %6 = bitcast half %sub to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMS:
; CHECK-HARDFP-FULLFP16-FAST: vfms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 15. VFNMA
define float @VFNMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half -0.0, %mul
  %sub2 = fsub half %sub, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMA:
; CHECK-HARDFP-FULLFP16-FAST: vfnma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 16. VFNMS
define float @VFNMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub2 = fsub half %mul, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMS:
; CHECK-HARDFP-FULLFP16-FAST: vfnms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 17. VMAXNM
; 18. VMINNM
; Tested in fp16-vminmaxnm.ll and fp16-vminmaxnm-safe.ll


; 19. VMLA
define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLA:
; CHECK-HARDFP-FULLFP16: vmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; 20. VMLS
define float @VMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fsub half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLS:
; CHECK-HARDFP-FULLFP16: vmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; TODO: fix immediates.
; 21. VMOV (between general-purpose register and half-precision register)

; 22. VMOV (immediate)
define i32 @movi(i32 %a.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %a.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %add = fadd half %0, 0xHC000
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL: movi:
; CHECK-HARDFP-FULLFP16: vmov.f16 s0, #-2.000000e+00
}

; 23. VMUL
define float @Mul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Mul:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fmul
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vmul.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmul.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vmul.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vmul.f16 s0, s0, s1
}

; 24. VNEG
define float @Neg(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = fsub half -0.000000e+00, %1
  %3 = bitcast half %2 to i16
  %tmp4.0.insert.ext = zext i16 %3 to i32
  %4 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %4

; CHECK-LABEL: Neg:
; CHECK-HARDFP-FULLFP16: vneg.f16 s0, s0
}

; 25. VNMLA
define float @VNMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.000000e+00, %add
  %add3 = fsub half %add2, %5
  %6 = bitcast half %add3 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLA:
; CHECK-HARDFP-FULLFP16: vnmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 26. VNMLS
define float @VNMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half %add, %5
  %6 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLS:
; CHECK-HARDFP-FULLFP16: vnmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 27. VNMUL
define float @NMul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.0, %add
  %4 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: NMul:
; CHECK-HARDFP-FULLFP16: vnmul.f16 s0, s0, s1
}

; TODO:
; 28. VRINTA
; 29. VRINTM
; 30. VRINTN
; 31. VRINTP
; 32. VRINTR
; 33. VRINTX
; 34. VRINTZ

; 35. VSELEQ
define half @select_cc1() {
  %1 = fcmp nsz oeq half undef, 0xH0001
  %2 = select i1 %1, half 0xHC000, half 0xH0002
  ret half %2

; CHECK-LABEL: select_cc1:

; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vseleq.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32 s0, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmoveq.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmp.f32 s0, s0
; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32: it eq
; CHECK-SOFTFP-FP16-T32: vmoveq.f32 s{{.}}, s{{.}}
}

; FIXME: more tests need to be added for VSELGE and VSELGT.
; That is, more combinations of immediate operands that can or can't
; be encoded as an FP16 immediate need to be added here.
;
; 36. VSELGE
define half @select_cc_ge1() {
  %1 = fcmp nsz oge half undef, 0xH0001
  %2 = select i1 %1, half 0xHC000, half 0xH0002
  ret half %2

; CHECK-LABEL: select_cc_ge1:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovge.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it ge
; CHECK-SOFTFP-FP16-T32-NEXT: vmovge.f32 s{{.}}, s{{.}}
}

;
; FIXME: add fcmp ole, ult here.
;

define half @select_cc_ge3() {
  %1 = fcmp nsz ugt half undef, 0xH0001
  %2 = select i1 %1, half 0xHC000, half 0xH0002
  ret half %2

; CHECK-LABEL: select_cc_ge3:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovhi.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it hi
; CHECK-SOFTFP-FP16-T32-NEXT: vmovhi.f32 s{{.}}, s{{.}}
}

; 37. VSELGT
define half @select_cc_gt1() {
  %1 = fcmp nsz ogt half undef, 0xH0001
  %2 = select i1 %1, half 0xHC000, half 0xH0002
  ret half %2

; CHECK-LABEL: select_cc_gt1:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovgt.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it gt
; CHECK-SOFTFP-FP16-T32-NEXT: vmovgt.f32 s{{.}}, s{{.}}
}

define half @select_cc_gt2() {
  %1 = fcmp nsz uge half undef, 0xH0001
  %2 = select i1 %1, half 0xHC000, half 0xH0002
  ret half %2

; CHECK-LABEL: select_cc_gt2:

; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovpl.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it pl
; CHECK-SOFTFP-FP16-T32-NEXT: vmovpl.f32 s{{.}}, s{{.}}
}
819
820;
821; FIXME: add fcmp ule, olt here.
822;
823
; 38. VSELVS
; 'ueq' (unordered-or-equal) cannot be expressed with a single condition
; code, so the select lowers to two conditional selects/moves: one for EQ
; and one for VS (unordered). The bitcast/trunc/zext dance passes the half
; value in the low 16 bits of a float argument/return (soft-float ABI
; coercion pattern used throughout this file).
define float @select_cc4(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half

  %2 = fcmp nsz ueq half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002

  %4 = bitcast half %3 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: select_cc4:

; CHECK-HARDFP-FULLFP16:       vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16:       vldr.16 [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16:       vmov.f16 [[S6:s[0-9]]], #-2.000000e+00
; CHECK-HARDFP-FULLFP16:       vcmp.f16 s0, [[S2]]
; CHECK-HARDFP-FULLFP16-NEXT:  vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT:  vseleq.f16 [[S0:s[0-9]]], [[S6]], [[S4]]
; CHECK-HARDFP-FULLFP16-NEXT:  vselvs.f16 s0, [[S6]], [[S0]]

; CHECK-SOFTFP-FP16-A32:       vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-A32:       vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-A32:       vcvtb.f32.f16 [[S6]], [[S6]]
; CHECK-SOFTFP-FP16-A32:       vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-A32:       vcmp.f32 [[S6]], s0
; CHECK-SOFTFP-FP16-A32:       vldr [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-A32:       vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32:       vmoveq.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-A32-NEXT:  vmovvs.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-A32-NEXT:  vcvtb.f16.f32 s0, [[S4]]

; CHECK-SOFTFP-FP16-T32:       vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-T32:       vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-T32:       vcvtb.f32.f16 [[S6]], [[S6]]
; CHECK-SOFTFP-FP16-T32:       vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-T32:       vcmp.f32 [[S6]], s0
; CHECK-SOFTFP-FP16-T32:       vldr [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-T32:       vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32:       it eq
; CHECK-SOFTFP-FP16-T32:       vmoveq.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-T32:       it vs
; CHECK-SOFTFP-FP16-T32-NEXT:  vmovvs.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-T32-NEXT:  vcvtb.f16.f32 s0, [[S4]]
}
873
874; 39. VSQRT - TODO
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000875
; 40. VSUB
; Half-precision subtract. Depending on the float ABI / FP16 support this
; lowers to: libcalls only (soft), promote-to-f32 + VSUB.F32 (vfp3/vfp4),
; or a native VSUB.F16 (+fullfp16). The half operands travel as the low 16
; bits of float arguments (bitcast/trunc coercion).
define float @Sub(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fsub half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Sub:

; CHECK-SOFT:  bl  __aeabi_h2f
; CHECK-SOFT:  bl  __aeabi_h2f
; CHECK-SOFT:  bl  __aeabi_fsub
; CHECK-SOFT:  bl  __aeabi_f2h

; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
; CHECK-SOFTFP-VFP3:  vsub.f32
; CHECK-SOFTFP-VFP3:  bl  __aeabi_f2h

; CHECK-SOFTFP-FP16:  vmov    [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16:  vmov    [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16:  vsub.f32  [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16:  vmov  r0, s0

; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16:       vsub.f16  [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT:  vmov.f16  r0, s0

; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
; CHECK-HARDFP-VFP3:  vsub.f32
; CHECK-HARDFP-VFP3:  bl  __aeabi_f2h
; CHECK-HARDFP-VFP3:  vmov  s0, r0

; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16:  vsub.f32  [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16:  vsub.f16  s0, s0, s1
}
Sjoerd Meijer101ee432018-02-13 10:29:03 +0000931
; Check for VSTRH with a FCONSTH; this checks that addressing mode
; AddrMode5FP16 is supported. The volatile store/load of the half constant
; 0xH3C00 (1.0) forces a VSTR.16/VLDR.16 to a stack slot.
define i32 @ThumbAddrMode5FP16(i32 %A.coerce) {
entry:
  %S = alloca half, align 2
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %S.0.S.0..sroa_cast = bitcast half* %S to i8*
  store volatile half 0xH3C00, half* %S, align 2
  %S.0.S.0. = load volatile half, half* %S, align 2
  %add = fadd half %S.0.S.0., %0
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL: ThumbAddrMode5FP16

; CHECK-SOFTFP-FULLFP16:  vmov.f16  [[S0:s[0-9]]], #1.000000e+00
; CHECK-SOFTFP-FULLFP16:  vstr.16   [[S0]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16:  vmov.f16  [[S0_2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16:  vldr.16   [[S2:s[0-9]]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16:  vadd.f16  s{{.}}, [[S2]], [[S0_2]]
}
Sjoerd Meijer3b4294ed2018-02-14 15:09:09 +0000955
; Test function calls to check store/load reg to/from stack.
; Compiled at -O0 (fast-isel, see RUN lines at the top of the file), the
; half constant 0xH7C00 (+infinity) is kept live across the call to fn2,
; which should produce a 2-byte VSTR.16 spill and VLDR.16 reload.
define i32 @fn1() {
entry:
  %coerce = alloca half, align 2
  %tmp2 = alloca i32, align 4
  store half 0xH7C00, half* %coerce, align 2
  %0 = load i32, i32* %tmp2, align 4
  %call = call i32 bitcast (i32 (...)* @fn2 to i32 (i32)*)(i32 %0)
  store half 0xH7C00, half* %coerce, align 2
  %1 = load i32, i32* %tmp2, align 4
  %call3 = call i32 bitcast (i32 (...)* @fn3 to i32 (i32)*)(i32 %1)
  ret i32 %call3

; CHECK-SPILL-RELOAD-LABEL: fn1:
; CHECK-SPILL-RELOAD:       vstr.16 s0, [sp, #{{.}}]  @ 2-byte Spill
; CHECK-SPILL-RELOAD-NEXT:  bl  fn2
; CHECK-SPILL-RELOAD-NEXT:  vldr.16 s0, [sp, #{{.}}]  @ 2-byte Reload
}
974
; External callees used by fn1 to force a spill/reload across the calls.
declare dso_local i32 @fn2(...)
declare dso_local i32 @fn3(...)