blob: 670fcf58b1edbabd22835a1aeada67c9bf2420a9 [file] [log] [blame]
Sjoerd Meijer011de9c2018-01-26 09:26:40 +00001; SOFT:
Sjoerd Meijer9d9a8652018-02-01 13:48:40 +00002; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
Sjoerd Meijer89ea2642018-02-06 08:43:56 +00003; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
Sjoerd Meijer011de9c2018-01-26 09:26:40 +00004
5; SOFTFP:
Sjoerd Meijer9d9a8652018-02-01 13:48:40 +00006; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
Sjoerd Meijerac96d7c2018-04-11 09:28:04 +00007; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-A32
Sjoerd Meijer9d9a8652018-02-01 13:48:40 +00008; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
Sjoerd Meijer011de9c2018-01-26 09:26:40 +00009
Sjoerd Meijer89ea2642018-02-06 08:43:56 +000010; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
Sjoerd Meijerac96d7c2018-04-11 09:28:04 +000011; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-T32
Sjoerd Meijer89ea2642018-02-06 08:43:56 +000012; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
13
Sjoerd Meijer3b4294ed2018-02-14 15:09:09 +000014; Test fast-isel
15; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
16; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
17
Sjoerd Meijer011de9c2018-01-26 09:26:40 +000018; HARD:
Sjoerd Meijer9d9a8652018-02-01 13:48:40 +000019; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
20; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
21; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16
22
Sjoerd Meijer89ea2642018-02-06 08:43:56 +000023; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
24; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
25; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16
Sjoerd Meijer9d9a8652018-02-01 13:48:40 +000026
Sjoerd Meijer89ea2642018-02-06 08:43:56 +000027; FP-CONTRACT=FAST
28; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
29; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
30
Sjoerd Meijer834f7dc2018-04-13 15:34:26 +000031; TODO: we can't pass half-precision arguments as "half" types yet. We do
32; that for the time being by passing "float %f.coerce" and the necessary
33; bitconverts/truncates. But when we can pass half types, we do want to use
34; and test that here.
Sjoerd Meijer89ea2642018-02-06 08:43:56 +000035
36define float @RetValBug(float %A.coerce) {
Sjoerd Meijer9d9a8652018-02-01 13:48:40 +000037entry:
38 ret float undef
Sjoerd Meijer89ea2642018-02-06 08:43:56 +000039; Check thatLowerReturn can handle undef nodes (i.e. nodes which do not have
40; any operands) when FullFP16 is enabled.
Sjoerd Meijer9d9a8652018-02-01 13:48:40 +000041;
42; CHECK-LABEL: RetValBug:
Sjoerd Meijer89ea2642018-02-06 08:43:56 +000043; CHECK-HARDFP-FULLFP16: {{.*}} lr
Sjoerd Meijer9d9a8652018-02-01 13:48:40 +000044}
Sjoerd Meijer011de9c2018-01-26 09:26:40 +000045
Sjoerd Meijer89ea2642018-02-06 08:43:56 +000046; 1. VABS: TODO
47
48; 2. VADD
49define float @Add(float %a.coerce, float %b.coerce) {
Sjoerd Meijer011de9c2018-01-26 09:26:40 +000050entry:
51 %0 = bitcast float %a.coerce to i32
52 %tmp.0.extract.trunc = trunc i32 %0 to i16
53 %1 = bitcast i16 %tmp.0.extract.trunc to half
54 %2 = bitcast float %b.coerce to i32
55 %tmp1.0.extract.trunc = trunc i32 %2 to i16
56 %3 = bitcast i16 %tmp1.0.extract.trunc to half
57 %add = fadd half %1, %3
58 %4 = bitcast half %add to i16
59 %tmp4.0.insert.ext = zext i16 %4 to i32
60 %5 = bitcast i32 %tmp4.0.insert.ext to float
61 ret float %5
62
Sjoerd Meijer9d9a8652018-02-01 13:48:40 +000063; CHECK-LABEL: Add:
64
Sjoerd Meijer011de9c2018-01-26 09:26:40 +000065; CHECK-SOFT: bl __aeabi_h2f
66; CHECK-SOFT: bl __aeabi_h2f
67; CHECK-SOFT: bl __aeabi_fadd
68; CHECK-SOFT: bl __aeabi_f2h
69
70; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
71; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
72; CHECK-SOFTFP-VFP3: vadd.f32
73; CHECK-SOFTFP-VFP3: bl __aeabi_f2h
74
75; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
76; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
77; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
78; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
79; CHECK-SOFTFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
80; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
81; CHECK-SOFTFP-FP16: vmov r0, s0
82
Sjoerd Meijer98d53592018-01-31 10:18:29 +000083; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
84; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
85; CHECK-SOFTFP-FULLFP16: vadd.f16 [[S0]], [[S2]], [[S0]]
86; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0
Sjoerd Meijer011de9c2018-01-26 09:26:40 +000087
88; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
89; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
90; CHECK-HARDFP-VFP3: bl __aeabi_h2f
91; CHECK-HARDFP-VFP3: bl __aeabi_h2f
92; CHECK-HARDFP-VFP3: vadd.f32
93; CHECK-HARDFP-VFP3: bl __aeabi_f2h
94; CHECK-HARDFP-VFP3: vmov s0, r0
95
96; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
97; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
98; CHECK-HARDFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
99; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
100
101; CHECK-HARDFP-FULLFP16: vadd.f16 s0, s0, s1
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000102}
103
104; 3. VCMP
Sjoerd Meijer9430c8c2018-02-15 10:33:07 +0000105define zeroext i1 @VCMP1(float %F.coerce, float %G.coerce) {
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000106entry:
107 %0 = bitcast float %F.coerce to i32
108 %tmp.0.extract.trunc = trunc i32 %0 to i16
109 %1 = bitcast i16 %tmp.0.extract.trunc to half
110 %2 = bitcast float %G.coerce to i32
111 %tmp1.0.extract.trunc = trunc i32 %2 to i16
112 %3 = bitcast i16 %tmp1.0.extract.trunc to half
Sjoerd Meijer9430c8c2018-02-15 10:33:07 +0000113 %cmp = fcmp une half %1, %3
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000114 ret i1 %cmp
115
Sjoerd Meijer9430c8c2018-02-15 10:33:07 +0000116; CHECK-LABEL: VCMP1:
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000117
Sjoerd Meijer9430c8c2018-02-15 10:33:07 +0000118; CHECK-SOFT: bl __aeabi_fcmpeq
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000119
120; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
121; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
Sjoerd Meijer9430c8c2018-02-15 10:33:07 +0000122; CHECK-SOFTFP-VFP3: vcmp.f32 s{{.}}, s{{.}}
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000123
124; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
125; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
Sjoerd Meijer9430c8c2018-02-15 10:33:07 +0000126; CHECK-SOFTFP-FP16: vcmp.f32 s{{.}}, s{{.}}
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000127
128; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
129; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
Sjoerd Meijer9430c8c2018-02-15 10:33:07 +0000130; CHECK-SOFTFP-FULLFP16: vcmp.f16 [[S2]], [[S0]]
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000131
Sjoerd Meijer9430c8c2018-02-15 10:33:07 +0000132; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r0
133; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r1
134; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s1
135}
136
137; Check VCMPZH
138define zeroext i1 @VCMP2(float %F.coerce) {
139entry:
140 %0 = bitcast float %F.coerce to i32
141 %tmp.0.extract.trunc = trunc i32 %0 to i16
142 %1 = bitcast i16 %tmp.0.extract.trunc to half
Sjoerd Meijer4d5c4042018-02-20 19:28:05 +0000143 %cmp = fcmp une half %1, 0.000000e+00
Sjoerd Meijer9430c8c2018-02-15 10:33:07 +0000144 ret i1 %cmp
145
146; CHECK-LABEL: VCMP2:
147
148; CHECK-SOFT: bl __aeabi_fcmpeq
149; CHECK-SOFTFP-FP16: vcmp.f32 s0, #0
150; CHECK-SOFTFP-FULLFP16: vcmp.f16 s0, #0
151; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, #0
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000152}
153
154; 4. VCMPE
Sjoerd Meijer9430c8c2018-02-15 10:33:07 +0000155define i32 @VCMPE1(float %F.coerce) {
156entry:
157 %0 = bitcast float %F.coerce to i32
158 %tmp.0.extract.trunc = trunc i32 %0 to i16
159 %1 = bitcast i16 %tmp.0.extract.trunc to half
160 %tmp = fcmp olt half %1, 0.000000e+00
161 %tmp1 = zext i1 %tmp to i32
162 ret i32 %tmp1
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000163
Sjoerd Meijer9430c8c2018-02-15 10:33:07 +0000164; CHECK-LABEL: VCMPE1:
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000165
Sjoerd Meijer9430c8c2018-02-15 10:33:07 +0000166; CHECK-SOFT: bl __aeabi_fcmplt
167; CHECK-SOFTFP-FP16: vcmpe.f32 s0, #0
168; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s0, #0
169; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, #0
170}
171
172define i32 @VCMPE2(float %F.coerce, float %G.coerce) {
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000173entry:
174 %0 = bitcast float %F.coerce to i32
175 %tmp.0.extract.trunc = trunc i32 %0 to i16
176 %1 = bitcast i16 %tmp.0.extract.trunc to half
177 %2 = bitcast float %G.coerce to i32
178 %tmp.1.extract.trunc = trunc i32 %2 to i16
179 %3 = bitcast i16 %tmp.1.extract.trunc to half
180 %tmp = fcmp olt half %1, %3
181 %tmp1 = zext i1 %tmp to i32
182 ret i32 %tmp1
183
Sjoerd Meijer9430c8c2018-02-15 10:33:07 +0000184; CHECK-LABEL: VCMPE2:
185
186; CHECK-SOFT: bl __aeabi_fcmplt
187; CHECK-SOFTFP-FP16: vcmpe.f32 s{{.}}, s{{.}}
188; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
189; CHECK-HARDFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000190}
191
Sjoerd Meijer4d5c4042018-02-20 19:28:05 +0000192; Test lowering of BR_CC
193define hidden i32 @VCMPBRCC() {
194entry:
195 %f = alloca half, align 2
196 br label %for.cond
197
198for.cond:
199 %0 = load half, half* %f, align 2
200 %cmp = fcmp nnan ninf nsz ole half %0, 0xH6800
201 br i1 %cmp, label %for.body, label %for.end
202
203for.body:
204 ret i32 1
205
206for.end:
207 ret i32 0
208
209; CHECK-LABEL: VCMPBRCC:
210
Sanjay Patel8652c532018-05-15 14:16:24 +0000211; CHECK-SOFT: bl __aeabi_fcmpgt
Sjoerd Meijer4d5c4042018-02-20 19:28:05 +0000212; CHECK-SOFT: cmp r0, #0
213
214; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]]
215; CHECK-SOFTFP-FP16: vcmpe.f32 [[S2]], s0
216; CHECK-SOFTFP-FP16: vmrs APSR_nzcv, fpscr
217
218; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}}
219; CHECK-SOFTFP-FULLFP16: vmrs APSR_nzcv, fpscr
220}
221
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000222; 5. VCVT (between floating-point and fixed-point)
223; Only assembly/disassembly support
224
225; 6. VCVT (between floating-point and integer, both directions)
226define i32 @fptosi(i32 %A.coerce) {
227entry:
228 %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
229 %0 = bitcast i16 %tmp.0.extract.trunc to half
230 %conv = fptosi half %0 to i32
231 ret i32 %conv
232
233; CHECK-LABEL: fptosi:
234
235; CHECK-HARDFP-FULLFP16: vmov.f16 s0, r0
236; CHECK-HARDFP-FULLFP16-NEXT: vcvt.s32.f16 s0, s0
237; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
238}
239
240define i32 @fptoui(i32 %A.coerce) {
241entry:
242 %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
243 %0 = bitcast i16 %tmp.0.extract.trunc to half
244 %conv = fptoui half %0 to i32
245 ret i32 %conv
246
247; CHECK-HARDFP-FULLFP16: vcvt.u32.f16 s0, s0
248; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
249}
250
251define float @UintToH(i32 %a, i32 %b) {
252entry:
253 %0 = uitofp i32 %a to half
254 %1 = bitcast half %0 to i16
255 %tmp0.insert.ext = zext i16 %1 to i32
256 %2 = bitcast i32 %tmp0.insert.ext to float
257 ret float %2
258
259; CHECK-LABEL: UintToH:
260
261; CHECK-HARDFP-FULLFP16: vmov s0, r0
262; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.u32 s0, s0
263}
264
265define float @SintToH(i32 %a, i32 %b) {
266entry:
267 %0 = sitofp i32 %a to half
268 %1 = bitcast half %0 to i16
269 %tmp0.insert.ext = zext i16 %1 to i32
270 %2 = bitcast i32 %tmp0.insert.ext to float
271 ret float %2
272
273; CHECK-LABEL: SintToH:
274
275; CHECK-HARDFP-FULLFP16: vmov s0, r0
276; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.s32 s0, s0
277}
278
Sjoerd Meijerd2718ba2018-02-06 16:28:43 +0000279define i32 @f2h(float %f) {
280entry:
281 %conv = fptrunc float %f to half
282 %0 = bitcast half %conv to i16
283 %tmp.0.insert.ext = zext i16 %0 to i32
284 ret i32 %tmp.0.insert.ext
285
286; CHECK-LABEL: f2h:
287; CHECK-HARDFP-FULLFP16: vcvtb.f16.f32 s0, s0
288}
289
290define float @h2f(i32 %h.coerce) {
291entry:
292 %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
293 %0 = bitcast i16 %tmp.0.extract.trunc to half
294 %conv = fpext half %0 to float
295 ret float %conv
296
297; CHECK-LABEL: h2f:
298; CHECK-HARDFP-FULLFP16: vcvtb.f32.f16 s0, s0
299}
300
301
302define double @h2d(i32 %h.coerce) {
303entry:
304 %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
305 %0 = bitcast i16 %tmp.0.extract.trunc to half
306 %conv = fpext half %0 to double
307 ret double %conv
308
309; CHECK-LABEL: h2d:
310; CHECK-HARDFP-FULLFP16: vcvtb.f64.f16 d{{.*}}, s{{.}}
311}
312
313define i32 @d2h(double %d) {
314entry:
315 %conv = fptrunc double %d to half
316 %0 = bitcast half %conv to i16
317 %tmp.0.insert.ext = zext i16 %0 to i32
318 ret i32 %tmp.0.insert.ext
319
320; CHECK-LABEL: d2h:
321; CHECK-HARDFP-FULLFP16: vcvtb.f16.f64 s0, d{{.*}}
322}
323
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000324; TODO:
325; 7. VCVTA
326; 8. VCVTM
327; 9. VCVTN
328; 10. VCVTP
329; 11. VCVTR
330
331; 12. VDIV
332define float @Div(float %a.coerce, float %b.coerce) {
333entry:
334 %0 = bitcast float %a.coerce to i32
335 %tmp.0.extract.trunc = trunc i32 %0 to i16
336 %1 = bitcast i16 %tmp.0.extract.trunc to half
337 %2 = bitcast float %b.coerce to i32
338 %tmp1.0.extract.trunc = trunc i32 %2 to i16
339 %3 = bitcast i16 %tmp1.0.extract.trunc to half
340 %add = fdiv half %1, %3
341 %4 = bitcast half %add to i16
342 %tmp4.0.insert.ext = zext i16 %4 to i32
343 %5 = bitcast i32 %tmp4.0.insert.ext to float
344 ret float %5
345
346; CHECK-LABEL: Div:
347
348; CHECK-SOFT: bl __aeabi_h2f
349; CHECK-SOFT: bl __aeabi_h2f
350; CHECK-SOFT: bl __aeabi_fdiv
351; CHECK-SOFT: bl __aeabi_f2h
352
353; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
354; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
355; CHECK-SOFTFP-VFP3: vdiv.f32
356; CHECK-SOFTFP-VFP3: bl __aeabi_f2h
357
358; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
359; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
360; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
361; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
362; CHECK-SOFTFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
363; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
364; CHECK-SOFTFP-FP16: vmov r0, s0
365
366; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
367; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
368; CHECK-SOFTFP-FULLFP16: vdiv.f16 [[S0]], [[S2]], [[S0]]
369; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0
370
371; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
372; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
373; CHECK-HARDFP-VFP3: bl __aeabi_h2f
374; CHECK-HARDFP-VFP3: bl __aeabi_h2f
375; CHECK-HARDFP-VFP3: vdiv.f32
376; CHECK-HARDFP-VFP3: bl __aeabi_f2h
377; CHECK-HARDFP-VFP3: vmov s0, r0
378
379; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
380; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
381; CHECK-HARDFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
382; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
383
384; CHECK-HARDFP-FULLFP16: vdiv.f16 s0, s0, s1
385}
386
387; 13. VFMA
388define float @VFMA(float %a.coerce, float %b.coerce, float %c.coerce) {
389entry:
390 %0 = bitcast float %a.coerce to i32
391 %tmp.0.extract.trunc = trunc i32 %0 to i16
392 %1 = bitcast i16 %tmp.0.extract.trunc to half
393 %2 = bitcast float %b.coerce to i32
394 %tmp1.0.extract.trunc = trunc i32 %2 to i16
395 %3 = bitcast i16 %tmp1.0.extract.trunc to half
396 %4 = bitcast float %c.coerce to i32
397 %tmp2.0.extract.trunc = trunc i32 %4 to i16
398 %5 = bitcast i16 %tmp2.0.extract.trunc to half
399 %mul = fmul half %1, %3
400 %add = fadd half %mul, %5
401 %6 = bitcast half %add to i16
402 %tmp4.0.insert.ext = zext i16 %6 to i32
403 %7 = bitcast i32 %tmp4.0.insert.ext to float
404 ret float %7
405
406; CHECK-LABEL: VFMA:
407; CHECK-HARDFP-FULLFP16-FAST: vfma.f16 s2, s0, s1
408; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
409}
410
411; 14. VFMS
412define float @VFMS(float %a.coerce, float %b.coerce, float %c.coerce) {
413entry:
414 %0 = bitcast float %a.coerce to i32
415 %tmp.0.extract.trunc = trunc i32 %0 to i16
416 %1 = bitcast i16 %tmp.0.extract.trunc to half
417 %2 = bitcast float %b.coerce to i32
418 %tmp1.0.extract.trunc = trunc i32 %2 to i16
419 %3 = bitcast i16 %tmp1.0.extract.trunc to half
420 %4 = bitcast float %c.coerce to i32
421 %tmp2.0.extract.trunc = trunc i32 %4 to i16
422 %5 = bitcast i16 %tmp2.0.extract.trunc to half
423 %mul = fmul half %1, %3
424 %sub = fsub half %5, %mul
425 %6 = bitcast half %sub to i16
426 %tmp4.0.insert.ext = zext i16 %6 to i32
427 %7 = bitcast i32 %tmp4.0.insert.ext to float
428 ret float %7
429
430; CHECK-LABEL: VFMS:
431; CHECK-HARDFP-FULLFP16-FAST: vfms.f16 s2, s0, s1
432; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
433}
434
435; 15. VFNMA
436define float @VFNMA(float %a.coerce, float %b.coerce, float %c.coerce) {
437entry:
438 %0 = bitcast float %a.coerce to i32
439 %tmp.0.extract.trunc = trunc i32 %0 to i16
440 %1 = bitcast i16 %tmp.0.extract.trunc to half
441 %2 = bitcast float %b.coerce to i32
442 %tmp1.0.extract.trunc = trunc i32 %2 to i16
443 %3 = bitcast i16 %tmp1.0.extract.trunc to half
444 %4 = bitcast float %c.coerce to i32
445 %tmp2.0.extract.trunc = trunc i32 %4 to i16
446 %5 = bitcast i16 %tmp2.0.extract.trunc to half
447 %mul = fmul half %1, %3
448 %sub = fsub half -0.0, %mul
449 %sub2 = fsub half %sub, %5
450 %6 = bitcast half %sub2 to i16
451 %tmp4.0.insert.ext = zext i16 %6 to i32
452 %7 = bitcast i32 %tmp4.0.insert.ext to float
453 ret float %7
454
455; CHECK-LABEL: VFNMA:
456; CHECK-HARDFP-FULLFP16-FAST: vfnma.f16 s2, s0, s1
457; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
458}
459
460; 16. VFNMS
461define float @VFNMS(float %a.coerce, float %b.coerce, float %c.coerce) {
462entry:
463 %0 = bitcast float %a.coerce to i32
464 %tmp.0.extract.trunc = trunc i32 %0 to i16
465 %1 = bitcast i16 %tmp.0.extract.trunc to half
466 %2 = bitcast float %b.coerce to i32
467 %tmp1.0.extract.trunc = trunc i32 %2 to i16
468 %3 = bitcast i16 %tmp1.0.extract.trunc to half
469 %4 = bitcast float %c.coerce to i32
470 %tmp2.0.extract.trunc = trunc i32 %4 to i16
471 %5 = bitcast i16 %tmp2.0.extract.trunc to half
472 %mul = fmul half %1, %3
473 %sub2 = fsub half %mul, %5
474 %6 = bitcast half %sub2 to i16
475 %tmp4.0.insert.ext = zext i16 %6 to i32
476 %7 = bitcast i32 %tmp4.0.insert.ext to float
477 ret float %7
478
479; CHECK-LABEL: VFNMS:
480; CHECK-HARDFP-FULLFP16-FAST: vfnms.f16 s2, s0, s1
481; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
482}
483
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000484; 17. VMAXNM
485; 18. VMINNM
Sjoerd Meijer834f7dc2018-04-13 15:34:26 +0000486; Tested in fp16-vminmaxnm.ll and fp16-vminmaxnm-safe.ll
487
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000488; 19. VMLA
489define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
490entry:
491 %0 = bitcast float %a.coerce to i32
492 %tmp.0.extract.trunc = trunc i32 %0 to i16
493 %1 = bitcast i16 %tmp.0.extract.trunc to half
494 %2 = bitcast float %b.coerce to i32
495 %tmp1.0.extract.trunc = trunc i32 %2 to i16
496 %3 = bitcast i16 %tmp1.0.extract.trunc to half
497 %4 = bitcast float %c.coerce to i32
498 %tmp2.0.extract.trunc = trunc i32 %4 to i16
499 %5 = bitcast i16 %tmp2.0.extract.trunc to half
500 %mul = fmul half %1, %3
501 %add = fadd half %5, %mul
502 %6 = bitcast half %add to i16
503 %tmp4.0.insert.ext = zext i16 %6 to i32
504 %7 = bitcast i32 %tmp4.0.insert.ext to float
505 ret float %7
506
507; CHECK-LABEL: VMLA:
508; CHECK-HARDFP-FULLFP16: vmla.f16 s2, s0, s1
509; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
510}
511
512; 20. VMLS
513define float @VMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
514entry:
515 %0 = bitcast float %a.coerce to i32
516 %tmp.0.extract.trunc = trunc i32 %0 to i16
517 %1 = bitcast i16 %tmp.0.extract.trunc to half
518 %2 = bitcast float %b.coerce to i32
519 %tmp1.0.extract.trunc = trunc i32 %2 to i16
520 %3 = bitcast i16 %tmp1.0.extract.trunc to half
521 %4 = bitcast float %c.coerce to i32
522 %tmp2.0.extract.trunc = trunc i32 %4 to i16
523 %5 = bitcast i16 %tmp2.0.extract.trunc to half
524 %mul = fmul half %1, %3
525 %add = fsub half %5, %mul
526 %6 = bitcast half %add to i16
527 %tmp4.0.insert.ext = zext i16 %6 to i32
528 %7 = bitcast i32 %tmp4.0.insert.ext to float
529 ret float %7
530
531; CHECK-LABEL: VMLS:
532; CHECK-HARDFP-FULLFP16: vmls.f16 s2, s0, s1
533; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
534}
535
536; TODO: fix immediates.
537; 21. VMOV (between general-purpose register and half-precision register)
Sjoerd Meijer8c073932018-02-07 08:37:17 +0000538
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000539; 22. VMOV (immediate)
Sjoerd Meijer8c073932018-02-07 08:37:17 +0000540define i32 @movi(i32 %a.coerce) {
541entry:
542 %tmp.0.extract.trunc = trunc i32 %a.coerce to i16
543 %0 = bitcast i16 %tmp.0.extract.trunc to half
544 %add = fadd half %0, 0xHC000
545 %1 = bitcast half %add to i16
546 %tmp2.0.insert.ext = zext i16 %1 to i32
547 ret i32 %tmp2.0.insert.ext
548
549; CHECK-LABEL: movi:
550; CHECK-HARDFP-FULLFP16: vmov.f16 s0, #-2.000000e+00
551}
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000552
553; 23. VMUL
554define float @Mul(float %a.coerce, float %b.coerce) {
555entry:
556 %0 = bitcast float %a.coerce to i32
557 %tmp.0.extract.trunc = trunc i32 %0 to i16
558 %1 = bitcast i16 %tmp.0.extract.trunc to half
559 %2 = bitcast float %b.coerce to i32
560 %tmp1.0.extract.trunc = trunc i32 %2 to i16
561 %3 = bitcast i16 %tmp1.0.extract.trunc to half
562 %add = fmul half %1, %3
563 %4 = bitcast half %add to i16
564 %tmp4.0.insert.ext = zext i16 %4 to i32
565 %5 = bitcast i32 %tmp4.0.insert.ext to float
566 ret float %5
567
568; CHECK-LABEL: Mul:
569
570; CHECK-SOFT: bl __aeabi_h2f
571; CHECK-SOFT: bl __aeabi_h2f
572; CHECK-SOFT: bl __aeabi_fmul
573; CHECK-SOFT: bl __aeabi_f2h
574
575; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
576; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
577; CHECK-SOFTFP-VFP3: vmul.f32
578; CHECK-SOFTFP-VFP3: bl __aeabi_f2h
579
580; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1
581; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0
582; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]]
583; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]]
584; CHECK-SOFTFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
585; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
586; CHECK-SOFTFP-FP16: vmov r0, s0
587
588; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
589; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
590; CHECK-SOFTFP-FULLFP16: vmul.f16 [[S0]], [[S2]], [[S0]]
591; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0
592
593; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
594; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
595; CHECK-HARDFP-VFP3: bl __aeabi_h2f
596; CHECK-HARDFP-VFP3: bl __aeabi_h2f
597; CHECK-HARDFP-VFP3: vmul.f32
598; CHECK-HARDFP-VFP3: bl __aeabi_f2h
599; CHECK-HARDFP-VFP3: vmov s0, r0
600
601; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
602; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
603; CHECK-HARDFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
604; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
605
606; CHECK-HARDFP-FULLFP16: vmul.f16 s0, s0, s1
607}
608
609; 24. VNEG
610define float @Neg(float %a.coerce) {
611entry:
612 %0 = bitcast float %a.coerce to i32
613 %tmp.0.extract.trunc = trunc i32 %0 to i16
614 %1 = bitcast i16 %tmp.0.extract.trunc to half
615 %2 = fsub half -0.000000e+00, %1
616 %3 = bitcast half %2 to i16
617 %tmp4.0.insert.ext = zext i16 %3 to i32
618 %4 = bitcast i32 %tmp4.0.insert.ext to float
619 ret float %4
620
621; CHECK-LABEL: Neg:
622; CHECK-HARDFP-FULLFP16: vneg.f16 s0, s0
623}
624
625; 25. VNMLA
626define float @VNMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
627entry:
628 %0 = bitcast float %a.coerce to i32
629 %tmp.0.extract.trunc = trunc i32 %0 to i16
630 %1 = bitcast i16 %tmp.0.extract.trunc to half
631 %2 = bitcast float %b.coerce to i32
632 %tmp1.0.extract.trunc = trunc i32 %2 to i16
633 %3 = bitcast i16 %tmp1.0.extract.trunc to half
634 %4 = bitcast float %c.coerce to i32
635 %tmp2.0.extract.trunc = trunc i32 %4 to i16
636 %5 = bitcast i16 %tmp2.0.extract.trunc to half
637 %add = fmul half %1, %3
638 %add2 = fsub half -0.000000e+00, %add
639 %add3 = fsub half %add2, %5
640 %6 = bitcast half %add3 to i16
641 %tmp4.0.insert.ext = zext i16 %6 to i32
642 %7 = bitcast i32 %tmp4.0.insert.ext to float
643 ret float %7
644
645; CHECK-LABEL: VNMLA:
646; CHECK-HARDFP-FULLFP16: vnmla.f16 s2, s0, s1
647; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
648}
649
650; 26. VNMLS
651define float @VNMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
652entry:
653 %0 = bitcast float %a.coerce to i32
654 %tmp.0.extract.trunc = trunc i32 %0 to i16
655 %1 = bitcast i16 %tmp.0.extract.trunc to half
656 %2 = bitcast float %b.coerce to i32
657 %tmp1.0.extract.trunc = trunc i32 %2 to i16
658 %3 = bitcast i16 %tmp1.0.extract.trunc to half
659 %4 = bitcast float %c.coerce to i32
660 %tmp2.0.extract.trunc = trunc i32 %4 to i16
661 %5 = bitcast i16 %tmp2.0.extract.trunc to half
662 %add = fmul half %1, %3
663 %add2 = fsub half %add, %5
664 %6 = bitcast half %add2 to i16
665 %tmp4.0.insert.ext = zext i16 %6 to i32
666 %7 = bitcast i32 %tmp4.0.insert.ext to float
667 ret float %7
668
669; CHECK-LABEL: VNMLS:
670; CHECK-HARDFP-FULLFP16: vnmls.f16 s2, s0, s1
671; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
672}
673
674; 27. VNMUL
675define float @NMul(float %a.coerce, float %b.coerce) {
676entry:
677 %0 = bitcast float %a.coerce to i32
678 %tmp.0.extract.trunc = trunc i32 %0 to i16
679 %1 = bitcast i16 %tmp.0.extract.trunc to half
680 %2 = bitcast float %b.coerce to i32
681 %tmp1.0.extract.trunc = trunc i32 %2 to i16
682 %3 = bitcast i16 %tmp1.0.extract.trunc to half
683 %add = fmul half %1, %3
684 %add2 = fsub half -0.0, %add
685 %4 = bitcast half %add2 to i16
686 %tmp4.0.insert.ext = zext i16 %4 to i32
687 %5 = bitcast i32 %tmp4.0.insert.ext to float
688 ret float %5
689
690; CHECK-LABEL: NMul:
691; CHECK-HARDFP-FULLFP16: vnmul.f16 s0, s0, s1
692}
693
Sjoerd Meijerd391a1a2018-03-16 08:06:25 +0000694; TODO:
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000695; 28. VRINTA
696; 29. VRINTM
697; 30. VRINTN
698; 31. VRINTP
699; 32. VRINTR
700; 33. VRINTX
701; 34. VRINTZ
Sjoerd Meijerd391a1a2018-03-16 08:06:25 +0000702
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000703; 35. VSELEQ
Sjoerd Meijerd391a1a2018-03-16 08:06:25 +0000704define half @select_cc1() {
705 %1 = fcmp nsz oeq half undef, 0xH0001
706 %2 = select i1 %1, half 0xHC000, half 0xH0002
707 ret half %2
708
709; CHECK-LABEL: select_cc1:
Sjoerd Meijerac96d7c2018-04-11 09:28:04 +0000710
711; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s0
712; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
Sjoerd Meijerd391a1a2018-03-16 08:06:25 +0000713; CHECK-HARDFP-FULLFP16: vseleq.f16 s0, s{{.}}, s{{.}}
Sjoerd Meijerac96d7c2018-04-11 09:28:04 +0000714
715; CHECK-SOFTFP-FP16-A32: vcmp.f32 s0, s0
716; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
717; CHECK-SOFTFP-FP16-A32-NEXT: vmoveq.f32 s{{.}}, s{{.}}
718
719; CHECK-SOFTFP-FP16-T32: vcmp.f32 s0, s0
720; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
721; CHECK-SOFTFP-FP16-T32: it eq
722; CHECK-SOFTFP-FP16-T32: vmoveq.f32 s{{.}}, s{{.}}
Sjoerd Meijerd391a1a2018-03-16 08:06:25 +0000723}
724
Sjoerd Meijerac96d7c2018-04-11 09:28:04 +0000725; FIXME: more tests need to be added for VSELGE and VSELGT.
726; That is, more combinations of immediate operands that can or can't
727; be encoded as an FP16 immediate need to be added here.
728;
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000729; 36. VSELGE
Sjoerd Meijerac96d7c2018-04-11 09:28:04 +0000730define half @select_cc_ge1() {
Sjoerd Meijerd391a1a2018-03-16 08:06:25 +0000731 %1 = fcmp nsz oge half undef, 0xH0001
732 %2 = select i1 %1, half 0xHC000, half 0xH0002
733 ret half %2
734
Sjoerd Meijerac96d7c2018-04-11 09:28:04 +0000735; CHECK-LABEL: select_cc_ge1:
736
Sjoerd Meijera79ea802018-04-19 08:21:50 +0000737; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
Sjoerd Meijerac96d7c2018-04-11 09:28:04 +0000738; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
739; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
740
741; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
742; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
743; CHECK-SOFTFP-FP16-A32-NEXT: vmovge.f32 s{{.}}, s{{.}}
744
745; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
746; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
747; CHECK-SOFTFP-FP16-T32-NEXT: it ge
748; CHECK-SOFTFP-FP16-T32-NEXT: vmovge.f32 s{{.}}, s{{.}}
749}
750
Sjoerd Meijera79ea802018-04-19 08:21:50 +0000751define half @select_cc_ge2() {
752 %1 = fcmp nsz ole half undef, 0xH0001
753 %2 = select i1 %1, half 0xHC000, half 0xH0002
754 ret half %2
755
756; CHECK-LABEL: select_cc_ge2:
757
758; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
759; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
760; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
761
762; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
763; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
764; CHECK-SOFTFP-FP16-A32-NEXT: vmovls.f32 s{{.}}, s{{.}}
765
766; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
767; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
768; CHECK-SOFTFP-FP16-T32-NEXT: it ls
769; CHECK-SOFTFP-FP16-T32-NEXT: vmovls.f32 s{{.}}, s{{.}}
770}
Sjoerd Meijerac96d7c2018-04-11 09:28:04 +0000771
772define half @select_cc_ge3() {
773 %1 = fcmp nsz ugt half undef, 0xH0001
774 %2 = select i1 %1, half 0xHC000, half 0xH0002
775 ret half %2
776
777; CHECK-LABEL: select_cc_ge3:
778
779; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
780; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
781; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
782
783; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
784; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
785; CHECK-SOFTFP-FP16-A32-NEXT: vmovhi.f32 s{{.}}, s{{.}}
786
787; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
788; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
789; CHECK-SOFTFP-FP16-T32-NEXT: it hi
790; CHECK-SOFTFP-FP16-T32-NEXT: vmovhi.f32 s{{.}}, s{{.}}
Sjoerd Meijerd391a1a2018-03-16 08:06:25 +0000791}
792
Sjoerd Meijera79ea802018-04-19 08:21:50 +0000793define half @select_cc_ge4() {
794 %1 = fcmp nsz ult half undef, 0xH0001
795 %2 = select i1 %1, half 0xHC000, half 0xH0002
796 ret half %2
797
798; CHECK-LABEL: select_cc_ge4:
799
800; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
801; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
802; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
803
804; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
805; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
806; CHECK-SOFTFP-FP16-A32-NEXT: vmovlt.f32 s{{.}}, s{{.}}
807
808; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
809; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
810; CHECK-SOFTFP-FP16-T32-NEXT: it lt
811; CHECK-SOFTFP-FP16-T32-NEXT: vmovlt.f32 s{{.}}, s{{.}}
812}
813
Sjoerd Meijer89ea2642018-02-06 08:43:56 +0000814; 37. VSELGT
; ogt compare feeding a select: vselgt.f16 with +fullfp16, otherwise an
; f32 compare plus a conditional vmov on GT.
define half @select_cc_gt1() {
  %1 = fcmp nsz ogt half undef, 0xH0001
  %2 = select i1 %1, half 0xHC000, half 0xH0002
  ret half %2

; CHECK-LABEL: select_cc_gt1:

; CHECK-HARDFP-FULLFP16:       vcmpe.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT:  vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT:  vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32:       vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT:  vmovgt.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32:       vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT:  it gt
; CHECK-SOFTFP-FP16-T32-NEXT:  vmovgt.f32 s{{.}}, s{{.}}
}

; uge compare feeding a select: vselgt.f16 with +fullfp16, otherwise an
; f32 compare plus a conditional vmov on PL.
define half @select_cc_gt2() {
  %1 = fcmp nsz uge half undef, 0xH0001
  %2 = select i1 %1, half 0xHC000, half 0xH0002
  ret half %2

; CHECK-LABEL: select_cc_gt2:

; CHECK-HARDFP-FULLFP16:       vcmpe.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT:  vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT:  vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32:       vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT:  vmovpl.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32:       vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT:  it pl
; CHECK-SOFTFP-FP16-T32-NEXT:  vmovpl.f32 s{{.}}, s{{.}}
}

; ule compare feeding a select: vselgt.f16 with +fullfp16, otherwise an
; f32 compare plus a conditional vmov on LE.
define half @select_cc_gt3() {
  %1 = fcmp nsz ule half undef, 0xH0001
  %2 = select i1 %1, half 0xHC000, half 0xH0002
  ret half %2

; CHECK-LABEL: select_cc_gt3:

; CHECK-HARDFP-FULLFP16:       vcmpe.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT:  vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT:  vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32:       vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT:  vmovle.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32:       vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT:  it le
; CHECK-SOFTFP-FP16-T32-NEXT:  vmovle.f32 s{{.}}, s{{.}}
}

; olt compare feeding a select: vselgt.f16 with +fullfp16, otherwise an
; f32 compare plus a conditional vmov on MI.
define half @select_cc_gt4() {
  %1 = fcmp nsz olt half undef, 0xH0001
  %2 = select i1 %1, half 0xHC000, half 0xH0002
  ret half %2

; CHECK-LABEL: select_cc_gt4:

; CHECK-HARDFP-FULLFP16:       vcmpe.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT:  vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT:  vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32:       vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT:  vmovmi.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32:       vcmpe.f32 s0, s0
; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT:  it mi
; CHECK-SOFTFP-FP16-T32-NEXT:  vmovmi.f32 s{{.}}, s{{.}}
}

; 38. VSELVS
; ueq ("equal or unordered") compare feeding a select: needs two conditional
; selects, one on EQ and one on VS (unordered), hence vseleq + vselvs with
; +fullfp16 and vmoveq + vmovvs when selecting on f32.
define float @select_cc4(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half

  %2 = fcmp nsz ueq half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002

  %4 = bitcast half %3 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: select_cc4:

; CHECK-HARDFP-FULLFP16:       vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16:       vldr.16 [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16:       vmov.f16 [[S6:s[0-9]]], #-2.000000e+00
; CHECK-HARDFP-FULLFP16:       vcmp.f16 s0, [[S2]]
; CHECK-HARDFP-FULLFP16-NEXT:  vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT:  vseleq.f16 [[S0:s[0-9]]], [[S6]], [[S4]]
; CHECK-HARDFP-FULLFP16-NEXT:  vselvs.f16 s0, [[S6]], [[S0]]

; CHECK-SOFTFP-FP16-A32:       vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-A32:       vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-A32:       vcvtb.f32.f16 [[S6]], [[S6]]
; CHECK-SOFTFP-FP16-A32:       vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-A32:       vcmp.f32 [[S6]], s0
; CHECK-SOFTFP-FP16-A32:       vldr [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-A32:       vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32:       vmoveq.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-A32-NEXT:  vmovvs.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-A32-NEXT:  vcvtb.f16.f32 s0, [[S4]]

; CHECK-SOFTFP-FP16-T32:       vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-T32:       vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-T32:       vcvtb.f32.f16 [[S6]], [[S6]]
; CHECK-SOFTFP-FP16-T32:       vldr [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-T32:       vcmp.f32 [[S6]], s0
; CHECK-SOFTFP-FP16-T32:       vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-T32:       vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32:       it eq
; CHECK-SOFTFP-FP16-T32:       vmoveq.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-T32:       it vs
; CHECK-SOFTFP-FP16-T32-NEXT:  vmovvs.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-T32-NEXT:  vcvtb.f16.f32 s0, [[S4]]
}

; 39. VSQRT - TODO

; 40. VSUB
; half fsub with i16-in-float argument/return coercion. Depending on the
; target config this lowers to __aeabi_* libcalls (soft), an f32 vsub with
; h2f/f2h conversions (FP16 conversions only), or a direct vsub.f16
; (+fullfp16).
define float @Sub(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fsub half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Sub:

; CHECK-SOFT:  bl __aeabi_h2f
; CHECK-SOFT:  bl __aeabi_h2f
; CHECK-SOFT:  bl __aeabi_fsub
; CHECK-SOFT:  bl __aeabi_f2h

; CHECK-SOFTFP-VFP3:  bl __aeabi_h2f
; CHECK-SOFTFP-VFP3:  bl __aeabi_h2f
; CHECK-SOFTFP-VFP3:  vsub.f32
; CHECK-SOFTFP-VFP3:  bl __aeabi_f2h

; CHECK-SOFTFP-FP16:  vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16:  vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16:  vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16:  vmov r0, s0

; CHECK-SOFTFP-FULLFP16:       vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16:       vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16:       vsub.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT:  vmov.f16 r0, s0

; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
; CHECK-HARDFP-VFP3:  bl __aeabi_h2f
; CHECK-HARDFP-VFP3:  bl __aeabi_h2f
; CHECK-HARDFP-VFP3:  vsub.f32
; CHECK-HARDFP-VFP3:  bl __aeabi_f2h
; CHECK-HARDFP-VFP3:  vmov s0, r0

; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16:  vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16:  vsub.f16 s0, s0, s1
}

; Check for VSTRH with a FCONSTH, this checks that addressing mode
; AddrMode5FP16 is supported.
; A volatile FP16 stack slot forces a vstr.16/vldr.16 pair, exercising the
; AddrMode5FP16 addressing mode (sp-relative half-precision load/store).
define i32 @ThumbAddrMode5FP16(i32 %A.coerce) {
entry:
  %S = alloca half, align 2
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %S.0.S.0..sroa_cast = bitcast half* %S to i8*
  store volatile half 0xH3C00, half* %S, align 2
  %S.0.S.0. = load volatile half, half* %S, align 2
  %add = fadd half %S.0.S.0., %0
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL: ThumbAddrMode5FP16

; CHECK-SOFTFP-FULLFP16:  vmov.f16 [[S0:s[0-9]]], #1.000000e+00
; CHECK-SOFTFP-FULLFP16:  vstr.16 [[S0]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16:  vmov.f16 [[S0_2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16:  vldr.16 [[S2:s[0-9]]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16:  vadd.f16 s{{.}}, [[S2]], [[S0_2]]
}

; Test function calls to check store/load reg to/from stack
; A call clobbers FP registers, so the live FP16 value must be spilled to
; the stack before the call and reloaded after it (vstr.16 / vldr.16).
define i32 @fn1() {
entry:
  %coerce = alloca half, align 2
  %tmp2 = alloca i32, align 4
  store half 0xH7C00, half* %coerce, align 2
  %0 = load i32, i32* %tmp2, align 4
  %call = call i32 bitcast (i32 (...)* @fn2 to i32 (i32)*)(i32 %0)
  store half 0xH7C00, half* %coerce, align 2
  %1 = load i32, i32* %tmp2, align 4
  %call3 = call i32 bitcast (i32 (...)* @fn3 to i32 (i32)*)(i32 %1)
  ret i32 %call3

; CHECK-SPILL-RELOAD-LABEL: fn1:
; CHECK-SPILL-RELOAD:       vstr.16 s0, [sp, #{{.}}] @ 2-byte Spill
; CHECK-SPILL-RELOAD:       bl fn2
; CHECK-SPILL-RELOAD-NEXT:  vldr.16 s0, [sp, #{{.}}] @ 2-byte Reload
}

; External callees for fn1, declared variadic and called through a bitcast.
declare dso_local i32 @fn2(...)
declare dso_local i32 @fn3(...)