Sjoerd Meijer | 011de9c | 2018-01-26 09:26:40 +0000 | [diff] [blame] | 1 | ; SOFT: |
Sjoerd Meijer | 9d9a865 | 2018-02-01 13:48:40 +0000 | [diff] [blame] | 2 | ; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 3 | ; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT |
Sjoerd Meijer | 011de9c | 2018-01-26 09:26:40 +0000 | [diff] [blame] | 4 | |
| 5 | ; SOFTFP: |
Sjoerd Meijer | 9d9a865 | 2018-02-01 13:48:40 +0000 | [diff] [blame] | 6 | ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3 |
| 7 | ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16 |
| 8 | ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16 |
Sjoerd Meijer | 011de9c | 2018-01-26 09:26:40 +0000 | [diff] [blame] | 9 | |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 10 | ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3 |
| 11 | ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16 |
| 12 | ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16 |
| 13 | |
Sjoerd Meijer | 3b4294ed | 2018-02-14 15:09:09 +0000 | [diff] [blame] | 14 | ; Test fast-isel |
| 15 | ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD |
| 16 | ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD |
| 17 | |
Sjoerd Meijer | 011de9c | 2018-01-26 09:26:40 +0000 | [diff] [blame] | 18 | ; HARD: |
Sjoerd Meijer | 9d9a865 | 2018-02-01 13:48:40 +0000 | [diff] [blame] | 19 | ; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3 |
| 20 | ; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16 |
| 21 | ; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16 |
| 22 | |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 23 | ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3 |
| 24 | ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16 |
| 25 | ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16 |
Sjoerd Meijer | 9d9a865 | 2018-02-01 13:48:40 +0000 | [diff] [blame] | 26 | |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 27 | ; FP-CONTRACT=FAST |
| 28 | ; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST |
| 29 | ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST |
| 30 | |
| 31 | |
| 32 | define float @RetValBug(float %A.coerce) { |
Sjoerd Meijer | 9d9a865 | 2018-02-01 13:48:40 +0000 | [diff] [blame] | 33 | entry: |
| 34 | ret float undef |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 35 | ; Check that LowerReturn can handle undef nodes (i.e. nodes which do not have
| 36 | ; any operands) when FullFP16 is enabled. |
Sjoerd Meijer | 9d9a865 | 2018-02-01 13:48:40 +0000 | [diff] [blame] | 37 | ; |
| 38 | ; CHECK-LABEL: RetValBug: |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 39 | ; CHECK-HARDFP-FULLFP16: {{.*}} lr |
Sjoerd Meijer | 9d9a865 | 2018-02-01 13:48:40 +0000 | [diff] [blame] | 40 | } |
Sjoerd Meijer | 011de9c | 2018-01-26 09:26:40 +0000 | [diff] [blame] | 41 | |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 42 | ; 1. VABS: TODO |
| 43 | |
| 44 | ; 2. VADD |
| 45 | define float @Add(float %a.coerce, float %b.coerce) { |
Sjoerd Meijer | 011de9c | 2018-01-26 09:26:40 +0000 | [diff] [blame] | 46 | entry: |
| 47 | %0 = bitcast float %a.coerce to i32 |
| 48 | %tmp.0.extract.trunc = trunc i32 %0 to i16 |
| 49 | %1 = bitcast i16 %tmp.0.extract.trunc to half |
| 50 | %2 = bitcast float %b.coerce to i32 |
| 51 | %tmp1.0.extract.trunc = trunc i32 %2 to i16 |
| 52 | %3 = bitcast i16 %tmp1.0.extract.trunc to half |
| 53 | %add = fadd half %1, %3 |
| 54 | %4 = bitcast half %add to i16 |
| 55 | %tmp4.0.insert.ext = zext i16 %4 to i32 |
| 56 | %5 = bitcast i32 %tmp4.0.insert.ext to float |
| 57 | ret float %5 |
| 58 | |
Sjoerd Meijer | 9d9a865 | 2018-02-01 13:48:40 +0000 | [diff] [blame] | 59 | ; CHECK-LABEL: Add: |
| 60 | |
Sjoerd Meijer | 011de9c | 2018-01-26 09:26:40 +0000 | [diff] [blame] | 61 | ; CHECK-SOFT: bl __aeabi_h2f |
| 62 | ; CHECK-SOFT: bl __aeabi_h2f |
| 63 | ; CHECK-SOFT: bl __aeabi_fadd |
| 64 | ; CHECK-SOFT: bl __aeabi_f2h |
| 65 | |
| 66 | ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f |
| 67 | ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f |
| 68 | ; CHECK-SOFTFP-VFP3: vadd.f32 |
| 69 | ; CHECK-SOFTFP-VFP3: bl __aeabi_f2h |
| 70 | |
| 71 | ; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1 |
| 72 | ; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0 |
| 73 | ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]] |
| 74 | ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]] |
| 75 | ; CHECK-SOFTFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]] |
| 76 | ; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]] |
| 77 | ; CHECK-SOFTFP-FP16: vmov r0, s0 |
| 78 | |
Sjoerd Meijer | 98d5359 | 2018-01-31 10:18:29 +0000 | [diff] [blame] | 79 | ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1 |
| 80 | ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0 |
| 81 | ; CHECK-SOFTFP-FULLFP16: vadd.f16 [[S0]], [[S2]], [[S0]] |
| 82 | ; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0 |
Sjoerd Meijer | 011de9c | 2018-01-26 09:26:40 +0000 | [diff] [blame] | 83 | |
| 84 | ; CHECK-HARDFP-VFP3: vmov r{{.}}, s0 |
| 85 | ; CHECK-HARDFP-VFP3: vmov{{.*}}, s1 |
| 86 | ; CHECK-HARDFP-VFP3: bl __aeabi_h2f |
| 87 | ; CHECK-HARDFP-VFP3: bl __aeabi_h2f |
| 88 | ; CHECK-HARDFP-VFP3: vadd.f32 |
| 89 | ; CHECK-HARDFP-VFP3: bl __aeabi_f2h |
| 90 | ; CHECK-HARDFP-VFP3: vmov s0, r0 |
| 91 | |
| 92 | ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1 |
| 93 | ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0 |
| 94 | ; CHECK-HARDFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]] |
| 95 | ; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]] |
| 96 | |
| 97 | ; CHECK-HARDFP-FULLFP16: vadd.f16 s0, s0, s1 |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 98 | } |
| 99 | |
| 100 | ; 3. VCMP |
Sjoerd Meijer | 9430c8c | 2018-02-15 10:33:07 +0000 | [diff] [blame] | 101 | define zeroext i1 @VCMP1(float %F.coerce, float %G.coerce) { |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 102 | entry: |
| 103 | %0 = bitcast float %F.coerce to i32 |
| 104 | %tmp.0.extract.trunc = trunc i32 %0 to i16 |
| 105 | %1 = bitcast i16 %tmp.0.extract.trunc to half |
| 106 | %2 = bitcast float %G.coerce to i32 |
| 107 | %tmp1.0.extract.trunc = trunc i32 %2 to i16 |
| 108 | %3 = bitcast i16 %tmp1.0.extract.trunc to half |
Sjoerd Meijer | 9430c8c | 2018-02-15 10:33:07 +0000 | [diff] [blame] | 109 | %cmp = fcmp une half %1, %3 |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 110 | ret i1 %cmp |
| 111 | |
Sjoerd Meijer | 9430c8c | 2018-02-15 10:33:07 +0000 | [diff] [blame] | 112 | ; CHECK-LABEL: VCMP1: |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 113 | |
Sjoerd Meijer | 9430c8c | 2018-02-15 10:33:07 +0000 | [diff] [blame] | 114 | ; CHECK-SOFT: bl __aeabi_fcmpeq |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 115 | |
| 116 | ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f |
| 117 | ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f |
Sjoerd Meijer | 9430c8c | 2018-02-15 10:33:07 +0000 | [diff] [blame] | 118 | ; CHECK-SOFTFP-VFP3: vcmp.f32 s{{.}}, s{{.}} |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 119 | |
| 120 | ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}} |
| 121 | ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}} |
Sjoerd Meijer | 9430c8c | 2018-02-15 10:33:07 +0000 | [diff] [blame] | 122 | ; CHECK-SOFTFP-FP16: vcmp.f32 s{{.}}, s{{.}} |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 123 | |
| 124 | ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0 |
| 125 | ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1 |
Sjoerd Meijer | 9430c8c | 2018-02-15 10:33:07 +0000 | [diff] [blame] | 126 | ; CHECK-SOFTFP-FULLFP16: vcmp.f16 [[S2]], [[S0]] |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 127 | |
Sjoerd Meijer | 9430c8c | 2018-02-15 10:33:07 +0000 | [diff] [blame] | 128 | ; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r0 |
| 129 | ; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r1 |
| 130 | ; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s1 |
| 131 | } |
| 132 | |
| 133 | ; Check VCMPZH |
| 134 | define zeroext i1 @VCMP2(float %F.coerce) { |
| 135 | entry: |
| 136 | %0 = bitcast float %F.coerce to i32 |
| 137 | %tmp.0.extract.trunc = trunc i32 %0 to i16 |
| 138 | %1 = bitcast i16 %tmp.0.extract.trunc to half |
Sjoerd Meijer | 4d5c404 | 2018-02-20 19:28:05 +0000 | [diff] [blame] | 139 | %cmp = fcmp une half %1, 0.000000e+00 |
Sjoerd Meijer | 9430c8c | 2018-02-15 10:33:07 +0000 | [diff] [blame] | 140 | ret i1 %cmp |
| 141 | |
| 142 | ; CHECK-LABEL: VCMP2: |
| 143 | |
| 144 | ; CHECK-SOFT: bl __aeabi_fcmpeq |
| 145 | ; CHECK-SOFTFP-FP16: vcmp.f32 s0, #0 |
| 146 | ; CHECK-SOFTFP-FULLFP16: vcmp.f16 s0, #0 |
| 147 | ; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, #0 |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 148 | } |
| 149 | |
| 150 | ; 4. VCMPE |
Sjoerd Meijer | 9430c8c | 2018-02-15 10:33:07 +0000 | [diff] [blame] | 151 | define i32 @VCMPE1(float %F.coerce) { |
| 152 | entry: |
| 153 | %0 = bitcast float %F.coerce to i32 |
| 154 | %tmp.0.extract.trunc = trunc i32 %0 to i16 |
| 155 | %1 = bitcast i16 %tmp.0.extract.trunc to half |
| 156 | %tmp = fcmp olt half %1, 0.000000e+00 |
| 157 | %tmp1 = zext i1 %tmp to i32 |
| 158 | ret i32 %tmp1 |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 159 | |
Sjoerd Meijer | 9430c8c | 2018-02-15 10:33:07 +0000 | [diff] [blame] | 160 | ; CHECK-LABEL: VCMPE1: |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 161 | |
Sjoerd Meijer | 9430c8c | 2018-02-15 10:33:07 +0000 | [diff] [blame] | 162 | ; CHECK-SOFT: bl __aeabi_fcmplt |
| 163 | ; CHECK-SOFTFP-FP16: vcmpe.f32 s0, #0 |
| 164 | ; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s0, #0 |
| 165 | ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, #0 |
| 166 | } |
| 167 | |
| 168 | define i32 @VCMPE2(float %F.coerce, float %G.coerce) { |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 169 | entry: |
| 170 | %0 = bitcast float %F.coerce to i32 |
| 171 | %tmp.0.extract.trunc = trunc i32 %0 to i16 |
| 172 | %1 = bitcast i16 %tmp.0.extract.trunc to half |
| 173 | %2 = bitcast float %G.coerce to i32 |
| 174 | %tmp.1.extract.trunc = trunc i32 %2 to i16 |
| 175 | %3 = bitcast i16 %tmp.1.extract.trunc to half |
| 176 | %tmp = fcmp olt half %1, %3 |
| 177 | %tmp1 = zext i1 %tmp to i32 |
| 178 | ret i32 %tmp1 |
| 179 | |
Sjoerd Meijer | 9430c8c | 2018-02-15 10:33:07 +0000 | [diff] [blame] | 180 | ; CHECK-LABEL: VCMPE2: |
| 181 | |
| 182 | ; CHECK-SOFT: bl __aeabi_fcmplt |
| 183 | ; CHECK-SOFTFP-FP16: vcmpe.f32 s{{.}}, s{{.}} |
| 184 | ; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}} |
| 185 | ; CHECK-HARDFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}} |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 186 | } |
| 187 | |
Sjoerd Meijer | 4d5c404 | 2018-02-20 19:28:05 +0000 | [diff] [blame] | 188 | ; Test lowering of BR_CC |
| 189 | define hidden i32 @VCMPBRCC() { |
| 190 | entry: |
| 191 | %f = alloca half, align 2 |
| 192 | br label %for.cond |
| 193 | |
| 194 | for.cond: |
| 195 | %0 = load half, half* %f, align 2 |
| 196 | %cmp = fcmp nnan ninf nsz ole half %0, 0xH6800 |
| 197 | br i1 %cmp, label %for.body, label %for.end |
| 198 | |
| 199 | for.body: |
| 200 | ret i32 1 |
| 201 | |
| 202 | for.end: |
| 203 | ret i32 0 |
| 204 | |
| 205 | ; CHECK-LABEL: VCMPBRCC: |
| 206 | |
| 207 | ; CHECK-SOFT: bl __aeabi_fcmple |
| 208 | ; CHECK-SOFT: cmp r0, #0 |
| 209 | |
| 210 | ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]] |
| 211 | ; CHECK-SOFTFP-FP16: vcmpe.f32 [[S2]], s0 |
| 212 | ; CHECK-SOFTFP-FP16: vmrs APSR_nzcv, fpscr |
| 213 | |
| 214 | ; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}} |
| 215 | ; CHECK-SOFTFP-FULLFP16: vmrs APSR_nzcv, fpscr |
| 216 | } |
| 217 | |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 218 | ; 5. VCVT (between floating-point and fixed-point) |
| 219 | ; Only assembly/disassembly support |
| 220 | |
| 221 | ; 6. VCVT (between floating-point and integer, both directions) |
| 222 | define i32 @fptosi(i32 %A.coerce) { |
| 223 | entry: |
| 224 | %tmp.0.extract.trunc = trunc i32 %A.coerce to i16 |
| 225 | %0 = bitcast i16 %tmp.0.extract.trunc to half |
| 226 | %conv = fptosi half %0 to i32 |
| 227 | ret i32 %conv |
| 228 | |
| 229 | ; CHECK-LABEL: fptosi: |
| 230 | |
| 231 | ; CHECK-HARDFP-FULLFP16: vmov.f16 s0, r0 |
| 232 | ; CHECK-HARDFP-FULLFP16-NEXT: vcvt.s32.f16 s0, s0 |
| 233 | ; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0 |
| 234 | } |
| 235 | |
| 236 | define i32 @fptoui(i32 %A.coerce) { |
| 237 | entry: |
| 238 | %tmp.0.extract.trunc = trunc i32 %A.coerce to i16 |
| 239 | %0 = bitcast i16 %tmp.0.extract.trunc to half |
| 240 | %conv = fptoui half %0 to i32 |
| 241 | ret i32 %conv |
| 242 | |
| 243 | ; CHECK-HARDFP-FULLFP16: vcvt.u32.f16 s0, s0 |
| 244 | ; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0 |
| 245 | } |
| 246 | |
| 247 | define float @UintToH(i32 %a, i32 %b) { |
| 248 | entry: |
| 249 | %0 = uitofp i32 %a to half |
| 250 | %1 = bitcast half %0 to i16 |
| 251 | %tmp0.insert.ext = zext i16 %1 to i32 |
| 252 | %2 = bitcast i32 %tmp0.insert.ext to float |
| 253 | ret float %2 |
| 254 | |
| 255 | ; CHECK-LABEL: UintToH: |
| 256 | |
| 257 | ; CHECK-HARDFP-FULLFP16: vmov s0, r0 |
| 258 | ; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.u32 s0, s0 |
| 259 | } |
| 260 | |
| 261 | define float @SintToH(i32 %a, i32 %b) { |
| 262 | entry: |
| 263 | %0 = sitofp i32 %a to half |
| 264 | %1 = bitcast half %0 to i16 |
| 265 | %tmp0.insert.ext = zext i16 %1 to i32 |
| 266 | %2 = bitcast i32 %tmp0.insert.ext to float |
| 267 | ret float %2 |
| 268 | |
| 269 | ; CHECK-LABEL: SintToH: |
| 270 | |
| 271 | ; CHECK-HARDFP-FULLFP16: vmov s0, r0 |
| 272 | ; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.s32 s0, s0 |
| 273 | } |
| 274 | |
Sjoerd Meijer | d2718ba | 2018-02-06 16:28:43 +0000 | [diff] [blame] | 275 | define i32 @f2h(float %f) { |
| 276 | entry: |
| 277 | %conv = fptrunc float %f to half |
| 278 | %0 = bitcast half %conv to i16 |
| 279 | %tmp.0.insert.ext = zext i16 %0 to i32 |
| 280 | ret i32 %tmp.0.insert.ext |
| 281 | |
| 282 | ; CHECK-LABEL: f2h: |
| 283 | ; CHECK-HARDFP-FULLFP16: vcvtb.f16.f32 s0, s0 |
| 284 | } |
| 285 | |
| 286 | define float @h2f(i32 %h.coerce) { |
| 287 | entry: |
| 288 | %tmp.0.extract.trunc = trunc i32 %h.coerce to i16 |
| 289 | %0 = bitcast i16 %tmp.0.extract.trunc to half |
| 290 | %conv = fpext half %0 to float |
| 291 | ret float %conv |
| 292 | |
| 293 | ; CHECK-LABEL: h2f: |
| 294 | ; CHECK-HARDFP-FULLFP16: vcvtb.f32.f16 s0, s0 |
| 295 | } |
| 296 | |
| 297 | |
| 298 | define double @h2d(i32 %h.coerce) { |
| 299 | entry: |
| 300 | %tmp.0.extract.trunc = trunc i32 %h.coerce to i16 |
| 301 | %0 = bitcast i16 %tmp.0.extract.trunc to half |
| 302 | %conv = fpext half %0 to double |
| 303 | ret double %conv |
| 304 | |
| 305 | ; CHECK-LABEL: h2d: |
| 306 | ; CHECK-HARDFP-FULLFP16: vcvtb.f64.f16 d{{.*}}, s{{.}} |
| 307 | } |
| 308 | |
| 309 | define i32 @d2h(double %d) { |
| 310 | entry: |
| 311 | %conv = fptrunc double %d to half |
| 312 | %0 = bitcast half %conv to i16 |
| 313 | %tmp.0.insert.ext = zext i16 %0 to i32 |
| 314 | ret i32 %tmp.0.insert.ext |
| 315 | |
| 316 | ; CHECK-LABEL: d2h: |
| 317 | ; CHECK-HARDFP-FULLFP16: vcvtb.f16.f64 s0, d{{.*}} |
| 318 | } |
| 319 | |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 320 | ; TODO: |
| 321 | ; 7. VCVTA |
| 322 | ; 8. VCVTM |
| 323 | ; 9. VCVTN |
| 324 | ; 10. VCVTP |
| 325 | ; 11. VCVTR |
| 326 | |
| 327 | ; 12. VDIV |
| 328 | define float @Div(float %a.coerce, float %b.coerce) { |
| 329 | entry: |
| 330 | %0 = bitcast float %a.coerce to i32 |
| 331 | %tmp.0.extract.trunc = trunc i32 %0 to i16 |
| 332 | %1 = bitcast i16 %tmp.0.extract.trunc to half |
| 333 | %2 = bitcast float %b.coerce to i32 |
| 334 | %tmp1.0.extract.trunc = trunc i32 %2 to i16 |
| 335 | %3 = bitcast i16 %tmp1.0.extract.trunc to half |
| 336 | %add = fdiv half %1, %3 |
| 337 | %4 = bitcast half %add to i16 |
| 338 | %tmp4.0.insert.ext = zext i16 %4 to i32 |
| 339 | %5 = bitcast i32 %tmp4.0.insert.ext to float |
| 340 | ret float %5 |
| 341 | |
| 342 | ; CHECK-LABEL: Div: |
| 343 | |
| 344 | ; CHECK-SOFT: bl __aeabi_h2f |
| 345 | ; CHECK-SOFT: bl __aeabi_h2f |
| 346 | ; CHECK-SOFT: bl __aeabi_fdiv |
| 347 | ; CHECK-SOFT: bl __aeabi_f2h |
| 348 | |
| 349 | ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f |
| 350 | ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f |
| 351 | ; CHECK-SOFTFP-VFP3: vdiv.f32 |
| 352 | ; CHECK-SOFTFP-VFP3: bl __aeabi_f2h |
| 353 | |
| 354 | ; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1 |
| 355 | ; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0 |
| 356 | ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]] |
| 357 | ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]] |
| 358 | ; CHECK-SOFTFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]] |
| 359 | ; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]] |
| 360 | ; CHECK-SOFTFP-FP16: vmov r0, s0 |
| 361 | |
| 362 | ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1 |
| 363 | ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0 |
| 364 | ; CHECK-SOFTFP-FULLFP16: vdiv.f16 [[S0]], [[S2]], [[S0]] |
| 365 | ; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0 |
| 366 | |
| 367 | ; CHECK-HARDFP-VFP3: vmov r{{.}}, s0 |
| 368 | ; CHECK-HARDFP-VFP3: vmov{{.*}}, s1 |
| 369 | ; CHECK-HARDFP-VFP3: bl __aeabi_h2f |
| 370 | ; CHECK-HARDFP-VFP3: bl __aeabi_h2f |
| 371 | ; CHECK-HARDFP-VFP3: vdiv.f32 |
| 372 | ; CHECK-HARDFP-VFP3: bl __aeabi_f2h |
| 373 | ; CHECK-HARDFP-VFP3: vmov s0, r0 |
| 374 | |
| 375 | ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1 |
| 376 | ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0 |
| 377 | ; CHECK-HARDFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]] |
| 378 | ; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]] |
| 379 | |
| 380 | ; CHECK-HARDFP-FULLFP16: vdiv.f16 s0, s0, s1 |
| 381 | } |
| 382 | |
| 383 | ; 13. VFMA |
| 384 | define float @VFMA(float %a.coerce, float %b.coerce, float %c.coerce) { |
| 385 | entry: |
| 386 | %0 = bitcast float %a.coerce to i32 |
| 387 | %tmp.0.extract.trunc = trunc i32 %0 to i16 |
| 388 | %1 = bitcast i16 %tmp.0.extract.trunc to half |
| 389 | %2 = bitcast float %b.coerce to i32 |
| 390 | %tmp1.0.extract.trunc = trunc i32 %2 to i16 |
| 391 | %3 = bitcast i16 %tmp1.0.extract.trunc to half |
| 392 | %4 = bitcast float %c.coerce to i32 |
| 393 | %tmp2.0.extract.trunc = trunc i32 %4 to i16 |
| 394 | %5 = bitcast i16 %tmp2.0.extract.trunc to half |
| 395 | %mul = fmul half %1, %3 |
| 396 | %add = fadd half %mul, %5 |
| 397 | %6 = bitcast half %add to i16 |
| 398 | %tmp4.0.insert.ext = zext i16 %6 to i32 |
| 399 | %7 = bitcast i32 %tmp4.0.insert.ext to float |
| 400 | ret float %7 |
| 401 | |
| 402 | ; CHECK-LABEL: VFMA: |
| 403 | ; CHECK-HARDFP-FULLFP16-FAST: vfma.f16 s2, s0, s1 |
| 404 | ; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2 |
| 405 | } |
| 406 | |
| 407 | ; 14. VFMS |
| 408 | define float @VFMS(float %a.coerce, float %b.coerce, float %c.coerce) { |
| 409 | entry: |
| 410 | %0 = bitcast float %a.coerce to i32 |
| 411 | %tmp.0.extract.trunc = trunc i32 %0 to i16 |
| 412 | %1 = bitcast i16 %tmp.0.extract.trunc to half |
| 413 | %2 = bitcast float %b.coerce to i32 |
| 414 | %tmp1.0.extract.trunc = trunc i32 %2 to i16 |
| 415 | %3 = bitcast i16 %tmp1.0.extract.trunc to half |
| 416 | %4 = bitcast float %c.coerce to i32 |
| 417 | %tmp2.0.extract.trunc = trunc i32 %4 to i16 |
| 418 | %5 = bitcast i16 %tmp2.0.extract.trunc to half |
| 419 | %mul = fmul half %1, %3 |
| 420 | %sub = fsub half %5, %mul |
| 421 | %6 = bitcast half %sub to i16 |
| 422 | %tmp4.0.insert.ext = zext i16 %6 to i32 |
| 423 | %7 = bitcast i32 %tmp4.0.insert.ext to float |
| 424 | ret float %7 |
| 425 | |
| 426 | ; CHECK-LABEL: VFMS: |
| 427 | ; CHECK-HARDFP-FULLFP16-FAST: vfms.f16 s2, s0, s1 |
| 428 | ; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2 |
| 429 | } |
| 430 | |
| 431 | ; 15. VFNMA |
| 432 | define float @VFNMA(float %a.coerce, float %b.coerce, float %c.coerce) { |
| 433 | entry: |
| 434 | %0 = bitcast float %a.coerce to i32 |
| 435 | %tmp.0.extract.trunc = trunc i32 %0 to i16 |
| 436 | %1 = bitcast i16 %tmp.0.extract.trunc to half |
| 437 | %2 = bitcast float %b.coerce to i32 |
| 438 | %tmp1.0.extract.trunc = trunc i32 %2 to i16 |
| 439 | %3 = bitcast i16 %tmp1.0.extract.trunc to half |
| 440 | %4 = bitcast float %c.coerce to i32 |
| 441 | %tmp2.0.extract.trunc = trunc i32 %4 to i16 |
| 442 | %5 = bitcast i16 %tmp2.0.extract.trunc to half |
| 443 | %mul = fmul half %1, %3 |
| 444 | %sub = fsub half -0.0, %mul |
| 445 | %sub2 = fsub half %sub, %5 |
| 446 | %6 = bitcast half %sub2 to i16 |
| 447 | %tmp4.0.insert.ext = zext i16 %6 to i32 |
| 448 | %7 = bitcast i32 %tmp4.0.insert.ext to float |
| 449 | ret float %7 |
| 450 | |
| 451 | ; CHECK-LABEL: VFNMA: |
| 452 | ; CHECK-HARDFP-FULLFP16-FAST: vfnma.f16 s2, s0, s1 |
| 453 | ; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2 |
| 454 | } |
| 455 | |
| 456 | ; 16. VFNMS |
| 457 | define float @VFNMS(float %a.coerce, float %b.coerce, float %c.coerce) { |
| 458 | entry: |
| 459 | %0 = bitcast float %a.coerce to i32 |
| 460 | %tmp.0.extract.trunc = trunc i32 %0 to i16 |
| 461 | %1 = bitcast i16 %tmp.0.extract.trunc to half |
| 462 | %2 = bitcast float %b.coerce to i32 |
| 463 | %tmp1.0.extract.trunc = trunc i32 %2 to i16 |
| 464 | %3 = bitcast i16 %tmp1.0.extract.trunc to half |
| 465 | %4 = bitcast float %c.coerce to i32 |
| 466 | %tmp2.0.extract.trunc = trunc i32 %4 to i16 |
| 467 | %5 = bitcast i16 %tmp2.0.extract.trunc to half |
| 468 | %mul = fmul half %1, %3 |
| 469 | %sub2 = fsub half %mul, %5 |
| 470 | %6 = bitcast half %sub2 to i16 |
| 471 | %tmp4.0.insert.ext = zext i16 %6 to i32 |
| 472 | %7 = bitcast i32 %tmp4.0.insert.ext to float |
| 473 | ret float %7 |
| 474 | |
| 475 | ; CHECK-LABEL: VFNMS: |
| 476 | ; CHECK-HARDFP-FULLFP16-FAST: vfnms.f16 s2, s0, s1 |
| 477 | ; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2 |
| 478 | } |
| 479 | |
| 480 | ; TODO: |
| 481 | ; 17. VMAXNM |
| 482 | ; 18. VMINNM |
| 483 | |
| 484 | ; 19. VMLA |
| 485 | define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) { |
| 486 | entry: |
| 487 | %0 = bitcast float %a.coerce to i32 |
| 488 | %tmp.0.extract.trunc = trunc i32 %0 to i16 |
| 489 | %1 = bitcast i16 %tmp.0.extract.trunc to half |
| 490 | %2 = bitcast float %b.coerce to i32 |
| 491 | %tmp1.0.extract.trunc = trunc i32 %2 to i16 |
| 492 | %3 = bitcast i16 %tmp1.0.extract.trunc to half |
| 493 | %4 = bitcast float %c.coerce to i32 |
| 494 | %tmp2.0.extract.trunc = trunc i32 %4 to i16 |
| 495 | %5 = bitcast i16 %tmp2.0.extract.trunc to half |
| 496 | %mul = fmul half %1, %3 |
| 497 | %add = fadd half %5, %mul |
| 498 | %6 = bitcast half %add to i16 |
| 499 | %tmp4.0.insert.ext = zext i16 %6 to i32 |
| 500 | %7 = bitcast i32 %tmp4.0.insert.ext to float |
| 501 | ret float %7 |
| 502 | |
| 503 | ; CHECK-LABEL: VMLA: |
| 504 | ; CHECK-HARDFP-FULLFP16: vmla.f16 s2, s0, s1 |
| 505 | ; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2 |
| 506 | } |
| 507 | |
| 508 | ; 20. VMLS |
| 509 | define float @VMLS(float %a.coerce, float %b.coerce, float %c.coerce) { |
| 510 | entry: |
| 511 | %0 = bitcast float %a.coerce to i32 |
| 512 | %tmp.0.extract.trunc = trunc i32 %0 to i16 |
| 513 | %1 = bitcast i16 %tmp.0.extract.trunc to half |
| 514 | %2 = bitcast float %b.coerce to i32 |
| 515 | %tmp1.0.extract.trunc = trunc i32 %2 to i16 |
| 516 | %3 = bitcast i16 %tmp1.0.extract.trunc to half |
| 517 | %4 = bitcast float %c.coerce to i32 |
| 518 | %tmp2.0.extract.trunc = trunc i32 %4 to i16 |
| 519 | %5 = bitcast i16 %tmp2.0.extract.trunc to half |
| 520 | %mul = fmul half %1, %3 |
| 521 | %add = fsub half %5, %mul |
| 522 | %6 = bitcast half %add to i16 |
| 523 | %tmp4.0.insert.ext = zext i16 %6 to i32 |
| 524 | %7 = bitcast i32 %tmp4.0.insert.ext to float |
| 525 | ret float %7 |
| 526 | |
| 527 | ; CHECK-LABEL: VMLS: |
| 528 | ; CHECK-HARDFP-FULLFP16: vmls.f16 s2, s0, s1 |
| 529 | ; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2 |
| 530 | } |
| 531 | |
| 532 | ; TODO: fix immediates. |
| 533 | ; 21. VMOV (between general-purpose register and half-precision register) |
Sjoerd Meijer | 8c07393 | 2018-02-07 08:37:17 +0000 | [diff] [blame] | 534 | |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 535 | ; 22. VMOV (immediate) |
Sjoerd Meijer | 8c07393 | 2018-02-07 08:37:17 +0000 | [diff] [blame] | 536 | define i32 @movi(i32 %a.coerce) { |
| 537 | entry: |
| 538 | %tmp.0.extract.trunc = trunc i32 %a.coerce to i16 |
| 539 | %0 = bitcast i16 %tmp.0.extract.trunc to half |
| 540 | %add = fadd half %0, 0xHC000 |
| 541 | %1 = bitcast half %add to i16 |
| 542 | %tmp2.0.insert.ext = zext i16 %1 to i32 |
| 543 | ret i32 %tmp2.0.insert.ext |
| 544 | |
| 545 | ; CHECK-LABEL: movi: |
| 546 | ; CHECK-HARDFP-FULLFP16: vmov.f16 s0, #-2.000000e+00 |
| 547 | } |
Sjoerd Meijer | 89ea264 | 2018-02-06 08:43:56 +0000 | [diff] [blame] | 548 | |
| 549 | ; 23. VMUL |
| 550 | define float @Mul(float %a.coerce, float %b.coerce) { |
| 551 | entry: |
| 552 | %0 = bitcast float %a.coerce to i32 |
| 553 | %tmp.0.extract.trunc = trunc i32 %0 to i16 |
| 554 | %1 = bitcast i16 %tmp.0.extract.trunc to half |
| 555 | %2 = bitcast float %b.coerce to i32 |
| 556 | %tmp1.0.extract.trunc = trunc i32 %2 to i16 |
| 557 | %3 = bitcast i16 %tmp1.0.extract.trunc to half |
| 558 | %add = fmul half %1, %3 |
| 559 | %4 = bitcast half %add to i16 |
| 560 | %tmp4.0.insert.ext = zext i16 %4 to i32 |
| 561 | %5 = bitcast i32 %tmp4.0.insert.ext to float |
| 562 | ret float %5 |
| 563 | |
| 564 | ; CHECK-LABEL: Mul: |
| 565 | |
| 566 | ; CHECK-SOFT: bl __aeabi_h2f |
| 567 | ; CHECK-SOFT: bl __aeabi_h2f |
| 568 | ; CHECK-SOFT: bl __aeabi_fmul |
| 569 | ; CHECK-SOFT: bl __aeabi_f2h |
| 570 | |
| 571 | ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f |
| 572 | ; CHECK-SOFTFP-VFP3: bl __aeabi_h2f |
| 573 | ; CHECK-SOFTFP-VFP3: vmul.f32 |
| 574 | ; CHECK-SOFTFP-VFP3: bl __aeabi_f2h |
| 575 | |
| 576 | ; CHECK-SOFTFP-FP16: vmov [[S2:s[0-9]]], r1 |
| 577 | ; CHECK-SOFTFP-FP16: vmov [[S0:s[0-9]]], r0 |
| 578 | ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2]], [[S2]] |
| 579 | ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S0]], [[S0]] |
| 580 | ; CHECK-SOFTFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]] |
| 581 | ; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]] |
| 582 | ; CHECK-SOFTFP-FP16: vmov r0, s0 |
| 583 | |
| 584 | ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1 |
| 585 | ; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0 |
| 586 | ; CHECK-SOFTFP-FULLFP16: vmul.f16 [[S0]], [[S2]], [[S0]] |
| 587 | ; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0 |
| 588 | |
| 589 | ; CHECK-HARDFP-VFP3: vmov r{{.}}, s0 |
| 590 | ; CHECK-HARDFP-VFP3: vmov{{.*}}, s1 |
| 591 | ; CHECK-HARDFP-VFP3: bl __aeabi_h2f |
| 592 | ; CHECK-HARDFP-VFP3: bl __aeabi_h2f |
| 593 | ; CHECK-HARDFP-VFP3: vmul.f32 |
| 594 | ; CHECK-HARDFP-VFP3: bl __aeabi_f2h |
| 595 | ; CHECK-HARDFP-VFP3: vmov s0, r0 |
| 596 | |
| 597 | ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1 |
| 598 | ; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0 |
| 599 | ; CHECK-HARDFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]] |
| 600 | ; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]] |
| 601 | |
| 602 | ; CHECK-HARDFP-FULLFP16: vmul.f16 s0, s0, s1 |
| 603 | } |
| 604 | |
| 605 | ; 24. VNEG |
| 606 | define float @Neg(float %a.coerce) { |
| 607 | entry: |
| 608 | %0 = bitcast float %a.coerce to i32 |
| 609 | %tmp.0.extract.trunc = trunc i32 %0 to i16 |
| 610 | %1 = bitcast i16 %tmp.0.extract.trunc to half |
| 611 | %2 = fsub half -0.000000e+00, %1 |
| 612 | %3 = bitcast half %2 to i16 |
| 613 | %tmp4.0.insert.ext = zext i16 %3 to i32 |
| 614 | %4 = bitcast i32 %tmp4.0.insert.ext to float |
| 615 | ret float %4 |
| 616 | |
| 617 | ; CHECK-LABEL: Neg: |
| 618 | ; CHECK-HARDFP-FULLFP16: vneg.f16 s0, s0 |
| 619 | } |
| 620 | |
| 621 | ; 25. VNMLA |
| 622 | define float @VNMLA(float %a.coerce, float %b.coerce, float %c.coerce) { |
| 623 | entry: |
| 624 | %0 = bitcast float %a.coerce to i32 |
| 625 | %tmp.0.extract.trunc = trunc i32 %0 to i16 |
| 626 | %1 = bitcast i16 %tmp.0.extract.trunc to half |
| 627 | %2 = bitcast float %b.coerce to i32 |
| 628 | %tmp1.0.extract.trunc = trunc i32 %2 to i16 |
| 629 | %3 = bitcast i16 %tmp1.0.extract.trunc to half |
| 630 | %4 = bitcast float %c.coerce to i32 |
| 631 | %tmp2.0.extract.trunc = trunc i32 %4 to i16 |
| 632 | %5 = bitcast i16 %tmp2.0.extract.trunc to half |
| 633 | %add = fmul half %1, %3 |
| 634 | %add2 = fsub half -0.000000e+00, %add |
| 635 | %add3 = fsub half %add2, %5 |
| 636 | %6 = bitcast half %add3 to i16 |
| 637 | %tmp4.0.insert.ext = zext i16 %6 to i32 |
| 638 | %7 = bitcast i32 %tmp4.0.insert.ext to float |
| 639 | ret float %7 |
| 640 | |
| 641 | ; CHECK-LABEL: VNMLA: |
| 642 | ; CHECK-HARDFP-FULLFP16: vnmla.f16 s2, s0, s1 |
| 643 | ; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2 |
| 644 | } |
| 645 | |
| 646 | ; 26. VNMLS |
| 647 | define float @VNMLS(float %a.coerce, float %b.coerce, float %c.coerce) { |
| 648 | entry: |
| 649 | %0 = bitcast float %a.coerce to i32 |
| 650 | %tmp.0.extract.trunc = trunc i32 %0 to i16 |
| 651 | %1 = bitcast i16 %tmp.0.extract.trunc to half |
| 652 | %2 = bitcast float %b.coerce to i32 |
| 653 | %tmp1.0.extract.trunc = trunc i32 %2 to i16 |
| 654 | %3 = bitcast i16 %tmp1.0.extract.trunc to half |
| 655 | %4 = bitcast float %c.coerce to i32 |
| 656 | %tmp2.0.extract.trunc = trunc i32 %4 to i16 |
| 657 | %5 = bitcast i16 %tmp2.0.extract.trunc to half |
| 658 | %add = fmul half %1, %3 |
| 659 | %add2 = fsub half %add, %5 |
| 660 | %6 = bitcast half %add2 to i16 |
| 661 | %tmp4.0.insert.ext = zext i16 %6 to i32 |
| 662 | %7 = bitcast i32 %tmp4.0.insert.ext to float |
| 663 | ret float %7 |
| 664 | |
| 665 | ; CHECK-LABEL: VNMLS: |
| 666 | ; CHECK-HARDFP-FULLFP16: vnmls.f16 s2, s0, s1 |
| 667 | ; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2 |
| 668 | } |
| 669 | |
| 670 | ; 27. VNMUL |
| 671 | define float @NMul(float %a.coerce, float %b.coerce) { |
| 672 | entry: |
| 673 | %0 = bitcast float %a.coerce to i32 |
| 674 | %tmp.0.extract.trunc = trunc i32 %0 to i16 |
| 675 | %1 = bitcast i16 %tmp.0.extract.trunc to half |
| 676 | %2 = bitcast float %b.coerce to i32 |
| 677 | %tmp1.0.extract.trunc = trunc i32 %2 to i16 |
| 678 | %3 = bitcast i16 %tmp1.0.extract.trunc to half |
| 679 | %add = fmul half %1, %3 |
| 680 | %add2 = fsub half -0.0, %add |
| 681 | %4 = bitcast half %add2 to i16 |
| 682 | %tmp4.0.insert.ext = zext i16 %4 to i32 |
| 683 | %5 = bitcast i32 %tmp4.0.insert.ext to float |
| 684 | ret float %5 |
| 685 | |
| 686 | ; CHECK-LABEL: NMul: |
| 687 | ; CHECK-HARDFP-FULLFP16: vnmul.f16 s0, s0, s1 |
| 688 | } |
| 689 | |
| 690 | ; 28. VRINTA |
| 691 | ; 29. VRINTM |
| 692 | ; 30. VRINTN |
| 693 | ; 31. VRINTP |
| 694 | ; 32. VRINTR |
| 695 | ; 33. VRINTX |
| 696 | ; 34. VRINTZ |
| 697 | ; 35. VSELEQ |
| 698 | ; 36. VSELGE |
| 699 | ; 37. VSELGT |
| 700 | ; 38. VSELVS |
| 701 | ; 39. VSQRT |
| 702 | |
; 40. VSUB
; Computes d = a - b in half precision and checks each float ABI / FPU
; configuration: soft-float uses libcalls throughout; VFP3 still needs
; __aeabi_h2f/__aeabi_f2h around an f32 subtract; an FP16-conversion-only
; FPU uses vcvtb to widen/narrow around an f32 subtract; fullfp16 does the
; subtract natively in f16.
define float @Sub(float %a.coerce, float %b.coerce) {
entry:
  ; Unpack operand a from the low 16 bits of the incoming float.
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  ; Unpack operand b.
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  ; a - b (the %add name is historical; the operation is a subtract).
  %add = fsub half %1, %3
  ; Repack the half result into the low 16 bits of the float return value.
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Sub:

; CHECK-SOFT:  bl  __aeabi_h2f
; CHECK-SOFT:  bl  __aeabi_h2f
; CHECK-SOFT:  bl  __aeabi_fsub
; CHECK-SOFT:  bl  __aeabi_f2h

; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
; CHECK-SOFTFP-VFP3:  vsub.f32
; CHECK-SOFTFP-VFP3:  bl  __aeabi_f2h

; CHECK-SOFTFP-FP16:  vmov  [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16:  vmov  [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16:  vsub.f32  [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16:  vmov  r0, s0

; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16:       vsub.f16  [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT:  vmov.f16  r0, s0

; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
; CHECK-HARDFP-VFP3:  vsub.f32
; CHECK-HARDFP-VFP3:  bl  __aeabi_f2h
; CHECK-HARDFP-VFP3:  vmov  s0, r0

; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16:  vsub.f32  [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16:       vsub.f16  s0, s0, s1
}
Sjoerd Meijer | 101ee43 | 2018-02-13 10:29:03 +0000 | [diff] [blame] | 758 | |
; Check for VSTRH with a FCONSTH, this checks that addressing mode
; AddrMode5FP16 is supported.
; A volatile f16 stack slot forces an actual VSTR.16/VLDR.16 pair with an
; [sp, #imm] operand, which exercises the AddrMode5FP16 addressing mode.
define i32 @ThumbAddrMode5FP16(i32 %A.coerce) {
entry:
  ; 2-byte stack slot for the half value.
  %S = alloca half, align 2
  ; Unpack the incoming half from the low 16 bits of the i32 argument.
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %S.0.S.0..sroa_cast = bitcast half* %S to i8*
  ; Volatile store/load of the f16 constant 1.0 (0xH3C00) so the accesses
  ; cannot be optimized away and real VSTR.16/VLDR.16 must be emitted.
  store volatile half 0xH3C00, half* %S, align 2
  %S.0.S.0. = load volatile half, half* %S, align 2
  %add = fadd half %S.0.S.0., %0
  ; Repack the half result into the low 16 bits of the i32 return value.
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL: ThumbAddrMode5FP16

; CHECK-SOFTFP-FULLFP16:       vmov.f16    [[S0:s[0-9]]], #1.000000e+00
; CHECK-SOFTFP-FULLFP16:       vstr.16     [[S0]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16:       vmov.f16    [[S0_2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16:       vldr.16     [[S2:s[0-9]]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16:       vadd.f16    s{{.}}, [[S2]], [[S0_2]]
}
Sjoerd Meijer | 3b4294ed | 2018-02-14 15:09:09 +0000 | [diff] [blame] | 782 | |
; Test function calls to check store/load reg to/from stack
; Run under -O0 (fast-isel): a half value live across the call to @fn2 must
; be spilled with VSTR.16 before the call and reloaded with VLDR.16 after it
; (2-byte spill slot).
define i32 @fn1() {
entry:
  ; f16 stack object whose value (0xH7C00 = +infinity) is live across calls.
  %coerce = alloca half, align 2
  %tmp2 = alloca i32, align 4
  store half 0xH7C00, half* %coerce, align 2
  ; Note: %tmp2 is read uninitialized; the test only cares about the f16
  ; spill/reload around the calls, not the i32 argument's value.
  %0 = load i32, i32* %tmp2, align 4
  ; Calls through bitcast casts the vararg declarations to fixed-arg types.
  %call = call i32 bitcast (i32 (...)* @fn2 to i32 (i32)*)(i32 %0)
  store half 0xH7C00, half* %coerce, align 2
  %1 = load i32, i32* %tmp2, align 4
  %call3 = call i32 bitcast (i32 (...)* @fn3 to i32 (i32)*)(i32 %1)
  ret i32 %call3

; CHECK-SPILL-RELOAD-LABEL: fn1:
; CHECK-SPILL-RELOAD:       vstr.16 s0, [sp, #{{.}}]  @ 2-byte Spill
; CHECK-SPILL-RELOAD-NEXT:  bl  fn2
; CHECK-SPILL-RELOAD-NEXT:  vldr.16 s0, [sp, #{{.}}]  @ 2-byte Reload
}
| 801 | |
| 802 | declare dso_local i32 @fn2(...) |
| 803 | declare dso_local i32 @fn3(...) |