| /* |
| * IEEE754 floating point arithmetic |
| * single precision: MSUB.f (Fused Multiply Subtract) |
| * MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft]) |
| * |
| * MIPS floating point support |
| * Copyright (C) 2015 Imagination Technologies, Ltd. |
| * Author: Markos Chandras <markos.chandras@imgtec.com> |
| * |
| * This program is free software; you can distribute it and/or modify it |
| * under the terms of the GNU General Public License as published by the |
| * Free Software Foundation; version 2 of the License. |
| */ |
| |
| #include "ieee754sp.h" |
| |
| union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, |
| union ieee754sp y) |
| { |
| int re; |
| int rs; |
| unsigned rm; |
| unsigned short lxm; |
| unsigned short hxm; |
| unsigned short lym; |
| unsigned short hym; |
| unsigned lrm; |
| unsigned hrm; |
| unsigned t; |
| unsigned at; |
| int s; |
| |
| COMPXSP; |
| COMPYSP; |
| u32 zm; int ze; int zs __maybe_unused; int zc; |
| |
| EXPLODEXSP; |
| EXPLODEYSP; |
| EXPLODESP(z, zc, zs, ze, zm) |
| |
| FLUSHXSP; |
| FLUSHYSP; |
| FLUSHSP(z, zc, zs, ze, zm); |
| |
| ieee754_clearcx(); |
| |
| switch (zc) { |
| case IEEE754_CLASS_SNAN: |
| ieee754_setcx(IEEE754_INVALID_OPERATION); |
| return ieee754sp_nanxcpt(z); |
| case IEEE754_CLASS_DNORM: |
| SPDNORMx(zm, ze); |
| /* QNAN is handled separately below */ |
| } |
| |
| switch (CLPAIR(xc, yc)) { |
| case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): |
| case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): |
| case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): |
| case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): |
| case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): |
| return ieee754sp_nanxcpt(y); |
| |
| case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): |
| case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): |
| case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): |
| case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): |
| case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): |
| case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): |
| return ieee754sp_nanxcpt(x); |
| |
| case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): |
| case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): |
| case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): |
| case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): |
| return y; |
| |
| case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): |
| case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): |
| case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): |
| case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): |
| case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): |
| return x; |
| |
| /* |
| * Infinity handling |
| */ |
| case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): |
| case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): |
| if (zc == IEEE754_CLASS_QNAN) |
| return z; |
| ieee754_setcx(IEEE754_INVALID_OPERATION); |
| return ieee754sp_indef(); |
| |
| case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): |
| case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): |
| case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): |
| case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): |
| case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): |
| if (zc == IEEE754_CLASS_QNAN) |
| return z; |
| return ieee754sp_inf(xs ^ ys); |
| |
| case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): |
| case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): |
| case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): |
| case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): |
| case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): |
| if (zc == IEEE754_CLASS_INF) |
| return ieee754sp_inf(zs); |
| /* Multiplication is 0 so just return z */ |
| return z; |
| |
| case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): |
| SPDNORMX; |
| |
| case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): |
| if (zc == IEEE754_CLASS_QNAN) |
| return z; |
| else if (zc == IEEE754_CLASS_INF) |
| return ieee754sp_inf(zs); |
| SPDNORMY; |
| break; |
| |
| case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): |
| if (zc == IEEE754_CLASS_QNAN) |
| return z; |
| else if (zc == IEEE754_CLASS_INF) |
| return ieee754sp_inf(zs); |
| SPDNORMX; |
| break; |
| |
| case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): |
| if (zc == IEEE754_CLASS_QNAN) |
| return z; |
| else if (zc == IEEE754_CLASS_INF) |
| return ieee754sp_inf(zs); |
| /* fall through to real compuation */ |
| } |
| |
| /* Finally get to do some computation */ |
| |
| /* |
| * Do the multiplication bit first |
| * |
| * rm = xm * ym, re = xe + ye basically |
| * |
| * At this point xm and ym should have been normalized. |
| */ |
| |
| /* rm = xm * ym, re = xe+ye basically */ |
| assert(xm & SP_HIDDEN_BIT); |
| assert(ym & SP_HIDDEN_BIT); |
| |
| re = xe + ye; |
| rs = xs ^ ys; |
| |
| /* shunt to top of word */ |
| xm <<= 32 - (SP_FBITS + 1); |
| ym <<= 32 - (SP_FBITS + 1); |
| |
| /* |
| * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. |
| */ |
| lxm = xm & 0xffff; |
| hxm = xm >> 16; |
| lym = ym & 0xffff; |
| hym = ym >> 16; |
| |
| lrm = lxm * lym; /* 16 * 16 => 32 */ |
| hrm = hxm * hym; /* 16 * 16 => 32 */ |
| |
| t = lxm * hym; /* 16 * 16 => 32 */ |
| at = lrm + (t << 16); |
| hrm += at < lrm; |
| lrm = at; |
| hrm = hrm + (t >> 16); |
| |
| t = hxm * lym; /* 16 * 16 => 32 */ |
| at = lrm + (t << 16); |
| hrm += at < lrm; |
| lrm = at; |
| hrm = hrm + (t >> 16); |
| |
| rm = hrm | (lrm != 0); |
| |
| /* |
| * Sticky shift down to normal rounding precision. |
| */ |
| if ((int) rm < 0) { |
| rm = (rm >> (32 - (SP_FBITS + 1 + 3))) | |
| ((rm << (SP_FBITS + 1 + 3)) != 0); |
| re++; |
| } else { |
| rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) | |
| ((rm << (SP_FBITS + 1 + 3 + 1)) != 0); |
| } |
| assert(rm & (SP_HIDDEN_BIT << 3)); |
| |
| /* And now the subtraction */ |
| |
| /* Flip sign of r and handle as add */ |
| rs ^= 1; |
| |
| assert(zm & SP_HIDDEN_BIT); |
| |
| /* |
| * Provide guard,round and stick bit space. |
| */ |
| zm <<= 3; |
| |
| if (ze > re) { |
| /* |
| * Have to shift y fraction right to align. |
| */ |
| s = ze - re; |
| SPXSRSYn(s); |
| } else if (re > ze) { |
| /* |
| * Have to shift x fraction right to align. |
| */ |
| s = re - ze; |
| SPXSRSYn(s); |
| } |
| assert(ze == re); |
| assert(ze <= SP_EMAX); |
| |
| if (zs == rs) { |
| /* |
| * Generate 28 bit result of adding two 27 bit numbers |
| * leaving result in zm, zs and ze. |
| */ |
| zm = zm + rm; |
| |
| if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */ |
| SPXSRSX1(); /* shift preserving sticky */ |
| } |
| } else { |
| if (zm >= rm) { |
| zm = zm - rm; |
| } else { |
| zm = rm - zm; |
| zs = rs; |
| } |
| if (zm == 0) |
| return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); |
| |
| /* |
| * Normalize in extended single precision |
| */ |
| while ((zm >> (SP_MBITS + 3)) == 0) { |
| zm <<= 1; |
| ze--; |
| } |
| |
| } |
| return ieee754sp_format(zs, ze, zm); |
| } |