blob: 6cdaa2a85c9d7a0c5b33bd23eb721ae05ba8e6ed [file] [log] [blame]
Markos Chandrase24c3be2015-08-13 09:56:31 +02001/*
2 * IEEE754 floating point arithmetic
3 * single precision: MADDF.f (Fused Multiply Add)
4 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
5 *
6 * MIPS floating point support
7 * Copyright (C) 2015 Imagination Technologies, Ltd.
8 * Author: Markos Chandras <markos.chandras@imgtec.com>
9 *
10 * This program is free software; you can distribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; version 2 of the License.
13 */
14
15#include "ieee754sp.h"
16
Paul Burton61620512016-04-21 14:04:49 +010017enum maddf_flags {
18 maddf_negate_product = 1 << 0,
19};
20
21static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
22 union ieee754sp y, enum maddf_flags flags)
Markos Chandrase24c3be2015-08-13 09:56:31 +020023{
24 int re;
25 int rs;
26 unsigned rm;
27 unsigned short lxm;
28 unsigned short hxm;
29 unsigned short lym;
30 unsigned short hym;
31 unsigned lrm;
32 unsigned hrm;
33 unsigned t;
34 unsigned at;
35 int s;
36
37 COMPXSP;
38 COMPYSP;
Paul Burtone2d11e12016-04-21 14:04:51 +010039 COMPZSP;
Markos Chandrase24c3be2015-08-13 09:56:31 +020040
41 EXPLODEXSP;
42 EXPLODEYSP;
Paul Burtone2d11e12016-04-21 14:04:51 +010043 EXPLODEZSP;
Markos Chandrase24c3be2015-08-13 09:56:31 +020044
45 FLUSHXSP;
46 FLUSHYSP;
Paul Burtone2d11e12016-04-21 14:04:51 +010047 FLUSHZSP;
Markos Chandrase24c3be2015-08-13 09:56:31 +020048
49 ieee754_clearcx();
50
Aleksandar Markovice840be62017-07-27 18:08:54 +020051 /*
52 * Handle the cases when at least one of x, y or z is a NaN.
53 * Order of precedence is sNaN, qNaN and z, x, y.
54 */
55 if (zc == IEEE754_CLASS_SNAN)
Markos Chandrase24c3be2015-08-13 09:56:31 +020056 return ieee754sp_nanxcpt(z);
Aleksandar Markovice840be62017-07-27 18:08:54 +020057 if (xc == IEEE754_CLASS_SNAN)
Markos Chandrase24c3be2015-08-13 09:56:31 +020058 return ieee754sp_nanxcpt(x);
Aleksandar Markovice840be62017-07-27 18:08:54 +020059 if (yc == IEEE754_CLASS_SNAN)
60 return ieee754sp_nanxcpt(y);
61 if (zc == IEEE754_CLASS_QNAN)
62 return z;
63 if (xc == IEEE754_CLASS_QNAN)
64 return x;
65 if (yc == IEEE754_CLASS_QNAN)
Markos Chandrase24c3be2015-08-13 09:56:31 +020066 return y;
67
Aleksandar Markovice840be62017-07-27 18:08:54 +020068 if (zc == IEEE754_CLASS_DNORM)
69 SPDNORMZ;
70 /* ZERO z cases are handled separately below */
71
72 switch (CLPAIR(xc, yc)) {
73
Markos Chandrase24c3be2015-08-13 09:56:31 +020074
75 /*
76 * Infinity handling
77 */
78 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
79 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
Markos Chandrase24c3be2015-08-13 09:56:31 +020080 ieee754_setcx(IEEE754_INVALID_OPERATION);
81 return ieee754sp_indef();
82
83 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
84 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
85 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
86 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
87 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
Markos Chandrase24c3be2015-08-13 09:56:31 +020088 return ieee754sp_inf(xs ^ ys);
89
90 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
91 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
92 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
93 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
94 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
95 if (zc == IEEE754_CLASS_INF)
96 return ieee754sp_inf(zs);
97 /* Multiplication is 0 so just return z */
98 return z;
99
100 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
101 SPDNORMX;
102
103 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
Aleksandar Markovice840be62017-07-27 18:08:54 +0200104 if (zc == IEEE754_CLASS_INF)
Markos Chandrase24c3be2015-08-13 09:56:31 +0200105 return ieee754sp_inf(zs);
106 SPDNORMY;
107 break;
108
109 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
Aleksandar Markovice840be62017-07-27 18:08:54 +0200110 if (zc == IEEE754_CLASS_INF)
Markos Chandrase24c3be2015-08-13 09:56:31 +0200111 return ieee754sp_inf(zs);
112 SPDNORMX;
113 break;
114
115 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
Aleksandar Markovice840be62017-07-27 18:08:54 +0200116 if (zc == IEEE754_CLASS_INF)
Markos Chandrase24c3be2015-08-13 09:56:31 +0200117 return ieee754sp_inf(zs);
118 /* fall through to real computations */
119 }
120
121 /* Finally get to do some computation */
122
123 /*
124 * Do the multiplication bit first
125 *
126 * rm = xm * ym, re = xe + ye basically
127 *
128 * At this point xm and ym should have been normalized.
129 */
130
131 /* rm = xm * ym, re = xe+ye basically */
132 assert(xm & SP_HIDDEN_BIT);
133 assert(ym & SP_HIDDEN_BIT);
134
135 re = xe + ye;
136 rs = xs ^ ys;
Paul Burton61620512016-04-21 14:04:49 +0100137 if (flags & maddf_negate_product)
138 rs ^= 1;
Markos Chandrase24c3be2015-08-13 09:56:31 +0200139
140 /* shunt to top of word */
141 xm <<= 32 - (SP_FBITS + 1);
142 ym <<= 32 - (SP_FBITS + 1);
143
144 /*
145 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
146 */
147 lxm = xm & 0xffff;
148 hxm = xm >> 16;
149 lym = ym & 0xffff;
150 hym = ym >> 16;
151
152 lrm = lxm * lym; /* 16 * 16 => 32 */
153 hrm = hxm * hym; /* 16 * 16 => 32 */
154
155 t = lxm * hym; /* 16 * 16 => 32 */
156 at = lrm + (t << 16);
157 hrm += at < lrm;
158 lrm = at;
159 hrm = hrm + (t >> 16);
160
161 t = hxm * lym; /* 16 * 16 => 32 */
162 at = lrm + (t << 16);
163 hrm += at < lrm;
164 lrm = at;
165 hrm = hrm + (t >> 16);
166
167 rm = hrm | (lrm != 0);
168
169 /*
170 * Sticky shift down to normal rounding precision.
171 */
172 if ((int) rm < 0) {
173 rm = (rm >> (32 - (SP_FBITS + 1 + 3))) |
174 ((rm << (SP_FBITS + 1 + 3)) != 0);
175 re++;
176 } else {
177 rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) |
178 ((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
179 }
180 assert(rm & (SP_HIDDEN_BIT << 3));
181
Aleksandar Markovicddbfff72017-06-19 17:50:12 +0200182 if (zc == IEEE754_CLASS_ZERO)
183 return ieee754sp_format(rs, re, rm);
184
Markos Chandrase24c3be2015-08-13 09:56:31 +0200185 /* And now the addition */
186
187 assert(zm & SP_HIDDEN_BIT);
188
189 /*
190 * Provide guard,round and stick bit space.
191 */
192 zm <<= 3;
193
194 if (ze > re) {
195 /*
Paul Burtondb57f292016-04-21 14:04:54 +0100196 * Have to shift r fraction right to align.
Markos Chandrase24c3be2015-08-13 09:56:31 +0200197 */
198 s = ze - re;
Paul Burtondb57f292016-04-21 14:04:54 +0100199 rm = XSPSRS(rm, s);
200 re += s;
Markos Chandrase24c3be2015-08-13 09:56:31 +0200201 } else if (re > ze) {
202 /*
Paul Burtondb57f292016-04-21 14:04:54 +0100203 * Have to shift z fraction right to align.
Markos Chandrase24c3be2015-08-13 09:56:31 +0200204 */
205 s = re - ze;
Paul Burtondb57f292016-04-21 14:04:54 +0100206 zm = XSPSRS(zm, s);
207 ze += s;
Markos Chandrase24c3be2015-08-13 09:56:31 +0200208 }
209 assert(ze == re);
210 assert(ze <= SP_EMAX);
211
212 if (zs == rs) {
213 /*
214 * Generate 28 bit result of adding two 27 bit numbers
215 * leaving result in zm, zs and ze.
216 */
217 zm = zm + rm;
218
219 if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
Paul Burtondb57f292016-04-21 14:04:54 +0100220 zm = XSPSRS1(zm);
221 ze++;
Markos Chandrase24c3be2015-08-13 09:56:31 +0200222 }
223 } else {
224 if (zm >= rm) {
225 zm = zm - rm;
226 } else {
227 zm = rm - zm;
228 zs = rs;
229 }
230 if (zm == 0)
231 return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
232
233 /*
234 * Normalize in extended single precision
235 */
236 while ((zm >> (SP_MBITS + 3)) == 0) {
237 zm <<= 1;
238 ze--;
239 }
240
241 }
242 return ieee754sp_format(zs, ze, zm);
243}
Paul Burton61620512016-04-21 14:04:49 +0100244
245union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
246 union ieee754sp y)
247{
248 return _sp_maddf(z, x, y, 0);
249}
250
251union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
252 union ieee754sp y)
253{
254 return _sp_maddf(z, x, y, maddf_negate_product);
255}