blob: d5e0fb13ef0faa74efbbbc99fcfc80905e3d6bcd [file] [log] [blame]
Markos Chandrase24c3be2015-08-13 09:56:31 +02001/*
2 * IEEE754 floating point arithmetic
3 * double precision: MADDF.f (Fused Multiply Add)
4 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
5 *
6 * MIPS floating point support
7 * Copyright (C) 2015 Imagination Technologies, Ltd.
8 * Author: Markos Chandras <markos.chandras@imgtec.com>
9 *
10 * This program is free software; you can distribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; version 2 of the License.
13 */
14
15#include "ieee754dp.h"
16
Paul Burtond728f672016-04-21 14:04:50 +010017enum maddf_flags {
18 maddf_negate_product = 1 << 0,
19};
20
21static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
22 union ieee754dp y, enum maddf_flags flags)
Markos Chandrase24c3be2015-08-13 09:56:31 +020023{
24 int re;
25 int rs;
26 u64 rm;
27 unsigned lxm;
28 unsigned hxm;
29 unsigned lym;
30 unsigned hym;
31 u64 lrm;
32 u64 hrm;
33 u64 t;
34 u64 at;
35 int s;
36
37 COMPXDP;
38 COMPYDP;
39
40 u64 zm; int ze; int zs __maybe_unused; int zc;
41
42 EXPLODEXDP;
43 EXPLODEYDP;
44 EXPLODEDP(z, zc, zs, ze, zm)
45
46 FLUSHXDP;
47 FLUSHYDP;
48 FLUSHDP(z, zc, zs, ze, zm);
49
50 ieee754_clearcx();
51
52 switch (zc) {
53 case IEEE754_CLASS_SNAN:
54 ieee754_setcx(IEEE754_INVALID_OPERATION);
55 return ieee754dp_nanxcpt(z);
56 case IEEE754_CLASS_DNORM:
57 DPDNORMx(zm, ze);
58 /* QNAN is handled separately below */
59 }
60
61 switch (CLPAIR(xc, yc)) {
62 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
63 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
64 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
65 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
66 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
67 return ieee754dp_nanxcpt(y);
68
69 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
70 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
71 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
72 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
73 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
74 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
75 return ieee754dp_nanxcpt(x);
76
77 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
78 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
79 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
80 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
81 return y;
82
83 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
84 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
85 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
86 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
87 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
88 return x;
89
90
91 /*
92 * Infinity handling
93 */
94 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
95 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
96 if (zc == IEEE754_CLASS_QNAN)
97 return z;
98 ieee754_setcx(IEEE754_INVALID_OPERATION);
99 return ieee754dp_indef();
100
101 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
102 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
103 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
104 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
105 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
106 if (zc == IEEE754_CLASS_QNAN)
107 return z;
108 return ieee754dp_inf(xs ^ ys);
109
110 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
111 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
112 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
113 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
114 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
115 if (zc == IEEE754_CLASS_INF)
116 return ieee754dp_inf(zs);
117 /* Multiplication is 0 so just return z */
118 return z;
119
120 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
121 DPDNORMX;
122
123 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
124 if (zc == IEEE754_CLASS_QNAN)
125 return z;
126 else if (zc == IEEE754_CLASS_INF)
127 return ieee754dp_inf(zs);
128 DPDNORMY;
129 break;
130
131 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
132 if (zc == IEEE754_CLASS_QNAN)
133 return z;
134 else if (zc == IEEE754_CLASS_INF)
135 return ieee754dp_inf(zs);
136 DPDNORMX;
137 break;
138
139 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
140 if (zc == IEEE754_CLASS_QNAN)
141 return z;
142 else if (zc == IEEE754_CLASS_INF)
143 return ieee754dp_inf(zs);
144 /* fall through to real computations */
145 }
146
147 /* Finally get to do some computation */
148
149 /*
150 * Do the multiplication bit first
151 *
152 * rm = xm * ym, re = xe + ye basically
153 *
154 * At this point xm and ym should have been normalized.
155 */
156 assert(xm & DP_HIDDEN_BIT);
157 assert(ym & DP_HIDDEN_BIT);
158
159 re = xe + ye;
160 rs = xs ^ ys;
Paul Burtond728f672016-04-21 14:04:50 +0100161 if (flags & maddf_negate_product)
162 rs ^= 1;
Markos Chandrase24c3be2015-08-13 09:56:31 +0200163
164 /* shunt to top of word */
165 xm <<= 64 - (DP_FBITS + 1);
166 ym <<= 64 - (DP_FBITS + 1);
167
168 /*
169 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
170 */
171
172 /* 32 * 32 => 64 */
173#define DPXMULT(x, y) ((u64)(x) * (u64)y)
174
175 lxm = xm;
176 hxm = xm >> 32;
177 lym = ym;
178 hym = ym >> 32;
179
180 lrm = DPXMULT(lxm, lym);
181 hrm = DPXMULT(hxm, hym);
182
183 t = DPXMULT(lxm, hym);
184
185 at = lrm + (t << 32);
186 hrm += at < lrm;
187 lrm = at;
188
189 hrm = hrm + (t >> 32);
190
191 t = DPXMULT(hxm, lym);
192
193 at = lrm + (t << 32);
194 hrm += at < lrm;
195 lrm = at;
196
197 hrm = hrm + (t >> 32);
198
199 rm = hrm | (lrm != 0);
200
201 /*
202 * Sticky shift down to normal rounding precision.
203 */
204 if ((s64) rm < 0) {
205 rm = (rm >> (64 - (DP_FBITS + 1 + 3))) |
206 ((rm << (DP_FBITS + 1 + 3)) != 0);
207 re++;
208 } else {
209 rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
210 ((rm << (DP_FBITS + 1 + 3 + 1)) != 0);
211 }
212 assert(rm & (DP_HIDDEN_BIT << 3));
213
214 /* And now the addition */
215 assert(zm & DP_HIDDEN_BIT);
216
217 /*
218 * Provide guard,round and stick bit space.
219 */
220 zm <<= 3;
221
222 if (ze > re) {
223 /*
224 * Have to shift y fraction right to align.
225 */
226 s = ze - re;
227 rm = XDPSRS(rm, s);
228 re += s;
229 } else if (re > ze) {
230 /*
231 * Have to shift x fraction right to align.
232 */
233 s = re - ze;
234 zm = XDPSRS(zm, s);
235 ze += s;
236 }
237 assert(ze == re);
238 assert(ze <= DP_EMAX);
239
240 if (zs == rs) {
241 /*
242 * Generate 28 bit result of adding two 27 bit numbers
243 * leaving result in xm, xs and xe.
244 */
245 zm = zm + rm;
246
247 if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */
248 zm = XDPSRS1(zm);
249 ze++;
250 }
251 } else {
252 if (zm >= rm) {
253 zm = zm - rm;
254 } else {
255 zm = rm - zm;
256 zs = rs;
257 }
258 if (zm == 0)
259 return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
260
261 /*
262 * Normalize to rounding precision.
263 */
264 while ((zm >> (DP_FBITS + 3)) == 0) {
265 zm <<= 1;
266 ze--;
267 }
268 }
269
270 return ieee754dp_format(zs, ze, zm);
271}
Paul Burtond728f672016-04-21 14:04:50 +0100272
273union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
274 union ieee754dp y)
275{
276 return _dp_maddf(z, x, y, 0);
277}
278
279union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
280 union ieee754dp y)
281{
282 return _dp_maddf(z, x, y, maddf_negate_product);
283}