blob: a8cd8b4f235eb810db8c0472dea29c82dc2cd173 [file] [log] [blame]
Markos Chandrase24c3be2015-08-13 09:56:31 +02001/*
2 * IEEE754 floating point arithmetic
3 * single precision: MADDF.f (Fused Multiply Add)
4 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
5 *
6 * MIPS floating point support
7 * Copyright (C) 2015 Imagination Technologies, Ltd.
8 * Author: Markos Chandras <markos.chandras@imgtec.com>
9 *
10 * This program is free software; you can distribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; version 2 of the License.
13 */
14
15#include "ieee754sp.h"
16
Paul Burton61620512016-04-21 14:04:49 +010017enum maddf_flags {
18 maddf_negate_product = 1 << 0,
19};
20
21static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
22 union ieee754sp y, enum maddf_flags flags)
Markos Chandrase24c3be2015-08-13 09:56:31 +020023{
24 int re;
25 int rs;
26 unsigned rm;
27 unsigned short lxm;
28 unsigned short hxm;
29 unsigned short lym;
30 unsigned short hym;
31 unsigned lrm;
32 unsigned hrm;
33 unsigned t;
34 unsigned at;
35 int s;
36
37 COMPXSP;
38 COMPYSP;
Paul Burtone2d11e12016-04-21 14:04:51 +010039 COMPZSP;
Markos Chandrase24c3be2015-08-13 09:56:31 +020040
41 EXPLODEXSP;
42 EXPLODEYSP;
Paul Burtone2d11e12016-04-21 14:04:51 +010043 EXPLODEZSP;
Markos Chandrase24c3be2015-08-13 09:56:31 +020044
45 FLUSHXSP;
46 FLUSHYSP;
Paul Burtone2d11e12016-04-21 14:04:51 +010047 FLUSHZSP;
Markos Chandrase24c3be2015-08-13 09:56:31 +020048
49 ieee754_clearcx();
50
51 switch (zc) {
52 case IEEE754_CLASS_SNAN:
53 ieee754_setcx(IEEE754_INVALID_OPERATION);
54 return ieee754sp_nanxcpt(z);
55 case IEEE754_CLASS_DNORM:
Paul Burtone2d11e12016-04-21 14:04:51 +010056 SPDNORMZ;
Markos Chandrase24c3be2015-08-13 09:56:31 +020057 /* QNAN is handled separately below */
58 }
59
60 switch (CLPAIR(xc, yc)) {
61 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
62 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
63 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
64 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
65 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
66 return ieee754sp_nanxcpt(y);
67
68 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
69 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
70 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
71 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
72 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
73 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
74 return ieee754sp_nanxcpt(x);
75
76 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
77 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
78 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
79 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
80 return y;
81
82 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
83 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
84 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
85 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
86 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
87 return x;
88
89 /*
90 * Infinity handling
91 */
92 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
93 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
94 if (zc == IEEE754_CLASS_QNAN)
95 return z;
96 ieee754_setcx(IEEE754_INVALID_OPERATION);
97 return ieee754sp_indef();
98
99 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
100 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
101 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
102 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
103 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
104 if (zc == IEEE754_CLASS_QNAN)
105 return z;
106 return ieee754sp_inf(xs ^ ys);
107
108 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
109 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
110 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
111 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
112 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
113 if (zc == IEEE754_CLASS_INF)
114 return ieee754sp_inf(zs);
115 /* Multiplication is 0 so just return z */
116 return z;
117
118 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
119 SPDNORMX;
120
121 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
122 if (zc == IEEE754_CLASS_QNAN)
123 return z;
124 else if (zc == IEEE754_CLASS_INF)
125 return ieee754sp_inf(zs);
126 SPDNORMY;
127 break;
128
129 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
130 if (zc == IEEE754_CLASS_QNAN)
131 return z;
132 else if (zc == IEEE754_CLASS_INF)
133 return ieee754sp_inf(zs);
134 SPDNORMX;
135 break;
136
137 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
138 if (zc == IEEE754_CLASS_QNAN)
139 return z;
140 else if (zc == IEEE754_CLASS_INF)
141 return ieee754sp_inf(zs);
142 /* fall through to real computations */
143 }
144
145 /* Finally get to do some computation */
146
147 /*
148 * Do the multiplication bit first
149 *
150 * rm = xm * ym, re = xe + ye basically
151 *
152 * At this point xm and ym should have been normalized.
153 */
154
155 /* rm = xm * ym, re = xe+ye basically */
156 assert(xm & SP_HIDDEN_BIT);
157 assert(ym & SP_HIDDEN_BIT);
158
159 re = xe + ye;
160 rs = xs ^ ys;
Paul Burton61620512016-04-21 14:04:49 +0100161 if (flags & maddf_negate_product)
162 rs ^= 1;
Markos Chandrase24c3be2015-08-13 09:56:31 +0200163
164 /* shunt to top of word */
165 xm <<= 32 - (SP_FBITS + 1);
166 ym <<= 32 - (SP_FBITS + 1);
167
168 /*
169 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
170 */
171 lxm = xm & 0xffff;
172 hxm = xm >> 16;
173 lym = ym & 0xffff;
174 hym = ym >> 16;
175
176 lrm = lxm * lym; /* 16 * 16 => 32 */
177 hrm = hxm * hym; /* 16 * 16 => 32 */
178
179 t = lxm * hym; /* 16 * 16 => 32 */
180 at = lrm + (t << 16);
181 hrm += at < lrm;
182 lrm = at;
183 hrm = hrm + (t >> 16);
184
185 t = hxm * lym; /* 16 * 16 => 32 */
186 at = lrm + (t << 16);
187 hrm += at < lrm;
188 lrm = at;
189 hrm = hrm + (t >> 16);
190
191 rm = hrm | (lrm != 0);
192
193 /*
194 * Sticky shift down to normal rounding precision.
195 */
196 if ((int) rm < 0) {
197 rm = (rm >> (32 - (SP_FBITS + 1 + 3))) |
198 ((rm << (SP_FBITS + 1 + 3)) != 0);
199 re++;
200 } else {
201 rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) |
202 ((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
203 }
204 assert(rm & (SP_HIDDEN_BIT << 3));
205
206 /* And now the addition */
207
208 assert(zm & SP_HIDDEN_BIT);
209
210 /*
211 * Provide guard,round and stick bit space.
212 */
213 zm <<= 3;
214
215 if (ze > re) {
216 /*
Paul Burtondb57f292016-04-21 14:04:54 +0100217 * Have to shift r fraction right to align.
Markos Chandrase24c3be2015-08-13 09:56:31 +0200218 */
219 s = ze - re;
Paul Burtondb57f292016-04-21 14:04:54 +0100220 rm = XSPSRS(rm, s);
221 re += s;
Markos Chandrase24c3be2015-08-13 09:56:31 +0200222 } else if (re > ze) {
223 /*
Paul Burtondb57f292016-04-21 14:04:54 +0100224 * Have to shift z fraction right to align.
Markos Chandrase24c3be2015-08-13 09:56:31 +0200225 */
226 s = re - ze;
Paul Burtondb57f292016-04-21 14:04:54 +0100227 zm = XSPSRS(zm, s);
228 ze += s;
Markos Chandrase24c3be2015-08-13 09:56:31 +0200229 }
230 assert(ze == re);
231 assert(ze <= SP_EMAX);
232
233 if (zs == rs) {
234 /*
235 * Generate 28 bit result of adding two 27 bit numbers
236 * leaving result in zm, zs and ze.
237 */
238 zm = zm + rm;
239
240 if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
Paul Burtondb57f292016-04-21 14:04:54 +0100241 zm = XSPSRS1(zm);
242 ze++;
Markos Chandrase24c3be2015-08-13 09:56:31 +0200243 }
244 } else {
245 if (zm >= rm) {
246 zm = zm - rm;
247 } else {
248 zm = rm - zm;
249 zs = rs;
250 }
251 if (zm == 0)
252 return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
253
254 /*
255 * Normalize in extended single precision
256 */
257 while ((zm >> (SP_MBITS + 3)) == 0) {
258 zm <<= 1;
259 ze--;
260 }
261
262 }
263 return ieee754sp_format(zs, ze, zm);
264}
Paul Burton61620512016-04-21 14:04:49 +0100265
266union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
267 union ieee754sp y)
268{
269 return _sp_maddf(z, x, y, 0);
270}
271
272union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
273 union ieee754sp y)
274{
275 return _sp_maddf(z, x, y, maddf_negate_product);
276}