blob: 5dd7f93a89be5b39d5a3e6fcf497f0ada9b436bc [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux/PA-RISC Project (http://www.parisc-linux.org/)
3 *
4 * Floating-point emulation code
5 * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
10 * any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21/*
22 * BEGIN_DESC
23 *
24 * File:
25 * @(#) pa/spmath/fmpyfadd.c $Revision: 1.1 $
26 *
27 * Purpose:
28 * Double Floating-point Multiply Fused Add
29 * Double Floating-point Multiply Negate Fused Add
30 * Single Floating-point Multiply Fused Add
31 * Single Floating-point Multiply Negate Fused Add
32 *
33 * External Interfaces:
34 * dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
35 * dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
36 * sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
37 * sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
38 *
39 * Internal Interfaces:
40 *
41 * Theory:
42 * <<please update with a overview of the operation of this file>>
43 *
44 * END_DESC
45*/
46
47
48#include "float.h"
49#include "sgl_float.h"
50#include "dbl_float.h"
51
52
53/*
54 * Double Floating-point Multiply Fused Add
55 */
56
57int
58dbl_fmpyfadd(
59 dbl_floating_point *src1ptr,
60 dbl_floating_point *src2ptr,
61 dbl_floating_point *src3ptr,
62 unsigned int *status,
63 dbl_floating_point *dstptr)
64{
65 unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
66 register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
67 unsigned int rightp1, rightp2, rightp3, rightp4;
68 unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
69 register int mpy_exponent, add_exponent, count;
70 boolean inexact = FALSE, is_tiny = FALSE;
71
72 unsigned int signlessleft1, signlessright1, save;
73 register int result_exponent, diff_exponent;
74 int sign_save, jumpsize;
75
76 Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
77 Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
78 Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
79
80 /*
81 * set sign bit of result of multiply
82 */
83 if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
84 Dbl_setnegativezerop1(resultp1);
85 else Dbl_setzerop1(resultp1);
86
87 /*
88 * Generate multiply exponent
89 */
90 mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
91
92 /*
93 * check first operand for NaN's or infinity
94 */
95 if (Dbl_isinfinity_exponent(opnd1p1)) {
96 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
97 if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
98 Dbl_isnotnan(opnd3p1,opnd3p2)) {
99 if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
100 /*
101 * invalid since operands are infinity
102 * and zero
103 */
104 if (Is_invalidtrap_enabled())
105 return(OPC_2E_INVALIDEXCEPTION);
106 Set_invalidflag();
107 Dbl_makequietnan(resultp1,resultp2);
108 Dbl_copytoptr(resultp1,resultp2,dstptr);
109 return(NOEXCEPTION);
110 }
111 /*
112 * Check third operand for infinity with a
113 * sign opposite of the multiply result
114 */
115 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
116 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
117 /*
118 * invalid since attempting a magnitude
119 * subtraction of infinities
120 */
121 if (Is_invalidtrap_enabled())
122 return(OPC_2E_INVALIDEXCEPTION);
123 Set_invalidflag();
124 Dbl_makequietnan(resultp1,resultp2);
125 Dbl_copytoptr(resultp1,resultp2,dstptr);
126 return(NOEXCEPTION);
127 }
128
129 /*
130 * return infinity
131 */
132 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
133 Dbl_copytoptr(resultp1,resultp2,dstptr);
134 return(NOEXCEPTION);
135 }
136 }
137 else {
138 /*
139 * is NaN; signaling or quiet?
140 */
141 if (Dbl_isone_signaling(opnd1p1)) {
142 /* trap if INVALIDTRAP enabled */
143 if (Is_invalidtrap_enabled())
144 return(OPC_2E_INVALIDEXCEPTION);
145 /* make NaN quiet */
146 Set_invalidflag();
147 Dbl_set_quiet(opnd1p1);
148 }
149 /*
150 * is second operand a signaling NaN?
151 */
152 else if (Dbl_is_signalingnan(opnd2p1)) {
153 /* trap if INVALIDTRAP enabled */
154 if (Is_invalidtrap_enabled())
155 return(OPC_2E_INVALIDEXCEPTION);
156 /* make NaN quiet */
157 Set_invalidflag();
158 Dbl_set_quiet(opnd2p1);
159 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
160 return(NOEXCEPTION);
161 }
162 /*
163 * is third operand a signaling NaN?
164 */
165 else if (Dbl_is_signalingnan(opnd3p1)) {
166 /* trap if INVALIDTRAP enabled */
167 if (Is_invalidtrap_enabled())
168 return(OPC_2E_INVALIDEXCEPTION);
169 /* make NaN quiet */
170 Set_invalidflag();
171 Dbl_set_quiet(opnd3p1);
172 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
173 return(NOEXCEPTION);
174 }
175 /*
176 * return quiet NaN
177 */
178 Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
179 return(NOEXCEPTION);
180 }
181 }
182
183 /*
184 * check second operand for NaN's or infinity
185 */
186 if (Dbl_isinfinity_exponent(opnd2p1)) {
187 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
188 if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
189 if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
190 /*
191 * invalid since multiply operands are
192 * zero & infinity
193 */
194 if (Is_invalidtrap_enabled())
195 return(OPC_2E_INVALIDEXCEPTION);
196 Set_invalidflag();
197 Dbl_makequietnan(opnd2p1,opnd2p2);
198 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
199 return(NOEXCEPTION);
200 }
201
202 /*
203 * Check third operand for infinity with a
204 * sign opposite of the multiply result
205 */
206 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
207 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
208 /*
209 * invalid since attempting a magnitude
210 * subtraction of infinities
211 */
212 if (Is_invalidtrap_enabled())
213 return(OPC_2E_INVALIDEXCEPTION);
214 Set_invalidflag();
215 Dbl_makequietnan(resultp1,resultp2);
216 Dbl_copytoptr(resultp1,resultp2,dstptr);
217 return(NOEXCEPTION);
218 }
219
220 /*
221 * return infinity
222 */
223 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
224 Dbl_copytoptr(resultp1,resultp2,dstptr);
225 return(NOEXCEPTION);
226 }
227 }
228 else {
229 /*
230 * is NaN; signaling or quiet?
231 */
232 if (Dbl_isone_signaling(opnd2p1)) {
233 /* trap if INVALIDTRAP enabled */
234 if (Is_invalidtrap_enabled())
235 return(OPC_2E_INVALIDEXCEPTION);
236 /* make NaN quiet */
237 Set_invalidflag();
238 Dbl_set_quiet(opnd2p1);
239 }
240 /*
241 * is third operand a signaling NaN?
242 */
243 else if (Dbl_is_signalingnan(opnd3p1)) {
244 /* trap if INVALIDTRAP enabled */
245 if (Is_invalidtrap_enabled())
246 return(OPC_2E_INVALIDEXCEPTION);
247 /* make NaN quiet */
248 Set_invalidflag();
249 Dbl_set_quiet(opnd3p1);
250 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
251 return(NOEXCEPTION);
252 }
253 /*
254 * return quiet NaN
255 */
256 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
257 return(NOEXCEPTION);
258 }
259 }
260
261 /*
262 * check third operand for NaN's or infinity
263 */
264 if (Dbl_isinfinity_exponent(opnd3p1)) {
265 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
266 /* return infinity */
267 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
268 return(NOEXCEPTION);
269 } else {
270 /*
271 * is NaN; signaling or quiet?
272 */
273 if (Dbl_isone_signaling(opnd3p1)) {
274 /* trap if INVALIDTRAP enabled */
275 if (Is_invalidtrap_enabled())
276 return(OPC_2E_INVALIDEXCEPTION);
277 /* make NaN quiet */
278 Set_invalidflag();
279 Dbl_set_quiet(opnd3p1);
280 }
281 /*
282 * return quiet NaN
283 */
284 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
285 return(NOEXCEPTION);
286 }
287 }
288
289 /*
290 * Generate multiply mantissa
291 */
292 if (Dbl_isnotzero_exponent(opnd1p1)) {
293 /* set hidden bit */
294 Dbl_clear_signexponent_set_hidden(opnd1p1);
295 }
296 else {
297 /* check for zero */
298 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
299 /*
300 * Perform the add opnd3 with zero here.
301 */
302 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
303 if (Is_rounding_mode(ROUNDMINUS)) {
304 Dbl_or_signs(opnd3p1,resultp1);
305 } else {
306 Dbl_and_signs(opnd3p1,resultp1);
307 }
308 }
309 /*
310 * Now let's check for trapped underflow case.
311 */
312 else if (Dbl_iszero_exponent(opnd3p1) &&
313 Is_underflowtrap_enabled()) {
314 /* need to normalize results mantissa */
315 sign_save = Dbl_signextendedsign(opnd3p1);
316 result_exponent = 0;
317 Dbl_leftshiftby1(opnd3p1,opnd3p2);
318 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
319 Dbl_set_sign(opnd3p1,/*using*/sign_save);
320 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
321 unfl);
322 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
323 /* inexact = FALSE */
324 return(OPC_2E_UNDERFLOWEXCEPTION);
325 }
326 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
327 return(NOEXCEPTION);
328 }
329 /* is denormalized, adjust exponent */
330 Dbl_clear_signexponent(opnd1p1);
331 Dbl_leftshiftby1(opnd1p1,opnd1p2);
332 Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
333 }
334 /* opnd2 needs to have hidden bit set with msb in hidden bit */
335 if (Dbl_isnotzero_exponent(opnd2p1)) {
336 Dbl_clear_signexponent_set_hidden(opnd2p1);
337 }
338 else {
339 /* check for zero */
340 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
341 /*
342 * Perform the add opnd3 with zero here.
343 */
344 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
345 if (Is_rounding_mode(ROUNDMINUS)) {
346 Dbl_or_signs(opnd3p1,resultp1);
347 } else {
348 Dbl_and_signs(opnd3p1,resultp1);
349 }
350 }
351 /*
352 * Now let's check for trapped underflow case.
353 */
354 else if (Dbl_iszero_exponent(opnd3p1) &&
355 Is_underflowtrap_enabled()) {
356 /* need to normalize results mantissa */
357 sign_save = Dbl_signextendedsign(opnd3p1);
358 result_exponent = 0;
359 Dbl_leftshiftby1(opnd3p1,opnd3p2);
360 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
361 Dbl_set_sign(opnd3p1,/*using*/sign_save);
362 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
363 unfl);
364 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
365 /* inexact = FALSE */
366 return(OPC_2E_UNDERFLOWEXCEPTION);
367 }
368 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
369 return(NOEXCEPTION);
370 }
371 /* is denormalized; want to normalize */
372 Dbl_clear_signexponent(opnd2p1);
373 Dbl_leftshiftby1(opnd2p1,opnd2p2);
374 Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
375 }
376
377 /* Multiply the first two source mantissas together */
378
379 /*
380 * The intermediate result will be kept in tmpres,
381 * which needs enough room for 106 bits of mantissa,
382 * so lets call it a Double extended.
383 */
384 Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
385
386 /*
387 * Four bits at a time are inspected in each loop, and a
388 * simple shift and add multiply algorithm is used.
389 */
390 for (count = DBL_P-1; count >= 0; count -= 4) {
391 Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
392 if (Dbit28p2(opnd1p2)) {
393 /* Fourword_add should be an ADD followed by 3 ADDC's */
394 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
395 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
396 }
397 if (Dbit29p2(opnd1p2)) {
398 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
399 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
400 }
401 if (Dbit30p2(opnd1p2)) {
402 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
403 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
404 }
405 if (Dbit31p2(opnd1p2)) {
406 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
407 opnd2p1, opnd2p2, 0, 0);
408 }
409 Dbl_rightshiftby4(opnd1p1,opnd1p2);
410 }
411 if (Is_dexthiddenoverflow(tmpresp1)) {
412 /* result mantissa >= 2 (mantissa overflow) */
413 mpy_exponent++;
414 Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
415 }
416
417 /*
418 * Restore the sign of the mpy result which was saved in resultp1.
419 * The exponent will continue to be kept in mpy_exponent.
420 */
421 Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
422
423 /*
424 * No rounding is required, since the result of the multiply
425 * is exact in the extended format.
426 */
427
428 /*
429 * Now we are ready to perform the add portion of the operation.
430 *
431 * The exponents need to be kept as integers for now, since the
432 * multiply result might not fit into the exponent field. We
433 * can't overflow or underflow because of this yet, since the
434 * add could bring the final result back into range.
435 */
436 add_exponent = Dbl_exponent(opnd3p1);
437
438 /*
439 * Check for denormalized or zero add operand.
440 */
441 if (add_exponent == 0) {
442 /* check for zero */
443 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
444 /* right is zero */
445 /* Left can't be zero and must be result.
446 *
447 * The final result is now in tmpres and mpy_exponent,
448 * and needs to be rounded and squeezed back into
449 * double precision format from double extended.
450 */
451 result_exponent = mpy_exponent;
452 Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
453 resultp1,resultp2,resultp3,resultp4);
454 sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
455 goto round;
456 }
457
458 /*
459 * Neither are zeroes.
460 * Adjust exponent and normalize add operand.
461 */
462 sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
463 Dbl_clear_signexponent(opnd3p1);
464 Dbl_leftshiftby1(opnd3p1,opnd3p2);
465 Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
466 Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
467 } else {
468 Dbl_clear_exponent_set_hidden(opnd3p1);
469 }
470 /*
471 * Copy opnd3 to the double extended variable called right.
472 */
473 Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
474
475 /*
476 * A zero "save" helps discover equal operands (for later),
477 * and is used in swapping operands (if needed).
478 */
479 Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
480
481 /*
482 * Compare magnitude of operands.
483 */
484 Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
485 Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
486 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
487 Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
488 /*
489 * Set the left operand to the larger one by XOR swap.
490 * First finish the first word "save".
491 */
492 Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
493 Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
494 Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
495 rightp2,rightp3,rightp4);
496 /* also setup exponents used in rest of routine */
497 diff_exponent = add_exponent - mpy_exponent;
498 result_exponent = add_exponent;
499 } else {
500 /* also setup exponents used in rest of routine */
501 diff_exponent = mpy_exponent - add_exponent;
502 result_exponent = mpy_exponent;
503 }
504 /* Invariant: left is not smaller than right. */
505
506 /*
507 * Special case alignment of operands that would force alignment
508 * beyond the extent of the extension. A further optimization
509 * could special case this but only reduces the path length for
510 * this infrequent case.
511 */
512 if (diff_exponent > DBLEXT_THRESHOLD) {
513 diff_exponent = DBLEXT_THRESHOLD;
514 }
515
516 /* Align right operand by shifting it to the right */
517 Dblext_clear_sign(rightp1);
518 Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
519 /*shifted by*/diff_exponent);
520
521 /* Treat sum and difference of the operands separately. */
522 if ((int)save < 0) {
523 /*
524 * Difference of the two operands. Overflow can occur if the
525 * multiply overflowed. A borrow can occur out of the hidden
526 * bit and force a post normalization phase.
527 */
528 Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
529 rightp1,rightp2,rightp3,rightp4,
530 resultp1,resultp2,resultp3,resultp4);
531 sign_save = Dbl_signextendedsign(resultp1);
532 if (Dbl_iszero_hidden(resultp1)) {
533 /* Handle normalization */
534 /* A straight foward algorithm would now shift the
535 * result and extension left until the hidden bit
536 * becomes one. Not all of the extension bits need
537 * participate in the shift. Only the two most
538 * significant bits (round and guard) are needed.
539 * If only a single shift is needed then the guard
540 * bit becomes a significant low order bit and the
541 * extension must participate in the rounding.
542 * If more than a single shift is needed, then all
543 * bits to the right of the guard bit are zeros,
544 * and the guard bit may or may not be zero. */
545 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
546 resultp4);
547
548 /* Need to check for a zero result. The sign and
549 * exponent fields have already been zeroed. The more
550 * efficient test of the full object can be used.
551 */
552 if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
553 /* Must have been "x-x" or "x+(-x)". */
554 if (Is_rounding_mode(ROUNDMINUS))
555 Dbl_setone_sign(resultp1);
556 Dbl_copytoptr(resultp1,resultp2,dstptr);
557 return(NOEXCEPTION);
558 }
559 result_exponent--;
560
561 /* Look to see if normalization is finished. */
562 if (Dbl_isone_hidden(resultp1)) {
563 /* No further normalization is needed */
564 goto round;
565 }
566
567 /* Discover first one bit to determine shift amount.
568 * Use a modified binary search. We have already
569 * shifted the result one position right and still
570 * not found a one so the remainder of the extension
571 * must be zero and simplifies rounding. */
572 /* Scan bytes */
573 while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
574 Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
575 result_exponent -= 8;
576 }
577 /* Now narrow it down to the nibble */
578 if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
579 /* The lower nibble contains the
580 * normalizing one */
581 Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
582 result_exponent -= 4;
583 }
584 /* Select case where first bit is set (already
585 * normalized) otherwise select the proper shift. */
586 jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
587 if (jumpsize <= 7) switch(jumpsize) {
588 case 1:
589 Dblext_leftshiftby3(resultp1,resultp2,resultp3,
590 resultp4);
591 result_exponent -= 3;
592 break;
593 case 2:
594 case 3:
595 Dblext_leftshiftby2(resultp1,resultp2,resultp3,
596 resultp4);
597 result_exponent -= 2;
598 break;
599 case 4:
600 case 5:
601 case 6:
602 case 7:
603 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
604 resultp4);
605 result_exponent -= 1;
606 break;
607 }
608 } /* end if (hidden...)... */
609 /* Fall through and round */
610 } /* end if (save < 0)... */
611 else {
612 /* Add magnitudes */
613 Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
614 rightp1,rightp2,rightp3,rightp4,
615 /*to*/resultp1,resultp2,resultp3,resultp4);
616 sign_save = Dbl_signextendedsign(resultp1);
617 if (Dbl_isone_hiddenoverflow(resultp1)) {
618 /* Prenormalization required. */
619 Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
620 resultp4);
621 result_exponent++;
622 } /* end if hiddenoverflow... */
623 } /* end else ...add magnitudes... */
624
625 /* Round the result. If the extension and lower two words are
626 * all zeros, then the result is exact. Otherwise round in the
627 * correct direction. Underflow is possible. If a postnormalization
628 * is necessary, then the mantissa is all zeros so no shift is needed.
629 */
630 round:
631 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
632 Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
633 result_exponent,is_tiny);
634 }
635 Dbl_set_sign(resultp1,/*using*/sign_save);
636 if (Dblext_isnotzero_mantissap3(resultp3) ||
637 Dblext_isnotzero_mantissap4(resultp4)) {
638 inexact = TRUE;
639 switch(Rounding_mode()) {
640 case ROUNDNEAREST: /* The default. */
641 if (Dblext_isone_highp3(resultp3)) {
642 /* at least 1/2 ulp */
643 if (Dblext_isnotzero_low31p3(resultp3) ||
644 Dblext_isnotzero_mantissap4(resultp4) ||
645 Dblext_isone_lowp2(resultp2)) {
646 /* either exactly half way and odd or
647 * more than 1/2ulp */
648 Dbl_increment(resultp1,resultp2);
649 }
650 }
651 break;
652
653 case ROUNDPLUS:
654 if (Dbl_iszero_sign(resultp1)) {
655 /* Round up positive results */
656 Dbl_increment(resultp1,resultp2);
657 }
658 break;
659
660 case ROUNDMINUS:
661 if (Dbl_isone_sign(resultp1)) {
662 /* Round down negative results */
663 Dbl_increment(resultp1,resultp2);
664 }
665
666 case ROUNDZERO:;
667 /* truncate is simple */
668 } /* end switch... */
669 if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
670 }
671 if (result_exponent >= DBL_INFINITY_EXPONENT) {
672 /* trap if OVERFLOWTRAP enabled */
673 if (Is_overflowtrap_enabled()) {
674 /*
675 * Adjust bias of result
676 */
677 Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
678 Dbl_copytoptr(resultp1,resultp2,dstptr);
679 if (inexact)
680 if (Is_inexacttrap_enabled())
681 return (OPC_2E_OVERFLOWEXCEPTION |
682 OPC_2E_INEXACTEXCEPTION);
683 else Set_inexactflag();
684 return (OPC_2E_OVERFLOWEXCEPTION);
685 }
686 inexact = TRUE;
687 Set_overflowflag();
688 /* set result to infinity or largest number */
689 Dbl_setoverflow(resultp1,resultp2);
690
691 } else if (result_exponent <= 0) { /* underflow case */
692 if (Is_underflowtrap_enabled()) {
693 /*
694 * Adjust bias of result
695 */
696 Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
697 Dbl_copytoptr(resultp1,resultp2,dstptr);
698 if (inexact)
699 if (Is_inexacttrap_enabled())
700 return (OPC_2E_UNDERFLOWEXCEPTION |
701 OPC_2E_INEXACTEXCEPTION);
702 else Set_inexactflag();
703 return(OPC_2E_UNDERFLOWEXCEPTION);
704 }
705 else if (inexact && is_tiny) Set_underflowflag();
706 }
707 else Dbl_set_exponent(resultp1,result_exponent);
708 Dbl_copytoptr(resultp1,resultp2,dstptr);
709 if (inexact)
710 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
711 else Set_inexactflag();
712 return(NOEXCEPTION);
713}
714
715/*
716 * Double Floating-point Multiply Negate Fused Add
717 */
718
719dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
720
721dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
722unsigned int *status;
723{
724 unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
725 register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
726 unsigned int rightp1, rightp2, rightp3, rightp4;
727 unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
728 register int mpy_exponent, add_exponent, count;
729 boolean inexact = FALSE, is_tiny = FALSE;
730
731 unsigned int signlessleft1, signlessright1, save;
732 register int result_exponent, diff_exponent;
733 int sign_save, jumpsize;
734
735 Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
736 Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
737 Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
738
739 /*
740 * set sign bit of result of multiply
741 */
742 if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
743 Dbl_setzerop1(resultp1);
744 else
745 Dbl_setnegativezerop1(resultp1);
746
747 /*
748 * Generate multiply exponent
749 */
750 mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
751
752 /*
753 * check first operand for NaN's or infinity
754 */
755 if (Dbl_isinfinity_exponent(opnd1p1)) {
756 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
757 if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
758 Dbl_isnotnan(opnd3p1,opnd3p2)) {
759 if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
760 /*
761 * invalid since operands are infinity
762 * and zero
763 */
764 if (Is_invalidtrap_enabled())
765 return(OPC_2E_INVALIDEXCEPTION);
766 Set_invalidflag();
767 Dbl_makequietnan(resultp1,resultp2);
768 Dbl_copytoptr(resultp1,resultp2,dstptr);
769 return(NOEXCEPTION);
770 }
771 /*
772 * Check third operand for infinity with a
773 * sign opposite of the multiply result
774 */
775 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
776 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
777 /*
778 * invalid since attempting a magnitude
779 * subtraction of infinities
780 */
781 if (Is_invalidtrap_enabled())
782 return(OPC_2E_INVALIDEXCEPTION);
783 Set_invalidflag();
784 Dbl_makequietnan(resultp1,resultp2);
785 Dbl_copytoptr(resultp1,resultp2,dstptr);
786 return(NOEXCEPTION);
787 }
788
789 /*
790 * return infinity
791 */
792 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
793 Dbl_copytoptr(resultp1,resultp2,dstptr);
794 return(NOEXCEPTION);
795 }
796 }
797 else {
798 /*
799 * is NaN; signaling or quiet?
800 */
801 if (Dbl_isone_signaling(opnd1p1)) {
802 /* trap if INVALIDTRAP enabled */
803 if (Is_invalidtrap_enabled())
804 return(OPC_2E_INVALIDEXCEPTION);
805 /* make NaN quiet */
806 Set_invalidflag();
807 Dbl_set_quiet(opnd1p1);
808 }
809 /*
810 * is second operand a signaling NaN?
811 */
812 else if (Dbl_is_signalingnan(opnd2p1)) {
813 /* trap if INVALIDTRAP enabled */
814 if (Is_invalidtrap_enabled())
815 return(OPC_2E_INVALIDEXCEPTION);
816 /* make NaN quiet */
817 Set_invalidflag();
818 Dbl_set_quiet(opnd2p1);
819 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
820 return(NOEXCEPTION);
821 }
822 /*
823 * is third operand a signaling NaN?
824 */
825 else if (Dbl_is_signalingnan(opnd3p1)) {
826 /* trap if INVALIDTRAP enabled */
827 if (Is_invalidtrap_enabled())
828 return(OPC_2E_INVALIDEXCEPTION);
829 /* make NaN quiet */
830 Set_invalidflag();
831 Dbl_set_quiet(opnd3p1);
832 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
833 return(NOEXCEPTION);
834 }
835 /*
836 * return quiet NaN
837 */
838 Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
839 return(NOEXCEPTION);
840 }
841 }
842
843 /*
844 * check second operand for NaN's or infinity
845 */
846 if (Dbl_isinfinity_exponent(opnd2p1)) {
847 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
848 if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
849 if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
850 /*
851 * invalid since multiply operands are
852 * zero & infinity
853 */
854 if (Is_invalidtrap_enabled())
855 return(OPC_2E_INVALIDEXCEPTION);
856 Set_invalidflag();
857 Dbl_makequietnan(opnd2p1,opnd2p2);
858 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
859 return(NOEXCEPTION);
860 }
861
862 /*
863 * Check third operand for infinity with a
864 * sign opposite of the multiply result
865 */
866 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
867 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
868 /*
869 * invalid since attempting a magnitude
870 * subtraction of infinities
871 */
872 if (Is_invalidtrap_enabled())
873 return(OPC_2E_INVALIDEXCEPTION);
874 Set_invalidflag();
875 Dbl_makequietnan(resultp1,resultp2);
876 Dbl_copytoptr(resultp1,resultp2,dstptr);
877 return(NOEXCEPTION);
878 }
879
880 /*
881 * return infinity
882 */
883 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
884 Dbl_copytoptr(resultp1,resultp2,dstptr);
885 return(NOEXCEPTION);
886 }
887 }
888 else {
889 /*
890 * is NaN; signaling or quiet?
891 */
892 if (Dbl_isone_signaling(opnd2p1)) {
893 /* trap if INVALIDTRAP enabled */
894 if (Is_invalidtrap_enabled())
895 return(OPC_2E_INVALIDEXCEPTION);
896 /* make NaN quiet */
897 Set_invalidflag();
898 Dbl_set_quiet(opnd2p1);
899 }
900 /*
901 * is third operand a signaling NaN?
902 */
903 else if (Dbl_is_signalingnan(opnd3p1)) {
904 /* trap if INVALIDTRAP enabled */
905 if (Is_invalidtrap_enabled())
906 return(OPC_2E_INVALIDEXCEPTION);
907 /* make NaN quiet */
908 Set_invalidflag();
909 Dbl_set_quiet(opnd3p1);
910 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
911 return(NOEXCEPTION);
912 }
913 /*
914 * return quiet NaN
915 */
916 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
917 return(NOEXCEPTION);
918 }
919 }
920
921 /*
922 * check third operand for NaN's or infinity
923 */
924 if (Dbl_isinfinity_exponent(opnd3p1)) {
925 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
926 /* return infinity */
927 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
928 return(NOEXCEPTION);
929 } else {
930 /*
931 * is NaN; signaling or quiet?
932 */
933 if (Dbl_isone_signaling(opnd3p1)) {
934 /* trap if INVALIDTRAP enabled */
935 if (Is_invalidtrap_enabled())
936 return(OPC_2E_INVALIDEXCEPTION);
937 /* make NaN quiet */
938 Set_invalidflag();
939 Dbl_set_quiet(opnd3p1);
940 }
941 /*
942 * return quiet NaN
943 */
944 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
945 return(NOEXCEPTION);
946 }
947 }
948
949 /*
950 * Generate multiply mantissa
951 */
952 if (Dbl_isnotzero_exponent(opnd1p1)) {
953 /* set hidden bit */
954 Dbl_clear_signexponent_set_hidden(opnd1p1);
955 }
956 else {
957 /* check for zero */
958 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
959 /*
960 * Perform the add opnd3 with zero here.
961 */
962 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
963 if (Is_rounding_mode(ROUNDMINUS)) {
964 Dbl_or_signs(opnd3p1,resultp1);
965 } else {
966 Dbl_and_signs(opnd3p1,resultp1);
967 }
968 }
969 /*
970 * Now let's check for trapped underflow case.
971 */
972 else if (Dbl_iszero_exponent(opnd3p1) &&
973 Is_underflowtrap_enabled()) {
974 /* need to normalize results mantissa */
975 sign_save = Dbl_signextendedsign(opnd3p1);
976 result_exponent = 0;
977 Dbl_leftshiftby1(opnd3p1,opnd3p2);
978 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
979 Dbl_set_sign(opnd3p1,/*using*/sign_save);
980 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
981 unfl);
982 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
983 /* inexact = FALSE */
984 return(OPC_2E_UNDERFLOWEXCEPTION);
985 }
986 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
987 return(NOEXCEPTION);
988 }
989 /* is denormalized, adjust exponent */
990 Dbl_clear_signexponent(opnd1p1);
991 Dbl_leftshiftby1(opnd1p1,opnd1p2);
992 Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
993 }
994 /* opnd2 needs to have hidden bit set with msb in hidden bit */
995 if (Dbl_isnotzero_exponent(opnd2p1)) {
996 Dbl_clear_signexponent_set_hidden(opnd2p1);
997 }
998 else {
999 /* check for zero */
1000 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
1001 /*
1002 * Perform the add opnd3 with zero here.
1003 */
1004 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
1005 if (Is_rounding_mode(ROUNDMINUS)) {
1006 Dbl_or_signs(opnd3p1,resultp1);
1007 } else {
1008 Dbl_and_signs(opnd3p1,resultp1);
1009 }
1010 }
1011 /*
1012 * Now let's check for trapped underflow case.
1013 */
1014 else if (Dbl_iszero_exponent(opnd3p1) &&
1015 Is_underflowtrap_enabled()) {
1016 /* need to normalize results mantissa */
1017 sign_save = Dbl_signextendedsign(opnd3p1);
1018 result_exponent = 0;
1019 Dbl_leftshiftby1(opnd3p1,opnd3p2);
1020 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
1021 Dbl_set_sign(opnd3p1,/*using*/sign_save);
1022 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
1023 unfl);
1024 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1025 /* inexact = FALSE */
1026 return(OPC_2E_UNDERFLOWEXCEPTION);
1027 }
1028 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1029 return(NOEXCEPTION);
1030 }
1031 /* is denormalized; want to normalize */
1032 Dbl_clear_signexponent(opnd2p1);
1033 Dbl_leftshiftby1(opnd2p1,opnd2p2);
1034 Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
1035 }
1036
1037 /* Multiply the first two source mantissas together */
1038
1039 /*
1040 * The intermediate result will be kept in tmpres,
1041 * which needs enough room for 106 bits of mantissa,
1042 * so lets call it a Double extended.
1043 */
1044 Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1045
1046 /*
1047 * Four bits at a time are inspected in each loop, and a
1048 * simple shift and add multiply algorithm is used.
1049 */
1050 for (count = DBL_P-1; count >= 0; count -= 4) {
1051 Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1052 if (Dbit28p2(opnd1p2)) {
1053 /* Fourword_add should be an ADD followed by 3 ADDC's */
1054 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1055 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
1056 }
1057 if (Dbit29p2(opnd1p2)) {
1058 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1059 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
1060 }
1061 if (Dbit30p2(opnd1p2)) {
1062 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1063 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
1064 }
1065 if (Dbit31p2(opnd1p2)) {
1066 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1067 opnd2p1, opnd2p2, 0, 0);
1068 }
1069 Dbl_rightshiftby4(opnd1p1,opnd1p2);
1070 }
1071 if (Is_dexthiddenoverflow(tmpresp1)) {
1072 /* result mantissa >= 2 (mantissa overflow) */
1073 mpy_exponent++;
1074 Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1075 }
1076
1077 /*
1078 * Restore the sign of the mpy result which was saved in resultp1.
1079 * The exponent will continue to be kept in mpy_exponent.
1080 */
1081 Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
1082
1083 /*
1084 * No rounding is required, since the result of the multiply
1085 * is exact in the extended format.
1086 */
1087
1088 /*
1089 * Now we are ready to perform the add portion of the operation.
1090 *
1091 * The exponents need to be kept as integers for now, since the
1092 * multiply result might not fit into the exponent field. We
1093 * can't overflow or underflow because of this yet, since the
1094 * add could bring the final result back into range.
1095 */
1096 add_exponent = Dbl_exponent(opnd3p1);
1097
1098 /*
1099 * Check for denormalized or zero add operand.
1100 */
1101 if (add_exponent == 0) {
1102 /* check for zero */
1103 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
1104 /* right is zero */
1105 /* Left can't be zero and must be result.
1106 *
1107 * The final result is now in tmpres and mpy_exponent,
1108 * and needs to be rounded and squeezed back into
1109 * double precision format from double extended.
1110 */
1111 result_exponent = mpy_exponent;
1112 Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1113 resultp1,resultp2,resultp3,resultp4);
1114 sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
1115 goto round;
1116 }
1117
1118 /*
1119 * Neither are zeroes.
1120 * Adjust exponent and normalize add operand.
1121 */
1122 sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
1123 Dbl_clear_signexponent(opnd3p1);
1124 Dbl_leftshiftby1(opnd3p1,opnd3p2);
1125 Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
1126 Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
1127 } else {
1128 Dbl_clear_exponent_set_hidden(opnd3p1);
1129 }
1130 /*
1131 * Copy opnd3 to the double extended variable called right.
1132 */
1133 Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
1134
1135 /*
1136 * A zero "save" helps discover equal operands (for later),
1137 * and is used in swapping operands (if needed).
1138 */
1139 Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
1140
1141 /*
1142 * Compare magnitude of operands.
1143 */
1144 Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
1145 Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
1146 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1147 Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
1148 /*
1149 * Set the left operand to the larger one by XOR swap.
1150 * First finish the first word "save".
1151 */
1152 Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
1153 Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1154 Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
1155 rightp2,rightp3,rightp4);
1156 /* also setup exponents used in rest of routine */
1157 diff_exponent = add_exponent - mpy_exponent;
1158 result_exponent = add_exponent;
1159 } else {
1160 /* also setup exponents used in rest of routine */
1161 diff_exponent = mpy_exponent - add_exponent;
1162 result_exponent = mpy_exponent;
1163 }
1164 /* Invariant: left is not smaller than right. */
1165
1166 /*
1167 * Special case alignment of operands that would force alignment
1168 * beyond the extent of the extension. A further optimization
1169 * could special case this but only reduces the path length for
1170 * this infrequent case.
1171 */
1172 if (diff_exponent > DBLEXT_THRESHOLD) {
1173 diff_exponent = DBLEXT_THRESHOLD;
1174 }
1175
1176 /* Align right operand by shifting it to the right */
1177 Dblext_clear_sign(rightp1);
1178 Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
1179 /*shifted by*/diff_exponent);
1180
1181 /* Treat sum and difference of the operands separately. */
1182 if ((int)save < 0) {
1183 /*
1184 * Difference of the two operands. Overflow can occur if the
1185 * multiply overflowed. A borrow can occur out of the hidden
1186 * bit and force a post normalization phase.
1187 */
1188 Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1189 rightp1,rightp2,rightp3,rightp4,
1190 resultp1,resultp2,resultp3,resultp4);
1191 sign_save = Dbl_signextendedsign(resultp1);
1192 if (Dbl_iszero_hidden(resultp1)) {
1193 /* Handle normalization */
1194 /* A straight foward algorithm would now shift the
1195 * result and extension left until the hidden bit
1196 * becomes one. Not all of the extension bits need
1197 * participate in the shift. Only the two most
1198 * significant bits (round and guard) are needed.
1199 * If only a single shift is needed then the guard
1200 * bit becomes a significant low order bit and the
1201 * extension must participate in the rounding.
1202 * If more than a single shift is needed, then all
1203 * bits to the right of the guard bit are zeros,
1204 * and the guard bit may or may not be zero. */
1205 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1206 resultp4);
1207
1208 /* Need to check for a zero result. The sign and
1209 * exponent fields have already been zeroed. The more
1210 * efficient test of the full object can be used.
1211 */
1212 if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
1213 /* Must have been "x-x" or "x+(-x)". */
1214 if (Is_rounding_mode(ROUNDMINUS))
1215 Dbl_setone_sign(resultp1);
1216 Dbl_copytoptr(resultp1,resultp2,dstptr);
1217 return(NOEXCEPTION);
1218 }
1219 result_exponent--;
1220
1221 /* Look to see if normalization is finished. */
1222 if (Dbl_isone_hidden(resultp1)) {
1223 /* No further normalization is needed */
1224 goto round;
1225 }
1226
1227 /* Discover first one bit to determine shift amount.
1228 * Use a modified binary search. We have already
1229 * shifted the result one position right and still
1230 * not found a one so the remainder of the extension
1231 * must be zero and simplifies rounding. */
1232 /* Scan bytes */
1233 while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
1234 Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
1235 result_exponent -= 8;
1236 }
1237 /* Now narrow it down to the nibble */
1238 if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
1239 /* The lower nibble contains the
1240 * normalizing one */
1241 Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
1242 result_exponent -= 4;
1243 }
1244 /* Select case where first bit is set (already
1245 * normalized) otherwise select the proper shift. */
1246 jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
1247 if (jumpsize <= 7) switch(jumpsize) {
1248 case 1:
1249 Dblext_leftshiftby3(resultp1,resultp2,resultp3,
1250 resultp4);
1251 result_exponent -= 3;
1252 break;
1253 case 2:
1254 case 3:
1255 Dblext_leftshiftby2(resultp1,resultp2,resultp3,
1256 resultp4);
1257 result_exponent -= 2;
1258 break;
1259 case 4:
1260 case 5:
1261 case 6:
1262 case 7:
1263 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1264 resultp4);
1265 result_exponent -= 1;
1266 break;
1267 }
1268 } /* end if (hidden...)... */
1269 /* Fall through and round */
1270 } /* end if (save < 0)... */
1271 else {
1272 /* Add magnitudes */
1273 Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1274 rightp1,rightp2,rightp3,rightp4,
1275 /*to*/resultp1,resultp2,resultp3,resultp4);
1276 sign_save = Dbl_signextendedsign(resultp1);
1277 if (Dbl_isone_hiddenoverflow(resultp1)) {
1278 /* Prenormalization required. */
1279 Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
1280 resultp4);
1281 result_exponent++;
1282 } /* end if hiddenoverflow... */
1283 } /* end else ...add magnitudes... */
1284
1285 /* Round the result. If the extension and lower two words are
1286 * all zeros, then the result is exact. Otherwise round in the
1287 * correct direction. Underflow is possible. If a postnormalization
1288 * is necessary, then the mantissa is all zeros so no shift is needed.
1289 */
1290 round:
1291 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1292 Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
1293 result_exponent,is_tiny);
1294 }
1295 Dbl_set_sign(resultp1,/*using*/sign_save);
1296 if (Dblext_isnotzero_mantissap3(resultp3) ||
1297 Dblext_isnotzero_mantissap4(resultp4)) {
1298 inexact = TRUE;
1299 switch(Rounding_mode()) {
1300 case ROUNDNEAREST: /* The default. */
1301 if (Dblext_isone_highp3(resultp3)) {
1302 /* at least 1/2 ulp */
1303 if (Dblext_isnotzero_low31p3(resultp3) ||
1304 Dblext_isnotzero_mantissap4(resultp4) ||
1305 Dblext_isone_lowp2(resultp2)) {
1306 /* either exactly half way and odd or
1307 * more than 1/2ulp */
1308 Dbl_increment(resultp1,resultp2);
1309 }
1310 }
1311 break;
1312
1313 case ROUNDPLUS:
1314 if (Dbl_iszero_sign(resultp1)) {
1315 /* Round up positive results */
1316 Dbl_increment(resultp1,resultp2);
1317 }
1318 break;
1319
1320 case ROUNDMINUS:
1321 if (Dbl_isone_sign(resultp1)) {
1322 /* Round down negative results */
1323 Dbl_increment(resultp1,resultp2);
1324 }
1325
1326 case ROUNDZERO:;
1327 /* truncate is simple */
1328 } /* end switch... */
1329 if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
1330 }
1331 if (result_exponent >= DBL_INFINITY_EXPONENT) {
1332 /* Overflow */
1333 if (Is_overflowtrap_enabled()) {
1334 /*
1335 * Adjust bias of result
1336 */
1337 Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1338 Dbl_copytoptr(resultp1,resultp2,dstptr);
1339 if (inexact)
1340 if (Is_inexacttrap_enabled())
1341 return (OPC_2E_OVERFLOWEXCEPTION |
1342 OPC_2E_INEXACTEXCEPTION);
1343 else Set_inexactflag();
1344 return (OPC_2E_OVERFLOWEXCEPTION);
1345 }
1346 inexact = TRUE;
1347 Set_overflowflag();
1348 Dbl_setoverflow(resultp1,resultp2);
1349 } else if (result_exponent <= 0) { /* underflow case */
1350 if (Is_underflowtrap_enabled()) {
1351 /*
1352 * Adjust bias of result
1353 */
1354 Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
1355 Dbl_copytoptr(resultp1,resultp2,dstptr);
1356 if (inexact)
1357 if (Is_inexacttrap_enabled())
1358 return (OPC_2E_UNDERFLOWEXCEPTION |
1359 OPC_2E_INEXACTEXCEPTION);
1360 else Set_inexactflag();
1361 return(OPC_2E_UNDERFLOWEXCEPTION);
1362 }
1363 else if (inexact && is_tiny) Set_underflowflag();
1364 }
1365 else Dbl_set_exponent(resultp1,result_exponent);
1366 Dbl_copytoptr(resultp1,resultp2,dstptr);
1367 if (inexact)
1368 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1369 else Set_inexactflag();
1370 return(NOEXCEPTION);
1371}
1372
1373/*
1374 * Single Floating-point Multiply Fused Add
1375 */
1376
1377sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
1378
1379sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
1380unsigned int *status;
1381{
1382 unsigned int opnd1, opnd2, opnd3;
1383 register unsigned int tmpresp1, tmpresp2;
1384 unsigned int rightp1, rightp2;
1385 unsigned int resultp1, resultp2 = 0;
1386 register int mpy_exponent, add_exponent, count;
1387 boolean inexact = FALSE, is_tiny = FALSE;
1388
1389 unsigned int signlessleft1, signlessright1, save;
1390 register int result_exponent, diff_exponent;
1391 int sign_save, jumpsize;
1392
1393 Sgl_copyfromptr(src1ptr,opnd1);
1394 Sgl_copyfromptr(src2ptr,opnd2);
1395 Sgl_copyfromptr(src3ptr,opnd3);
1396
1397 /*
1398 * set sign bit of result of multiply
1399 */
1400 if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
1401 Sgl_setnegativezero(resultp1);
1402 else Sgl_setzero(resultp1);
1403
1404 /*
1405 * Generate multiply exponent
1406 */
1407 mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
1408
1409 /*
1410 * check first operand for NaN's or infinity
1411 */
1412 if (Sgl_isinfinity_exponent(opnd1)) {
1413 if (Sgl_iszero_mantissa(opnd1)) {
1414 if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
1415 if (Sgl_iszero_exponentmantissa(opnd2)) {
1416 /*
1417 * invalid since operands are infinity
1418 * and zero
1419 */
1420 if (Is_invalidtrap_enabled())
1421 return(OPC_2E_INVALIDEXCEPTION);
1422 Set_invalidflag();
1423 Sgl_makequietnan(resultp1);
1424 Sgl_copytoptr(resultp1,dstptr);
1425 return(NOEXCEPTION);
1426 }
1427 /*
1428 * Check third operand for infinity with a
1429 * sign opposite of the multiply result
1430 */
1431 if (Sgl_isinfinity(opnd3) &&
1432 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1433 /*
1434 * invalid since attempting a magnitude
1435 * subtraction of infinities
1436 */
1437 if (Is_invalidtrap_enabled())
1438 return(OPC_2E_INVALIDEXCEPTION);
1439 Set_invalidflag();
1440 Sgl_makequietnan(resultp1);
1441 Sgl_copytoptr(resultp1,dstptr);
1442 return(NOEXCEPTION);
1443 }
1444
1445 /*
1446 * return infinity
1447 */
1448 Sgl_setinfinity_exponentmantissa(resultp1);
1449 Sgl_copytoptr(resultp1,dstptr);
1450 return(NOEXCEPTION);
1451 }
1452 }
1453 else {
1454 /*
1455 * is NaN; signaling or quiet?
1456 */
1457 if (Sgl_isone_signaling(opnd1)) {
1458 /* trap if INVALIDTRAP enabled */
1459 if (Is_invalidtrap_enabled())
1460 return(OPC_2E_INVALIDEXCEPTION);
1461 /* make NaN quiet */
1462 Set_invalidflag();
1463 Sgl_set_quiet(opnd1);
1464 }
1465 /*
1466 * is second operand a signaling NaN?
1467 */
1468 else if (Sgl_is_signalingnan(opnd2)) {
1469 /* trap if INVALIDTRAP enabled */
1470 if (Is_invalidtrap_enabled())
1471 return(OPC_2E_INVALIDEXCEPTION);
1472 /* make NaN quiet */
1473 Set_invalidflag();
1474 Sgl_set_quiet(opnd2);
1475 Sgl_copytoptr(opnd2,dstptr);
1476 return(NOEXCEPTION);
1477 }
1478 /*
1479 * is third operand a signaling NaN?
1480 */
1481 else if (Sgl_is_signalingnan(opnd3)) {
1482 /* trap if INVALIDTRAP enabled */
1483 if (Is_invalidtrap_enabled())
1484 return(OPC_2E_INVALIDEXCEPTION);
1485 /* make NaN quiet */
1486 Set_invalidflag();
1487 Sgl_set_quiet(opnd3);
1488 Sgl_copytoptr(opnd3,dstptr);
1489 return(NOEXCEPTION);
1490 }
1491 /*
1492 * return quiet NaN
1493 */
1494 Sgl_copytoptr(opnd1,dstptr);
1495 return(NOEXCEPTION);
1496 }
1497 }
1498
1499 /*
1500 * check second operand for NaN's or infinity
1501 */
1502 if (Sgl_isinfinity_exponent(opnd2)) {
1503 if (Sgl_iszero_mantissa(opnd2)) {
1504 if (Sgl_isnotnan(opnd3)) {
1505 if (Sgl_iszero_exponentmantissa(opnd1)) {
1506 /*
1507 * invalid since multiply operands are
1508 * zero & infinity
1509 */
1510 if (Is_invalidtrap_enabled())
1511 return(OPC_2E_INVALIDEXCEPTION);
1512 Set_invalidflag();
1513 Sgl_makequietnan(opnd2);
1514 Sgl_copytoptr(opnd2,dstptr);
1515 return(NOEXCEPTION);
1516 }
1517
1518 /*
1519 * Check third operand for infinity with a
1520 * sign opposite of the multiply result
1521 */
1522 if (Sgl_isinfinity(opnd3) &&
1523 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1524 /*
1525 * invalid since attempting a magnitude
1526 * subtraction of infinities
1527 */
1528 if (Is_invalidtrap_enabled())
1529 return(OPC_2E_INVALIDEXCEPTION);
1530 Set_invalidflag();
1531 Sgl_makequietnan(resultp1);
1532 Sgl_copytoptr(resultp1,dstptr);
1533 return(NOEXCEPTION);
1534 }
1535
1536 /*
1537 * return infinity
1538 */
1539 Sgl_setinfinity_exponentmantissa(resultp1);
1540 Sgl_copytoptr(resultp1,dstptr);
1541 return(NOEXCEPTION);
1542 }
1543 }
1544 else {
1545 /*
1546 * is NaN; signaling or quiet?
1547 */
1548 if (Sgl_isone_signaling(opnd2)) {
1549 /* trap if INVALIDTRAP enabled */
1550 if (Is_invalidtrap_enabled())
1551 return(OPC_2E_INVALIDEXCEPTION);
1552 /* make NaN quiet */
1553 Set_invalidflag();
1554 Sgl_set_quiet(opnd2);
1555 }
1556 /*
1557 * is third operand a signaling NaN?
1558 */
1559 else if (Sgl_is_signalingnan(opnd3)) {
1560 /* trap if INVALIDTRAP enabled */
1561 if (Is_invalidtrap_enabled())
1562 return(OPC_2E_INVALIDEXCEPTION);
1563 /* make NaN quiet */
1564 Set_invalidflag();
1565 Sgl_set_quiet(opnd3);
1566 Sgl_copytoptr(opnd3,dstptr);
1567 return(NOEXCEPTION);
1568 }
1569 /*
1570 * return quiet NaN
1571 */
1572 Sgl_copytoptr(opnd2,dstptr);
1573 return(NOEXCEPTION);
1574 }
1575 }
1576
1577 /*
1578 * check third operand for NaN's or infinity
1579 */
1580 if (Sgl_isinfinity_exponent(opnd3)) {
1581 if (Sgl_iszero_mantissa(opnd3)) {
1582 /* return infinity */
1583 Sgl_copytoptr(opnd3,dstptr);
1584 return(NOEXCEPTION);
1585 } else {
1586 /*
1587 * is NaN; signaling or quiet?
1588 */
1589 if (Sgl_isone_signaling(opnd3)) {
1590 /* trap if INVALIDTRAP enabled */
1591 if (Is_invalidtrap_enabled())
1592 return(OPC_2E_INVALIDEXCEPTION);
1593 /* make NaN quiet */
1594 Set_invalidflag();
1595 Sgl_set_quiet(opnd3);
1596 }
1597 /*
1598 * return quiet NaN
1599 */
1600 Sgl_copytoptr(opnd3,dstptr);
1601 return(NOEXCEPTION);
1602 }
1603 }
1604
1605 /*
1606 * Generate multiply mantissa
1607 */
1608 if (Sgl_isnotzero_exponent(opnd1)) {
1609 /* set hidden bit */
1610 Sgl_clear_signexponent_set_hidden(opnd1);
1611 }
1612 else {
1613 /* check for zero */
1614 if (Sgl_iszero_mantissa(opnd1)) {
1615 /*
1616 * Perform the add opnd3 with zero here.
1617 */
1618 if (Sgl_iszero_exponentmantissa(opnd3)) {
1619 if (Is_rounding_mode(ROUNDMINUS)) {
1620 Sgl_or_signs(opnd3,resultp1);
1621 } else {
1622 Sgl_and_signs(opnd3,resultp1);
1623 }
1624 }
1625 /*
1626 * Now let's check for trapped underflow case.
1627 */
1628 else if (Sgl_iszero_exponent(opnd3) &&
1629 Is_underflowtrap_enabled()) {
1630 /* need to normalize results mantissa */
1631 sign_save = Sgl_signextendedsign(opnd3);
1632 result_exponent = 0;
1633 Sgl_leftshiftby1(opnd3);
1634 Sgl_normalize(opnd3,result_exponent);
1635 Sgl_set_sign(opnd3,/*using*/sign_save);
1636 Sgl_setwrapped_exponent(opnd3,result_exponent,
1637 unfl);
1638 Sgl_copytoptr(opnd3,dstptr);
1639 /* inexact = FALSE */
1640 return(OPC_2E_UNDERFLOWEXCEPTION);
1641 }
1642 Sgl_copytoptr(opnd3,dstptr);
1643 return(NOEXCEPTION);
1644 }
1645 /* is denormalized, adjust exponent */
1646 Sgl_clear_signexponent(opnd1);
1647 Sgl_leftshiftby1(opnd1);
1648 Sgl_normalize(opnd1,mpy_exponent);
1649 }
1650 /* opnd2 needs to have hidden bit set with msb in hidden bit */
1651 if (Sgl_isnotzero_exponent(opnd2)) {
1652 Sgl_clear_signexponent_set_hidden(opnd2);
1653 }
1654 else {
1655 /* check for zero */
1656 if (Sgl_iszero_mantissa(opnd2)) {
1657 /*
1658 * Perform the add opnd3 with zero here.
1659 */
1660 if (Sgl_iszero_exponentmantissa(opnd3)) {
1661 if (Is_rounding_mode(ROUNDMINUS)) {
1662 Sgl_or_signs(opnd3,resultp1);
1663 } else {
1664 Sgl_and_signs(opnd3,resultp1);
1665 }
1666 }
1667 /*
1668 * Now let's check for trapped underflow case.
1669 */
1670 else if (Sgl_iszero_exponent(opnd3) &&
1671 Is_underflowtrap_enabled()) {
1672 /* need to normalize results mantissa */
1673 sign_save = Sgl_signextendedsign(opnd3);
1674 result_exponent = 0;
1675 Sgl_leftshiftby1(opnd3);
1676 Sgl_normalize(opnd3,result_exponent);
1677 Sgl_set_sign(opnd3,/*using*/sign_save);
1678 Sgl_setwrapped_exponent(opnd3,result_exponent,
1679 unfl);
1680 Sgl_copytoptr(opnd3,dstptr);
1681 /* inexact = FALSE */
1682 return(OPC_2E_UNDERFLOWEXCEPTION);
1683 }
1684 Sgl_copytoptr(opnd3,dstptr);
1685 return(NOEXCEPTION);
1686 }
1687 /* is denormalized; want to normalize */
1688 Sgl_clear_signexponent(opnd2);
1689 Sgl_leftshiftby1(opnd2);
1690 Sgl_normalize(opnd2,mpy_exponent);
1691 }
1692
1693 /* Multiply the first two source mantissas together */
1694
1695 /*
1696 * The intermediate result will be kept in tmpres,
1697 * which needs enough room for 106 bits of mantissa,
1698 * so lets call it a Double extended.
1699 */
1700 Sglext_setzero(tmpresp1,tmpresp2);
1701
1702 /*
1703 * Four bits at a time are inspected in each loop, and a
1704 * simple shift and add multiply algorithm is used.
1705 */
1706 for (count = SGL_P-1; count >= 0; count -= 4) {
1707 Sglext_rightshiftby4(tmpresp1,tmpresp2);
1708 if (Sbit28(opnd1)) {
1709 /* Twoword_add should be an ADD followed by 2 ADDC's */
1710 Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
1711 }
1712 if (Sbit29(opnd1)) {
1713 Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
1714 }
1715 if (Sbit30(opnd1)) {
1716 Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
1717 }
1718 if (Sbit31(opnd1)) {
1719 Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
1720 }
1721 Sgl_rightshiftby4(opnd1);
1722 }
1723 if (Is_sexthiddenoverflow(tmpresp1)) {
1724 /* result mantissa >= 2 (mantissa overflow) */
1725 mpy_exponent++;
1726 Sglext_rightshiftby4(tmpresp1,tmpresp2);
1727 } else {
1728 Sglext_rightshiftby3(tmpresp1,tmpresp2);
1729 }
1730
1731 /*
1732 * Restore the sign of the mpy result which was saved in resultp1.
1733 * The exponent will continue to be kept in mpy_exponent.
1734 */
1735 Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
1736
1737 /*
1738 * No rounding is required, since the result of the multiply
1739 * is exact in the extended format.
1740 */
1741
1742 /*
1743 * Now we are ready to perform the add portion of the operation.
1744 *
1745 * The exponents need to be kept as integers for now, since the
1746 * multiply result might not fit into the exponent field. We
1747 * can't overflow or underflow because of this yet, since the
1748 * add could bring the final result back into range.
1749 */
1750 add_exponent = Sgl_exponent(opnd3);
1751
1752 /*
1753 * Check for denormalized or zero add operand.
1754 */
1755 if (add_exponent == 0) {
1756 /* check for zero */
1757 if (Sgl_iszero_mantissa(opnd3)) {
1758 /* right is zero */
1759 /* Left can't be zero and must be result.
1760 *
1761 * The final result is now in tmpres and mpy_exponent,
1762 * and needs to be rounded and squeezed back into
1763 * double precision format from double extended.
1764 */
1765 result_exponent = mpy_exponent;
1766 Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
1767 sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
1768 goto round;
1769 }
1770
1771 /*
1772 * Neither are zeroes.
1773 * Adjust exponent and normalize add operand.
1774 */
1775 sign_save = Sgl_signextendedsign(opnd3); /* save sign */
1776 Sgl_clear_signexponent(opnd3);
1777 Sgl_leftshiftby1(opnd3);
1778 Sgl_normalize(opnd3,add_exponent);
1779 Sgl_set_sign(opnd3,sign_save); /* restore sign */
1780 } else {
1781 Sgl_clear_exponent_set_hidden(opnd3);
1782 }
1783 /*
1784 * Copy opnd3 to the double extended variable called right.
1785 */
1786 Sgl_copyto_sglext(opnd3,rightp1,rightp2);
1787
1788 /*
1789 * A zero "save" helps discover equal operands (for later),
1790 * and is used in swapping operands (if needed).
1791 */
1792 Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
1793
1794 /*
1795 * Compare magnitude of operands.
1796 */
1797 Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
1798 Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
1799 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1800 Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
1801 /*
1802 * Set the left operand to the larger one by XOR swap.
1803 * First finish the first word "save".
1804 */
1805 Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
1806 Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1807 Sglext_swap_lower(tmpresp2,rightp2);
1808 /* also setup exponents used in rest of routine */
1809 diff_exponent = add_exponent - mpy_exponent;
1810 result_exponent = add_exponent;
1811 } else {
1812 /* also setup exponents used in rest of routine */
1813 diff_exponent = mpy_exponent - add_exponent;
1814 result_exponent = mpy_exponent;
1815 }
1816 /* Invariant: left is not smaller than right. */
1817
1818 /*
1819 * Special case alignment of operands that would force alignment
1820 * beyond the extent of the extension. A further optimization
1821 * could special case this but only reduces the path length for
1822 * this infrequent case.
1823 */
1824 if (diff_exponent > SGLEXT_THRESHOLD) {
1825 diff_exponent = SGLEXT_THRESHOLD;
1826 }
1827
1828 /* Align right operand by shifting it to the right */
1829 Sglext_clear_sign(rightp1);
1830 Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
1831
1832 /* Treat sum and difference of the operands separately. */
1833 if ((int)save < 0) {
1834 /*
1835 * Difference of the two operands. Overflow can occur if the
1836 * multiply overflowed. A borrow can occur out of the hidden
1837 * bit and force a post normalization phase.
1838 */
1839 Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
1840 resultp1,resultp2);
1841 sign_save = Sgl_signextendedsign(resultp1);
1842 if (Sgl_iszero_hidden(resultp1)) {
1843 /* Handle normalization */
1844 /* A straight foward algorithm would now shift the
1845 * result and extension left until the hidden bit
1846 * becomes one. Not all of the extension bits need
1847 * participate in the shift. Only the two most
1848 * significant bits (round and guard) are needed.
1849 * If only a single shift is needed then the guard
1850 * bit becomes a significant low order bit and the
1851 * extension must participate in the rounding.
1852 * If more than a single shift is needed, then all
1853 * bits to the right of the guard bit are zeros,
1854 * and the guard bit may or may not be zero. */
1855 Sglext_leftshiftby1(resultp1,resultp2);
1856
1857 /* Need to check for a zero result. The sign and
1858 * exponent fields have already been zeroed. The more
1859 * efficient test of the full object can be used.
1860 */
1861 if (Sglext_iszero(resultp1,resultp2)) {
1862 /* Must have been "x-x" or "x+(-x)". */
1863 if (Is_rounding_mode(ROUNDMINUS))
1864 Sgl_setone_sign(resultp1);
1865 Sgl_copytoptr(resultp1,dstptr);
1866 return(NOEXCEPTION);
1867 }
1868 result_exponent--;
1869
1870 /* Look to see if normalization is finished. */
1871 if (Sgl_isone_hidden(resultp1)) {
1872 /* No further normalization is needed */
1873 goto round;
1874 }
1875
1876 /* Discover first one bit to determine shift amount.
1877 * Use a modified binary search. We have already
1878 * shifted the result one position right and still
1879 * not found a one so the remainder of the extension
1880 * must be zero and simplifies rounding. */
1881 /* Scan bytes */
1882 while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
1883 Sglext_leftshiftby8(resultp1,resultp2);
1884 result_exponent -= 8;
1885 }
1886 /* Now narrow it down to the nibble */
1887 if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
1888 /* The lower nibble contains the
1889 * normalizing one */
1890 Sglext_leftshiftby4(resultp1,resultp2);
1891 result_exponent -= 4;
1892 }
1893 /* Select case where first bit is set (already
1894 * normalized) otherwise select the proper shift. */
1895 jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
1896 if (jumpsize <= 7) switch(jumpsize) {
1897 case 1:
1898 Sglext_leftshiftby3(resultp1,resultp2);
1899 result_exponent -= 3;
1900 break;
1901 case 2:
1902 case 3:
1903 Sglext_leftshiftby2(resultp1,resultp2);
1904 result_exponent -= 2;
1905 break;
1906 case 4:
1907 case 5:
1908 case 6:
1909 case 7:
1910 Sglext_leftshiftby1(resultp1,resultp2);
1911 result_exponent -= 1;
1912 break;
1913 }
1914 } /* end if (hidden...)... */
1915 /* Fall through and round */
1916 } /* end if (save < 0)... */
1917 else {
1918 /* Add magnitudes */
1919 Sglext_addition(tmpresp1,tmpresp2,
1920 rightp1,rightp2, /*to*/resultp1,resultp2);
1921 sign_save = Sgl_signextendedsign(resultp1);
1922 if (Sgl_isone_hiddenoverflow(resultp1)) {
1923 /* Prenormalization required. */
1924 Sglext_arithrightshiftby1(resultp1,resultp2);
1925 result_exponent++;
1926 } /* end if hiddenoverflow... */
1927 } /* end else ...add magnitudes... */
1928
1929 /* Round the result. If the extension and lower two words are
1930 * all zeros, then the result is exact. Otherwise round in the
1931 * correct direction. Underflow is possible. If a postnormalization
1932 * is necessary, then the mantissa is all zeros so no shift is needed.
1933 */
1934 round:
1935 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1936 Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
1937 }
1938 Sgl_set_sign(resultp1,/*using*/sign_save);
1939 if (Sglext_isnotzero_mantissap2(resultp2)) {
1940 inexact = TRUE;
1941 switch(Rounding_mode()) {
1942 case ROUNDNEAREST: /* The default. */
1943 if (Sglext_isone_highp2(resultp2)) {
1944 /* at least 1/2 ulp */
1945 if (Sglext_isnotzero_low31p2(resultp2) ||
1946 Sglext_isone_lowp1(resultp1)) {
1947 /* either exactly half way and odd or
1948 * more than 1/2ulp */
1949 Sgl_increment(resultp1);
1950 }
1951 }
1952 break;
1953
1954 case ROUNDPLUS:
1955 if (Sgl_iszero_sign(resultp1)) {
1956 /* Round up positive results */
1957 Sgl_increment(resultp1);
1958 }
1959 break;
1960
1961 case ROUNDMINUS:
1962 if (Sgl_isone_sign(resultp1)) {
1963 /* Round down negative results */
1964 Sgl_increment(resultp1);
1965 }
1966
1967 case ROUNDZERO:;
1968 /* truncate is simple */
1969 } /* end switch... */
1970 if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
1971 }
1972 if (result_exponent >= SGL_INFINITY_EXPONENT) {
1973 /* Overflow */
1974 if (Is_overflowtrap_enabled()) {
1975 /*
1976 * Adjust bias of result
1977 */
1978 Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1979 Sgl_copytoptr(resultp1,dstptr);
1980 if (inexact)
1981 if (Is_inexacttrap_enabled())
1982 return (OPC_2E_OVERFLOWEXCEPTION |
1983 OPC_2E_INEXACTEXCEPTION);
1984 else Set_inexactflag();
1985 return (OPC_2E_OVERFLOWEXCEPTION);
1986 }
1987 inexact = TRUE;
1988 Set_overflowflag();
1989 Sgl_setoverflow(resultp1);
1990 } else if (result_exponent <= 0) { /* underflow case */
1991 if (Is_underflowtrap_enabled()) {
1992 /*
1993 * Adjust bias of result
1994 */
1995 Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
1996 Sgl_copytoptr(resultp1,dstptr);
1997 if (inexact)
1998 if (Is_inexacttrap_enabled())
1999 return (OPC_2E_UNDERFLOWEXCEPTION |
2000 OPC_2E_INEXACTEXCEPTION);
2001 else Set_inexactflag();
2002 return(OPC_2E_UNDERFLOWEXCEPTION);
2003 }
2004 else if (inexact && is_tiny) Set_underflowflag();
2005 }
2006 else Sgl_set_exponent(resultp1,result_exponent);
2007 Sgl_copytoptr(resultp1,dstptr);
2008 if (inexact)
2009 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2010 else Set_inexactflag();
2011 return(NOEXCEPTION);
2012}
2013
2014/*
2015 * Single Floating-point Multiply Negate Fused Add
2016 */
2017
2018sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
2019
2020sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
2021unsigned int *status;
2022{
2023 unsigned int opnd1, opnd2, opnd3;
2024 register unsigned int tmpresp1, tmpresp2;
2025 unsigned int rightp1, rightp2;
2026 unsigned int resultp1, resultp2 = 0;
2027 register int mpy_exponent, add_exponent, count;
2028 boolean inexact = FALSE, is_tiny = FALSE;
2029
2030 unsigned int signlessleft1, signlessright1, save;
2031 register int result_exponent, diff_exponent;
2032 int sign_save, jumpsize;
2033
2034 Sgl_copyfromptr(src1ptr,opnd1);
2035 Sgl_copyfromptr(src2ptr,opnd2);
2036 Sgl_copyfromptr(src3ptr,opnd3);
2037
2038 /*
2039 * set sign bit of result of multiply
2040 */
2041 if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
2042 Sgl_setzero(resultp1);
2043 else
2044 Sgl_setnegativezero(resultp1);
2045
2046 /*
2047 * Generate multiply exponent
2048 */
2049 mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
2050
2051 /*
2052 * check first operand for NaN's or infinity
2053 */
2054 if (Sgl_isinfinity_exponent(opnd1)) {
2055 if (Sgl_iszero_mantissa(opnd1)) {
2056 if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
2057 if (Sgl_iszero_exponentmantissa(opnd2)) {
2058 /*
2059 * invalid since operands are infinity
2060 * and zero
2061 */
2062 if (Is_invalidtrap_enabled())
2063 return(OPC_2E_INVALIDEXCEPTION);
2064 Set_invalidflag();
2065 Sgl_makequietnan(resultp1);
2066 Sgl_copytoptr(resultp1,dstptr);
2067 return(NOEXCEPTION);
2068 }
2069 /*
2070 * Check third operand for infinity with a
2071 * sign opposite of the multiply result
2072 */
2073 if (Sgl_isinfinity(opnd3) &&
2074 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2075 /*
2076 * invalid since attempting a magnitude
2077 * subtraction of infinities
2078 */
2079 if (Is_invalidtrap_enabled())
2080 return(OPC_2E_INVALIDEXCEPTION);
2081 Set_invalidflag();
2082 Sgl_makequietnan(resultp1);
2083 Sgl_copytoptr(resultp1,dstptr);
2084 return(NOEXCEPTION);
2085 }
2086
2087 /*
2088 * return infinity
2089 */
2090 Sgl_setinfinity_exponentmantissa(resultp1);
2091 Sgl_copytoptr(resultp1,dstptr);
2092 return(NOEXCEPTION);
2093 }
2094 }
2095 else {
2096 /*
2097 * is NaN; signaling or quiet?
2098 */
2099 if (Sgl_isone_signaling(opnd1)) {
2100 /* trap if INVALIDTRAP enabled */
2101 if (Is_invalidtrap_enabled())
2102 return(OPC_2E_INVALIDEXCEPTION);
2103 /* make NaN quiet */
2104 Set_invalidflag();
2105 Sgl_set_quiet(opnd1);
2106 }
2107 /*
2108 * is second operand a signaling NaN?
2109 */
2110 else if (Sgl_is_signalingnan(opnd2)) {
2111 /* trap if INVALIDTRAP enabled */
2112 if (Is_invalidtrap_enabled())
2113 return(OPC_2E_INVALIDEXCEPTION);
2114 /* make NaN quiet */
2115 Set_invalidflag();
2116 Sgl_set_quiet(opnd2);
2117 Sgl_copytoptr(opnd2,dstptr);
2118 return(NOEXCEPTION);
2119 }
2120 /*
2121 * is third operand a signaling NaN?
2122 */
2123 else if (Sgl_is_signalingnan(opnd3)) {
2124 /* trap if INVALIDTRAP enabled */
2125 if (Is_invalidtrap_enabled())
2126 return(OPC_2E_INVALIDEXCEPTION);
2127 /* make NaN quiet */
2128 Set_invalidflag();
2129 Sgl_set_quiet(opnd3);
2130 Sgl_copytoptr(opnd3,dstptr);
2131 return(NOEXCEPTION);
2132 }
2133 /*
2134 * return quiet NaN
2135 */
2136 Sgl_copytoptr(opnd1,dstptr);
2137 return(NOEXCEPTION);
2138 }
2139 }
2140
2141 /*
2142 * check second operand for NaN's or infinity
2143 */
2144 if (Sgl_isinfinity_exponent(opnd2)) {
2145 if (Sgl_iszero_mantissa(opnd2)) {
2146 if (Sgl_isnotnan(opnd3)) {
2147 if (Sgl_iszero_exponentmantissa(opnd1)) {
2148 /*
2149 * invalid since multiply operands are
2150 * zero & infinity
2151 */
2152 if (Is_invalidtrap_enabled())
2153 return(OPC_2E_INVALIDEXCEPTION);
2154 Set_invalidflag();
2155 Sgl_makequietnan(opnd2);
2156 Sgl_copytoptr(opnd2,dstptr);
2157 return(NOEXCEPTION);
2158 }
2159
2160 /*
2161 * Check third operand for infinity with a
2162 * sign opposite of the multiply result
2163 */
2164 if (Sgl_isinfinity(opnd3) &&
2165 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2166 /*
2167 * invalid since attempting a magnitude
2168 * subtraction of infinities
2169 */
2170 if (Is_invalidtrap_enabled())
2171 return(OPC_2E_INVALIDEXCEPTION);
2172 Set_invalidflag();
2173 Sgl_makequietnan(resultp1);
2174 Sgl_copytoptr(resultp1,dstptr);
2175 return(NOEXCEPTION);
2176 }
2177
2178 /*
2179 * return infinity
2180 */
2181 Sgl_setinfinity_exponentmantissa(resultp1);
2182 Sgl_copytoptr(resultp1,dstptr);
2183 return(NOEXCEPTION);
2184 }
2185 }
2186 else {
2187 /*
2188 * is NaN; signaling or quiet?
2189 */
2190 if (Sgl_isone_signaling(opnd2)) {
2191 /* trap if INVALIDTRAP enabled */
2192 if (Is_invalidtrap_enabled())
2193 return(OPC_2E_INVALIDEXCEPTION);
2194 /* make NaN quiet */
2195 Set_invalidflag();
2196 Sgl_set_quiet(opnd2);
2197 }
2198 /*
2199 * is third operand a signaling NaN?
2200 */
2201 else if (Sgl_is_signalingnan(opnd3)) {
2202 /* trap if INVALIDTRAP enabled */
2203 if (Is_invalidtrap_enabled())
2204 return(OPC_2E_INVALIDEXCEPTION);
2205 /* make NaN quiet */
2206 Set_invalidflag();
2207 Sgl_set_quiet(opnd3);
2208 Sgl_copytoptr(opnd3,dstptr);
2209 return(NOEXCEPTION);
2210 }
2211 /*
2212 * return quiet NaN
2213 */
2214 Sgl_copytoptr(opnd2,dstptr);
2215 return(NOEXCEPTION);
2216 }
2217 }
2218
2219 /*
2220 * check third operand for NaN's or infinity
2221 */
2222 if (Sgl_isinfinity_exponent(opnd3)) {
2223 if (Sgl_iszero_mantissa(opnd3)) {
2224 /* return infinity */
2225 Sgl_copytoptr(opnd3,dstptr);
2226 return(NOEXCEPTION);
2227 } else {
2228 /*
2229 * is NaN; signaling or quiet?
2230 */
2231 if (Sgl_isone_signaling(opnd3)) {
2232 /* trap if INVALIDTRAP enabled */
2233 if (Is_invalidtrap_enabled())
2234 return(OPC_2E_INVALIDEXCEPTION);
2235 /* make NaN quiet */
2236 Set_invalidflag();
2237 Sgl_set_quiet(opnd3);
2238 }
2239 /*
2240 * return quiet NaN
2241 */
2242 Sgl_copytoptr(opnd3,dstptr);
2243 return(NOEXCEPTION);
2244 }
2245 }
2246
2247 /*
2248 * Generate multiply mantissa
2249 */
2250 if (Sgl_isnotzero_exponent(opnd1)) {
2251 /* set hidden bit */
2252 Sgl_clear_signexponent_set_hidden(opnd1);
2253 }
2254 else {
2255 /* check for zero */
2256 if (Sgl_iszero_mantissa(opnd1)) {
2257 /*
2258 * Perform the add opnd3 with zero here.
2259 */
2260 if (Sgl_iszero_exponentmantissa(opnd3)) {
2261 if (Is_rounding_mode(ROUNDMINUS)) {
2262 Sgl_or_signs(opnd3,resultp1);
2263 } else {
2264 Sgl_and_signs(opnd3,resultp1);
2265 }
2266 }
2267 /*
2268 * Now let's check for trapped underflow case.
2269 */
2270 else if (Sgl_iszero_exponent(opnd3) &&
2271 Is_underflowtrap_enabled()) {
2272 /* need to normalize results mantissa */
2273 sign_save = Sgl_signextendedsign(opnd3);
2274 result_exponent = 0;
2275 Sgl_leftshiftby1(opnd3);
2276 Sgl_normalize(opnd3,result_exponent);
2277 Sgl_set_sign(opnd3,/*using*/sign_save);
2278 Sgl_setwrapped_exponent(opnd3,result_exponent,
2279 unfl);
2280 Sgl_copytoptr(opnd3,dstptr);
2281 /* inexact = FALSE */
2282 return(OPC_2E_UNDERFLOWEXCEPTION);
2283 }
2284 Sgl_copytoptr(opnd3,dstptr);
2285 return(NOEXCEPTION);
2286 }
2287 /* is denormalized, adjust exponent */
2288 Sgl_clear_signexponent(opnd1);
2289 Sgl_leftshiftby1(opnd1);
2290 Sgl_normalize(opnd1,mpy_exponent);
2291 }
2292 /* opnd2 needs to have hidden bit set with msb in hidden bit */
2293 if (Sgl_isnotzero_exponent(opnd2)) {
2294 Sgl_clear_signexponent_set_hidden(opnd2);
2295 }
2296 else {
2297 /* check for zero */
2298 if (Sgl_iszero_mantissa(opnd2)) {
2299 /*
2300 * Perform the add opnd3 with zero here.
2301 */
2302 if (Sgl_iszero_exponentmantissa(opnd3)) {
2303 if (Is_rounding_mode(ROUNDMINUS)) {
2304 Sgl_or_signs(opnd3,resultp1);
2305 } else {
2306 Sgl_and_signs(opnd3,resultp1);
2307 }
2308 }
2309 /*
2310 * Now let's check for trapped underflow case.
2311 */
2312 else if (Sgl_iszero_exponent(opnd3) &&
2313 Is_underflowtrap_enabled()) {
2314 /* need to normalize results mantissa */
2315 sign_save = Sgl_signextendedsign(opnd3);
2316 result_exponent = 0;
2317 Sgl_leftshiftby1(opnd3);
2318 Sgl_normalize(opnd3,result_exponent);
2319 Sgl_set_sign(opnd3,/*using*/sign_save);
2320 Sgl_setwrapped_exponent(opnd3,result_exponent,
2321 unfl);
2322 Sgl_copytoptr(opnd3,dstptr);
2323 /* inexact = FALSE */
2324 return(OPC_2E_UNDERFLOWEXCEPTION);
2325 }
2326 Sgl_copytoptr(opnd3,dstptr);
2327 return(NOEXCEPTION);
2328 }
2329 /* is denormalized; want to normalize */
2330 Sgl_clear_signexponent(opnd2);
2331 Sgl_leftshiftby1(opnd2);
2332 Sgl_normalize(opnd2,mpy_exponent);
2333 }
2334
2335 /* Multiply the first two source mantissas together */
2336
2337 /*
2338 * The intermediate result will be kept in tmpres,
2339 * which needs enough room for 106 bits of mantissa,
2340 * so lets call it a Double extended.
2341 */
2342 Sglext_setzero(tmpresp1,tmpresp2);
2343
2344 /*
2345 * Four bits at a time are inspected in each loop, and a
2346 * simple shift and add multiply algorithm is used.
2347 */
2348 for (count = SGL_P-1; count >= 0; count -= 4) {
2349 Sglext_rightshiftby4(tmpresp1,tmpresp2);
2350 if (Sbit28(opnd1)) {
2351 /* Twoword_add should be an ADD followed by 2 ADDC's */
2352 Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
2353 }
2354 if (Sbit29(opnd1)) {
2355 Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
2356 }
2357 if (Sbit30(opnd1)) {
2358 Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
2359 }
2360 if (Sbit31(opnd1)) {
2361 Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
2362 }
2363 Sgl_rightshiftby4(opnd1);
2364 }
2365 if (Is_sexthiddenoverflow(tmpresp1)) {
2366 /* result mantissa >= 2 (mantissa overflow) */
2367 mpy_exponent++;
2368 Sglext_rightshiftby4(tmpresp1,tmpresp2);
2369 } else {
2370 Sglext_rightshiftby3(tmpresp1,tmpresp2);
2371 }
2372
2373 /*
2374 * Restore the sign of the mpy result which was saved in resultp1.
2375 * The exponent will continue to be kept in mpy_exponent.
2376 */
2377 Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
2378
2379 /*
2380 * No rounding is required, since the result of the multiply
2381 * is exact in the extended format.
2382 */
2383
2384 /*
2385 * Now we are ready to perform the add portion of the operation.
2386 *
2387 * The exponents need to be kept as integers for now, since the
2388 * multiply result might not fit into the exponent field. We
2389 * can't overflow or underflow because of this yet, since the
2390 * add could bring the final result back into range.
2391 */
2392 add_exponent = Sgl_exponent(opnd3);
2393
2394 /*
2395 * Check for denormalized or zero add operand.
2396 */
2397 if (add_exponent == 0) {
2398 /* check for zero */
2399 if (Sgl_iszero_mantissa(opnd3)) {
2400 /* right is zero */
2401 /* Left can't be zero and must be result.
2402 *
2403 * The final result is now in tmpres and mpy_exponent,
2404 * and needs to be rounded and squeezed back into
2405 * double precision format from double extended.
2406 */
2407 result_exponent = mpy_exponent;
2408 Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
2409 sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
2410 goto round;
2411 }
2412
2413 /*
2414 * Neither are zeroes.
2415 * Adjust exponent and normalize add operand.
2416 */
2417 sign_save = Sgl_signextendedsign(opnd3); /* save sign */
2418 Sgl_clear_signexponent(opnd3);
2419 Sgl_leftshiftby1(opnd3);
2420 Sgl_normalize(opnd3,add_exponent);
2421 Sgl_set_sign(opnd3,sign_save); /* restore sign */
2422 } else {
2423 Sgl_clear_exponent_set_hidden(opnd3);
2424 }
2425 /*
2426 * Copy opnd3 to the double extended variable called right.
2427 */
2428 Sgl_copyto_sglext(opnd3,rightp1,rightp2);
2429
2430 /*
2431 * A zero "save" helps discover equal operands (for later),
2432 * and is used in swapping operands (if needed).
2433 */
2434 Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
2435
2436 /*
2437 * Compare magnitude of operands.
2438 */
2439 Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
2440 Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
2441 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
2442 Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
2443 /*
2444 * Set the left operand to the larger one by XOR swap.
2445 * First finish the first word "save".
2446 */
2447 Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
2448 Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
2449 Sglext_swap_lower(tmpresp2,rightp2);
2450 /* also setup exponents used in rest of routine */
2451 diff_exponent = add_exponent - mpy_exponent;
2452 result_exponent = add_exponent;
2453 } else {
2454 /* also setup exponents used in rest of routine */
2455 diff_exponent = mpy_exponent - add_exponent;
2456 result_exponent = mpy_exponent;
2457 }
2458 /* Invariant: left is not smaller than right. */
2459
2460 /*
2461 * Special case alignment of operands that would force alignment
2462 * beyond the extent of the extension. A further optimization
2463 * could special case this but only reduces the path length for
2464 * this infrequent case.
2465 */
2466 if (diff_exponent > SGLEXT_THRESHOLD) {
2467 diff_exponent = SGLEXT_THRESHOLD;
2468 }
2469
2470 /* Align right operand by shifting it to the right */
2471 Sglext_clear_sign(rightp1);
2472 Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
2473
2474 /* Treat sum and difference of the operands separately. */
2475 if ((int)save < 0) {
2476 /*
2477 * Difference of the two operands. Overflow can occur if the
2478 * multiply overflowed. A borrow can occur out of the hidden
2479 * bit and force a post normalization phase.
2480 */
2481 Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
2482 resultp1,resultp2);
2483 sign_save = Sgl_signextendedsign(resultp1);
2484 if (Sgl_iszero_hidden(resultp1)) {
2485 /* Handle normalization */
2486 /* A straight foward algorithm would now shift the
2487 * result and extension left until the hidden bit
2488 * becomes one. Not all of the extension bits need
2489 * participate in the shift. Only the two most
2490 * significant bits (round and guard) are needed.
2491 * If only a single shift is needed then the guard
2492 * bit becomes a significant low order bit and the
2493 * extension must participate in the rounding.
2494 * If more than a single shift is needed, then all
2495 * bits to the right of the guard bit are zeros,
2496 * and the guard bit may or may not be zero. */
2497 Sglext_leftshiftby1(resultp1,resultp2);
2498
2499 /* Need to check for a zero result. The sign and
2500 * exponent fields have already been zeroed. The more
2501 * efficient test of the full object can be used.
2502 */
2503 if (Sglext_iszero(resultp1,resultp2)) {
2504 /* Must have been "x-x" or "x+(-x)". */
2505 if (Is_rounding_mode(ROUNDMINUS))
2506 Sgl_setone_sign(resultp1);
2507 Sgl_copytoptr(resultp1,dstptr);
2508 return(NOEXCEPTION);
2509 }
2510 result_exponent--;
2511
2512 /* Look to see if normalization is finished. */
2513 if (Sgl_isone_hidden(resultp1)) {
2514 /* No further normalization is needed */
2515 goto round;
2516 }
2517
2518 /* Discover first one bit to determine shift amount.
2519 * Use a modified binary search. We have already
2520 * shifted the result one position right and still
2521 * not found a one so the remainder of the extension
2522 * must be zero and simplifies rounding. */
2523 /* Scan bytes */
2524 while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
2525 Sglext_leftshiftby8(resultp1,resultp2);
2526 result_exponent -= 8;
2527 }
2528 /* Now narrow it down to the nibble */
2529 if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
2530 /* The lower nibble contains the
2531 * normalizing one */
2532 Sglext_leftshiftby4(resultp1,resultp2);
2533 result_exponent -= 4;
2534 }
2535 /* Select case where first bit is set (already
2536 * normalized) otherwise select the proper shift. */
2537 jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
2538 if (jumpsize <= 7) switch(jumpsize) {
2539 case 1:
2540 Sglext_leftshiftby3(resultp1,resultp2);
2541 result_exponent -= 3;
2542 break;
2543 case 2:
2544 case 3:
2545 Sglext_leftshiftby2(resultp1,resultp2);
2546 result_exponent -= 2;
2547 break;
2548 case 4:
2549 case 5:
2550 case 6:
2551 case 7:
2552 Sglext_leftshiftby1(resultp1,resultp2);
2553 result_exponent -= 1;
2554 break;
2555 }
2556 } /* end if (hidden...)... */
2557 /* Fall through and round */
2558 } /* end if (save < 0)... */
2559 else {
2560 /* Add magnitudes */
2561 Sglext_addition(tmpresp1,tmpresp2,
2562 rightp1,rightp2, /*to*/resultp1,resultp2);
2563 sign_save = Sgl_signextendedsign(resultp1);
2564 if (Sgl_isone_hiddenoverflow(resultp1)) {
2565 /* Prenormalization required. */
2566 Sglext_arithrightshiftby1(resultp1,resultp2);
2567 result_exponent++;
2568 } /* end if hiddenoverflow... */
2569 } /* end else ...add magnitudes... */
2570
2571 /* Round the result. If the extension and lower two words are
2572 * all zeros, then the result is exact. Otherwise round in the
2573 * correct direction. Underflow is possible. If a postnormalization
2574 * is necessary, then the mantissa is all zeros so no shift is needed.
2575 */
2576 round:
2577 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
2578 Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
2579 }
2580 Sgl_set_sign(resultp1,/*using*/sign_save);
2581 if (Sglext_isnotzero_mantissap2(resultp2)) {
2582 inexact = TRUE;
2583 switch(Rounding_mode()) {
2584 case ROUNDNEAREST: /* The default. */
2585 if (Sglext_isone_highp2(resultp2)) {
2586 /* at least 1/2 ulp */
2587 if (Sglext_isnotzero_low31p2(resultp2) ||
2588 Sglext_isone_lowp1(resultp1)) {
2589 /* either exactly half way and odd or
2590 * more than 1/2ulp */
2591 Sgl_increment(resultp1);
2592 }
2593 }
2594 break;
2595
2596 case ROUNDPLUS:
2597 if (Sgl_iszero_sign(resultp1)) {
2598 /* Round up positive results */
2599 Sgl_increment(resultp1);
2600 }
2601 break;
2602
2603 case ROUNDMINUS:
2604 if (Sgl_isone_sign(resultp1)) {
2605 /* Round down negative results */
2606 Sgl_increment(resultp1);
2607 }
2608
2609 case ROUNDZERO:;
2610 /* truncate is simple */
2611 } /* end switch... */
2612 if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
2613 }
2614 if (result_exponent >= SGL_INFINITY_EXPONENT) {
2615 /* Overflow */
2616 if (Is_overflowtrap_enabled()) {
2617 /*
2618 * Adjust bias of result
2619 */
2620 Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
2621 Sgl_copytoptr(resultp1,dstptr);
2622 if (inexact)
2623 if (Is_inexacttrap_enabled())
2624 return (OPC_2E_OVERFLOWEXCEPTION |
2625 OPC_2E_INEXACTEXCEPTION);
2626 else Set_inexactflag();
2627 return (OPC_2E_OVERFLOWEXCEPTION);
2628 }
2629 inexact = TRUE;
2630 Set_overflowflag();
2631 Sgl_setoverflow(resultp1);
2632 } else if (result_exponent <= 0) { /* underflow case */
2633 if (Is_underflowtrap_enabled()) {
2634 /*
2635 * Adjust bias of result
2636 */
2637 Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
2638 Sgl_copytoptr(resultp1,dstptr);
2639 if (inexact)
2640 if (Is_inexacttrap_enabled())
2641 return (OPC_2E_UNDERFLOWEXCEPTION |
2642 OPC_2E_INEXACTEXCEPTION);
2643 else Set_inexactflag();
2644 return(OPC_2E_UNDERFLOWEXCEPTION);
2645 }
2646 else if (inexact && is_tiny) Set_underflowflag();
2647 }
2648 else Sgl_set_exponent(resultp1,result_exponent);
2649 Sgl_copytoptr(resultp1,dstptr);
2650 if (inexact)
2651 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2652 else Set_inexactflag();
2653 return(NOEXCEPTION);
2654}
2655