Blame - arch/parisc/math-emu/fmpyfadd.c - kernel/msm-4.19

blob: b067c45c872d4fac64142b1784162ca3154bc78b [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* Linux/PA-RISC Project (http://www.parisc-linux.org/)
				3	*
				4	* Floating-point emulation code
				5	* Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
				6	*
				7	* This program is free software; you can redistribute it and/or modify
				8	* it under the terms of the GNU General Public License as published by
				9	* the Free Software Foundation; either version 2, or (at your option)
				10	* any later version.
				11	*
				12	* This program is distributed in the hope that it will be useful,
				13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				15	* GNU General Public License for more details.
				16	*
				17	* You should have received a copy of the GNU General Public License
				18	* along with this program; if not, write to the Free Software
				19	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				20	*/
				21	/*
				22	* BEGIN_DESC
				23	*
				24	* File:
				25	* @(#) pa/spmath/fmpyfadd.c $Revision: 1.1 $
				26	*
				27	* Purpose:
				28	* Double Floating-point Multiply Fused Add
				29	* Double Floating-point Multiply Negate Fused Add
				30	* Single Floating-point Multiply Fused Add
				31	* Single Floating-point Multiply Negate Fused Add
				32	*
				33	* External Interfaces:
				34	* dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
				35	* dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
				36	* sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
				37	* sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
				38	*
				39	* Internal Interfaces:
				40	*
				41	* Theory:
				42	* <<please update with a overview of the operation of this file>>
				43	*
				44	* END_DESC
				45	*/
				46
				47
				48	#include "float.h"
				49	#include "sgl_float.h"
				50	#include "dbl_float.h"
				51
				52
				53	/*
				54	* Double Floating-point Multiply Fused Add
				55	*/
				56
				57	int
				58	dbl_fmpyfadd(
				59	dbl_floating_point *src1ptr,
				60	dbl_floating_point *src2ptr,
				61	dbl_floating_point *src3ptr,
				62	unsigned int *status,
				63	dbl_floating_point *dstptr)
				64	{
				65	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
				66	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
				67	unsigned int rightp1, rightp2, rightp3, rightp4;
				68	unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
				69	register int mpy_exponent, add_exponent, count;
				70	boolean inexact = FALSE, is_tiny = FALSE;
				71
				72	unsigned int signlessleft1, signlessright1, save;
				73	register int result_exponent, diff_exponent;
				74	int sign_save, jumpsize;
				75
				76	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
				77	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
				78	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
				79
				80	/*
				81	* set sign bit of result of multiply
				82	*/
				83	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
				84	Dbl_setnegativezerop1(resultp1);
				85	else Dbl_setzerop1(resultp1);
				86
				87	/*
				88	* Generate multiply exponent
				89	*/
				90	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
				91
				92	/*
				93	* check first operand for NaN's or infinity
				94	*/
				95	if (Dbl_isinfinity_exponent(opnd1p1)) {
				96	if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
				97	if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
				98	Dbl_isnotnan(opnd3p1,opnd3p2)) {
				99	if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
				100	/*
				101	* invalid since operands are infinity
				102	* and zero
				103	*/
				104	if (Is_invalidtrap_enabled())
				105	return(OPC_2E_INVALIDEXCEPTION);
				106	Set_invalidflag();
				107	Dbl_makequietnan(resultp1,resultp2);
				108	Dbl_copytoptr(resultp1,resultp2,dstptr);
				109	return(NOEXCEPTION);
				110	}
				111	/*
				112	* Check third operand for infinity with a
				113	* sign opposite of the multiply result
				114	*/
				115	if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
				116	(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
				117	/*
				118	* invalid since attempting a magnitude
				119	* subtraction of infinities
				120	*/
				121	if (Is_invalidtrap_enabled())
				122	return(OPC_2E_INVALIDEXCEPTION);
				123	Set_invalidflag();
				124	Dbl_makequietnan(resultp1,resultp2);
				125	Dbl_copytoptr(resultp1,resultp2,dstptr);
				126	return(NOEXCEPTION);
				127	}
				128
				129	/*
				130	* return infinity
				131	*/
				132	Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
				133	Dbl_copytoptr(resultp1,resultp2,dstptr);
				134	return(NOEXCEPTION);
				135	}
				136	}
				137	else {
				138	/*
				139	* is NaN; signaling or quiet?
				140	*/
				141	if (Dbl_isone_signaling(opnd1p1)) {
				142	/* trap if INVALIDTRAP enabled */
				143	if (Is_invalidtrap_enabled())
				144	return(OPC_2E_INVALIDEXCEPTION);
				145	/* make NaN quiet */
				146	Set_invalidflag();
				147	Dbl_set_quiet(opnd1p1);
				148	}
				149	/*
				150	* is second operand a signaling NaN?
				151	*/
				152	else if (Dbl_is_signalingnan(opnd2p1)) {
				153	/* trap if INVALIDTRAP enabled */
				154	if (Is_invalidtrap_enabled())
				155	return(OPC_2E_INVALIDEXCEPTION);
				156	/* make NaN quiet */
				157	Set_invalidflag();
				158	Dbl_set_quiet(opnd2p1);
				159	Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
				160	return(NOEXCEPTION);
				161	}
				162	/*
				163	* is third operand a signaling NaN?
				164	*/
				165	else if (Dbl_is_signalingnan(opnd3p1)) {
				166	/* trap if INVALIDTRAP enabled */
				167	if (Is_invalidtrap_enabled())
				168	return(OPC_2E_INVALIDEXCEPTION);
				169	/* make NaN quiet */
				170	Set_invalidflag();
				171	Dbl_set_quiet(opnd3p1);
				172	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				173	return(NOEXCEPTION);
				174	}
				175	/*
				176	* return quiet NaN
				177	*/
				178	Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
				179	return(NOEXCEPTION);
				180	}
				181	}
				182
				183	/*
				184	* check second operand for NaN's or infinity
				185	*/
				186	if (Dbl_isinfinity_exponent(opnd2p1)) {
				187	if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
				188	if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
				189	if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
				190	/*
				191	* invalid since multiply operands are
				192	* zero & infinity
				193	*/
				194	if (Is_invalidtrap_enabled())
				195	return(OPC_2E_INVALIDEXCEPTION);
				196	Set_invalidflag();
				197	Dbl_makequietnan(opnd2p1,opnd2p2);
				198	Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
				199	return(NOEXCEPTION);
				200	}
				201
				202	/*
				203	* Check third operand for infinity with a
				204	* sign opposite of the multiply result
				205	*/
				206	if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
				207	(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
				208	/*
				209	* invalid since attempting a magnitude
				210	* subtraction of infinities
				211	*/
				212	if (Is_invalidtrap_enabled())
				213	return(OPC_2E_INVALIDEXCEPTION);
				214	Set_invalidflag();
				215	Dbl_makequietnan(resultp1,resultp2);
				216	Dbl_copytoptr(resultp1,resultp2,dstptr);
				217	return(NOEXCEPTION);
				218	}
				219
				220	/*
				221	* return infinity
				222	*/
				223	Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
				224	Dbl_copytoptr(resultp1,resultp2,dstptr);
				225	return(NOEXCEPTION);
				226	}
				227	}
				228	else {
				229	/*
				230	* is NaN; signaling or quiet?
				231	*/
				232	if (Dbl_isone_signaling(opnd2p1)) {
				233	/* trap if INVALIDTRAP enabled */
				234	if (Is_invalidtrap_enabled())
				235	return(OPC_2E_INVALIDEXCEPTION);
				236	/* make NaN quiet */
				237	Set_invalidflag();
				238	Dbl_set_quiet(opnd2p1);
				239	}
				240	/*
				241	* is third operand a signaling NaN?
				242	*/
				243	else if (Dbl_is_signalingnan(opnd3p1)) {
				244	/* trap if INVALIDTRAP enabled */
				245	if (Is_invalidtrap_enabled())
				246	return(OPC_2E_INVALIDEXCEPTION);
				247	/* make NaN quiet */
				248	Set_invalidflag();
				249	Dbl_set_quiet(opnd3p1);
				250	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				251	return(NOEXCEPTION);
				252	}
				253	/*
				254	* return quiet NaN
				255	*/
				256	Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
				257	return(NOEXCEPTION);
				258	}
				259	}
				260
				261	/*
				262	* check third operand for NaN's or infinity
				263	*/
				264	if (Dbl_isinfinity_exponent(opnd3p1)) {
				265	if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
				266	/* return infinity */
				267	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				268	return(NOEXCEPTION);
				269	} else {
				270	/*
				271	* is NaN; signaling or quiet?
				272	*/
				273	if (Dbl_isone_signaling(opnd3p1)) {
				274	/* trap if INVALIDTRAP enabled */
				275	if (Is_invalidtrap_enabled())
				276	return(OPC_2E_INVALIDEXCEPTION);
				277	/* make NaN quiet */
				278	Set_invalidflag();
				279	Dbl_set_quiet(opnd3p1);
				280	}
				281	/*
				282	* return quiet NaN
				283	*/
				284	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				285	return(NOEXCEPTION);
				286	}
				287	}
				288
				289	/*
				290	* Generate multiply mantissa
				291	*/
				292	if (Dbl_isnotzero_exponent(opnd1p1)) {
				293	/* set hidden bit */
				294	Dbl_clear_signexponent_set_hidden(opnd1p1);
				295	}
				296	else {
				297	/* check for zero */
				298	if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
				299	/*
				300	* Perform the add opnd3 with zero here.
				301	*/
				302	if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
				303	if (Is_rounding_mode(ROUNDMINUS)) {
				304	Dbl_or_signs(opnd3p1,resultp1);
				305	} else {
				306	Dbl_and_signs(opnd3p1,resultp1);
				307	}
				308	}
				309	/*
				310	* Now let's check for trapped underflow case.
				311	*/
				312	else if (Dbl_iszero_exponent(opnd3p1) &&
				313	Is_underflowtrap_enabled()) {
				314	/* need to normalize results mantissa */
				315	sign_save = Dbl_signextendedsign(opnd3p1);
				316	result_exponent = 0;
				317	Dbl_leftshiftby1(opnd3p1,opnd3p2);
				318	Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
				319	Dbl_set_sign(opnd3p1,/using/sign_save);
				320	Dbl_setwrapped_exponent(opnd3p1,result_exponent,
				321	unfl);
				322	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				323	/* inexact = FALSE */
				324	return(OPC_2E_UNDERFLOWEXCEPTION);
				325	}
				326	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				327	return(NOEXCEPTION);
				328	}
				329	/* is denormalized, adjust exponent */
				330	Dbl_clear_signexponent(opnd1p1);
				331	Dbl_leftshiftby1(opnd1p1,opnd1p2);
				332	Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
				333	}
				334	/* opnd2 needs to have hidden bit set with msb in hidden bit */
				335	if (Dbl_isnotzero_exponent(opnd2p1)) {
				336	Dbl_clear_signexponent_set_hidden(opnd2p1);
				337	}
				338	else {
				339	/* check for zero */
				340	if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
				341	/*
				342	* Perform the add opnd3 with zero here.
				343	*/
				344	if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
				345	if (Is_rounding_mode(ROUNDMINUS)) {
				346	Dbl_or_signs(opnd3p1,resultp1);
				347	} else {
				348	Dbl_and_signs(opnd3p1,resultp1);
				349	}
				350	}
				351	/*
				352	* Now let's check for trapped underflow case.
				353	*/
				354	else if (Dbl_iszero_exponent(opnd3p1) &&
				355	Is_underflowtrap_enabled()) {
				356	/* need to normalize results mantissa */
				357	sign_save = Dbl_signextendedsign(opnd3p1);
				358	result_exponent = 0;
				359	Dbl_leftshiftby1(opnd3p1,opnd3p2);
				360	Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
				361	Dbl_set_sign(opnd3p1,/using/sign_save);
				362	Dbl_setwrapped_exponent(opnd3p1,result_exponent,
				363	unfl);
				364	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				365	/* inexact = FALSE */
				366	return(OPC_2E_UNDERFLOWEXCEPTION);
				367	}
				368	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				369	return(NOEXCEPTION);
				370	}
				371	/* is denormalized; want to normalize */
				372	Dbl_clear_signexponent(opnd2p1);
				373	Dbl_leftshiftby1(opnd2p1,opnd2p2);
				374	Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
				375	}
				376
				377	/* Multiply the first two source mantissas together */
				378
				379	/*
				380	* The intermediate result will be kept in tmpres,
				381	* which needs enough room for 106 bits of mantissa,
				382	* so lets call it a Double extended.
				383	*/
				384	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
				385
				386	/*
				387	* Four bits at a time are inspected in each loop, and a
				388	* simple shift and add multiply algorithm is used.
				389	*/
				390	for (count = DBL_P-1; count >= 0; count -= 4) {
				391	Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
				392	if (Dbit28p2(opnd1p2)) {
				393	/* Fourword_add should be an ADD followed by 3 ADDC's */
				394	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
				395	opnd2p1<<3 \| opnd2p2>>29, opnd2p2<<3, 0, 0);
				396	}
				397	if (Dbit29p2(opnd1p2)) {
				398	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
				399	opnd2p1<<2 \| opnd2p2>>30, opnd2p2<<2, 0, 0);
				400	}
				401	if (Dbit30p2(opnd1p2)) {
				402	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
				403	opnd2p1<<1 \| opnd2p2>>31, opnd2p2<<1, 0, 0);
				404	}
				405	if (Dbit31p2(opnd1p2)) {
				406	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
				407	opnd2p1, opnd2p2, 0, 0);
				408	}
				409	Dbl_rightshiftby4(opnd1p1,opnd1p2);
				410	}
				411	if (Is_dexthiddenoverflow(tmpresp1)) {
				412	/* result mantissa >= 2 (mantissa overflow) */
				413	mpy_exponent++;
				414	Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
				415	}
				416
				417	/*
				418	* Restore the sign of the mpy result which was saved in resultp1.
				419	* The exponent will continue to be kept in mpy_exponent.
				420	*/
				421	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
				422
				423	/*
				424	* No rounding is required, since the result of the multiply
				425	* is exact in the extended format.
				426	*/
				427
				428	/*
				429	* Now we are ready to perform the add portion of the operation.
				430	*
				431	* The exponents need to be kept as integers for now, since the
				432	* multiply result might not fit into the exponent field. We
				433	* can't overflow or underflow because of this yet, since the
				434	* add could bring the final result back into range.
				435	*/
				436	add_exponent = Dbl_exponent(opnd3p1);
				437
				438	/*
				439	* Check for denormalized or zero add operand.
				440	*/
				441	if (add_exponent == 0) {
				442	/* check for zero */
				443	if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
				444	/* right is zero */
				445	/* Left can't be zero and must be result.
				446	*
				447	* The final result is now in tmpres and mpy_exponent,
				448	* and needs to be rounded and squeezed back into
				449	* double precision format from double extended.
				450	*/
				451	result_exponent = mpy_exponent;
				452	Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
				453	resultp1,resultp2,resultp3,resultp4);
				454	sign_save = Dbl_signextendedsign(resultp1);/save sign/
				455	goto round;
				456	}
				457
				458	/*
				459	* Neither are zeroes.
				460	* Adjust exponent and normalize add operand.
				461	*/
				462	sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
				463	Dbl_clear_signexponent(opnd3p1);
				464	Dbl_leftshiftby1(opnd3p1,opnd3p2);
				465	Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
				466	Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
				467	} else {
				468	Dbl_clear_exponent_set_hidden(opnd3p1);
				469	}
				470	/*
				471	* Copy opnd3 to the double extended variable called right.
				472	*/
				473	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
				474
				475	/*
				476	* A zero "save" helps discover equal operands (for later),
				477	* and is used in swapping operands (if needed).
				478	*/
				479	Dblext_xortointp1(tmpresp1,rightp1,/to/save);
				480
				481	/*
				482	* Compare magnitude of operands.
				483	*/
				484	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
				485	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
				486	if (mpy_exponent < add_exponent \|\| mpy_exponent == add_exponent &&
				487	Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
				488	/*
				489	* Set the left operand to the larger one by XOR swap.
				490	* First finish the first word "save".
				491	*/
				492	Dblext_xorfromintp1(save,rightp1,/to/rightp1);
				493	Dblext_xorfromintp1(save,tmpresp1,/to/tmpresp1);
				494	Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
				495	rightp2,rightp3,rightp4);
				496	/* also setup exponents used in rest of routine */
				497	diff_exponent = add_exponent - mpy_exponent;
				498	result_exponent = add_exponent;
				499	} else {
				500	/* also setup exponents used in rest of routine */
				501	diff_exponent = mpy_exponent - add_exponent;
				502	result_exponent = mpy_exponent;
				503	}
				504	/* Invariant: left is not smaller than right. */
				505
				506	/*
				507	* Special case alignment of operands that would force alignment
				508	* beyond the extent of the extension. A further optimization
				509	* could special case this but only reduces the path length for
				510	* this infrequent case.
				511	*/
				512	if (diff_exponent > DBLEXT_THRESHOLD) {
				513	diff_exponent = DBLEXT_THRESHOLD;
				514	}
				515
				516	/* Align right operand by shifting it to the right */
				517	Dblext_clear_sign(rightp1);
				518	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
				519	/shifted by/diff_exponent);
				520
				521	/* Treat sum and difference of the operands separately. */
				522	if ((int)save < 0) {
				523	/*
				524	* Difference of the two operands. Overflow can occur if the
				525	* multiply overflowed. A borrow can occur out of the hidden
				526	* bit and force a post normalization phase.
				527	*/
				528	Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
				529	rightp1,rightp2,rightp3,rightp4,
				530	resultp1,resultp2,resultp3,resultp4);
				531	sign_save = Dbl_signextendedsign(resultp1);
				532	if (Dbl_iszero_hidden(resultp1)) {
				533	/* Handle normalization */
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	534	/* A straightforward algorithm would now shift the
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	535	* result and extension left until the hidden bit
				536	* becomes one. Not all of the extension bits need
				537	* participate in the shift. Only the two most
				538	* significant bits (round and guard) are needed.
				539	* If only a single shift is needed then the guard
				540	* bit becomes a significant low order bit and the
				541	* extension must participate in the rounding.
				542	* If more than a single shift is needed, then all
				543	* bits to the right of the guard bit are zeros,
				544	* and the guard bit may or may not be zero. */
				545	Dblext_leftshiftby1(resultp1,resultp2,resultp3,
				546	resultp4);
				547
				548	/* Need to check for a zero result. The sign and
				549	* exponent fields have already been zeroed. The more
				550	* efficient test of the full object can be used.
				551	*/
				552	if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
				553	/* Must have been "x-x" or "x+(-x)". */
				554	if (Is_rounding_mode(ROUNDMINUS))
				555	Dbl_setone_sign(resultp1);
				556	Dbl_copytoptr(resultp1,resultp2,dstptr);
				557	return(NOEXCEPTION);
				558	}
				559	result_exponent--;
				560
				561	/* Look to see if normalization is finished. */
				562	if (Dbl_isone_hidden(resultp1)) {
				563	/* No further normalization is needed */
				564	goto round;
				565	}
				566
				567	/* Discover first one bit to determine shift amount.
				568	* Use a modified binary search. We have already
				569	* shifted the result one position right and still
				570	* not found a one so the remainder of the extension
				571	* must be zero and simplifies rounding. */
				572	/* Scan bytes */
				573	while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
				574	Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
				575	result_exponent -= 8;
				576	}
				577	/* Now narrow it down to the nibble */
				578	if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
				579	/* The lower nibble contains the
				580	* normalizing one */
				581	Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
				582	result_exponent -= 4;
				583	}
				584	/* Select case where first bit is set (already
				585	* normalized) otherwise select the proper shift. */
				586	jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
				587	if (jumpsize <= 7) switch(jumpsize) {
				588	case 1:
				589	Dblext_leftshiftby3(resultp1,resultp2,resultp3,
				590	resultp4);
				591	result_exponent -= 3;
				592	break;
				593	case 2:
				594	case 3:
				595	Dblext_leftshiftby2(resultp1,resultp2,resultp3,
				596	resultp4);
				597	result_exponent -= 2;
				598	break;
				599	case 4:
				600	case 5:
				601	case 6:
				602	case 7:
				603	Dblext_leftshiftby1(resultp1,resultp2,resultp3,
				604	resultp4);
				605	result_exponent -= 1;
				606	break;
				607	}
				608	} /* end if (hidden...)... */
				609	/* Fall through and round */
				610	} /* end if (save < 0)... */
				611	else {
				612	/* Add magnitudes */
				613	Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
				614	rightp1,rightp2,rightp3,rightp4,
				615	/to/resultp1,resultp2,resultp3,resultp4);
				616	sign_save = Dbl_signextendedsign(resultp1);
				617	if (Dbl_isone_hiddenoverflow(resultp1)) {
				618	/* Prenormalization required. */
				619	Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
				620	resultp4);
				621	result_exponent++;
				622	} /* end if hiddenoverflow... */
				623	} /* end else ...add magnitudes... */
				624
				625	/* Round the result. If the extension and lower two words are
				626	* all zeros, then the result is exact. Otherwise round in the
				627	* correct direction. Underflow is possible. If a postnormalization
				628	* is necessary, then the mantissa is all zeros so no shift is needed.
				629	*/
				630	round:
				631	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
				632	Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
				633	result_exponent,is_tiny);
				634	}
				635	Dbl_set_sign(resultp1,/using/sign_save);
				636	if (Dblext_isnotzero_mantissap3(resultp3) \|\|
				637	Dblext_isnotzero_mantissap4(resultp4)) {
				638	inexact = TRUE;
				639	switch(Rounding_mode()) {
				640	case ROUNDNEAREST: /* The default. */
				641	if (Dblext_isone_highp3(resultp3)) {
				642	/* at least 1/2 ulp */
				643	if (Dblext_isnotzero_low31p3(resultp3) \|\|
				644	Dblext_isnotzero_mantissap4(resultp4) \|\|
				645	Dblext_isone_lowp2(resultp2)) {
				646	/* either exactly half way and odd or
				647	* more than 1/2ulp */
				648	Dbl_increment(resultp1,resultp2);
				649	}
				650	}
				651	break;
				652
				653	case ROUNDPLUS:
				654	if (Dbl_iszero_sign(resultp1)) {
				655	/* Round up positive results */
				656	Dbl_increment(resultp1,resultp2);
				657	}
				658	break;
				659
				660	case ROUNDMINUS:
				661	if (Dbl_isone_sign(resultp1)) {
				662	/* Round down negative results */
				663	Dbl_increment(resultp1,resultp2);
				664	}
				665
				666	case ROUNDZERO:;
				667	/* truncate is simple */
				668	} /* end switch... */
				669	if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
				670	}
				671	if (result_exponent >= DBL_INFINITY_EXPONENT) {
				672	/* trap if OVERFLOWTRAP enabled */
				673	if (Is_overflowtrap_enabled()) {
				674	/*
				675	* Adjust bias of result
				676	*/
				677	Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
				678	Dbl_copytoptr(resultp1,resultp2,dstptr);
				679	if (inexact)
				680	if (Is_inexacttrap_enabled())
				681	return (OPC_2E_OVERFLOWEXCEPTION \|
				682	OPC_2E_INEXACTEXCEPTION);
				683	else Set_inexactflag();
				684	return (OPC_2E_OVERFLOWEXCEPTION);
				685	}
				686	inexact = TRUE;
				687	Set_overflowflag();
				688	/* set result to infinity or largest number */
				689	Dbl_setoverflow(resultp1,resultp2);
				690
				691	} else if (result_exponent <= 0) { /* underflow case */
				692	if (Is_underflowtrap_enabled()) {
				693	/*
				694	* Adjust bias of result
				695	*/
				696	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
				697	Dbl_copytoptr(resultp1,resultp2,dstptr);
				698	if (inexact)
				699	if (Is_inexacttrap_enabled())
				700	return (OPC_2E_UNDERFLOWEXCEPTION \|
				701	OPC_2E_INEXACTEXCEPTION);
				702	else Set_inexactflag();
				703	return(OPC_2E_UNDERFLOWEXCEPTION);
				704	}
				705	else if (inexact && is_tiny) Set_underflowflag();
				706	}
				707	else Dbl_set_exponent(resultp1,result_exponent);
				708	Dbl_copytoptr(resultp1,resultp2,dstptr);
				709	if (inexact)
				710	if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
				711	else Set_inexactflag();
				712	return(NOEXCEPTION);
				713	}
				714
				715	/*
				716	* Double Floating-point Multiply Negate Fused Add
				717	*/
				718
				719	dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
				720
				721	dbl_floating_point src1ptr, src2ptr, src3ptr, dstptr;
				722	unsigned int *status;
				723	{
				724	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
				725	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
				726	unsigned int rightp1, rightp2, rightp3, rightp4;
				727	unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
				728	register int mpy_exponent, add_exponent, count;
				729	boolean inexact = FALSE, is_tiny = FALSE;
				730
				731	unsigned int signlessleft1, signlessright1, save;
				732	register int result_exponent, diff_exponent;
				733	int sign_save, jumpsize;
				734
				735	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
				736	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
				737	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
				738
				739	/*
				740	* set sign bit of result of multiply
				741	*/
				742	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
				743	Dbl_setzerop1(resultp1);
				744	else
				745	Dbl_setnegativezerop1(resultp1);
				746
				747	/*
				748	* Generate multiply exponent
				749	*/
				750	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
				751
				752	/*
				753	* check first operand for NaN's or infinity
				754	*/
				755	if (Dbl_isinfinity_exponent(opnd1p1)) {
				756	if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
				757	if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
				758	Dbl_isnotnan(opnd3p1,opnd3p2)) {
				759	if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
				760	/*
				761	* invalid since operands are infinity
				762	* and zero
				763	*/
				764	if (Is_invalidtrap_enabled())
				765	return(OPC_2E_INVALIDEXCEPTION);
				766	Set_invalidflag();
				767	Dbl_makequietnan(resultp1,resultp2);
				768	Dbl_copytoptr(resultp1,resultp2,dstptr);
				769	return(NOEXCEPTION);
				770	}
				771	/*
				772	* Check third operand for infinity with a
				773	* sign opposite of the multiply result
				774	*/
				775	if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
				776	(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
				777	/*
				778	* invalid since attempting a magnitude
				779	* subtraction of infinities
				780	*/
				781	if (Is_invalidtrap_enabled())
				782	return(OPC_2E_INVALIDEXCEPTION);
				783	Set_invalidflag();
				784	Dbl_makequietnan(resultp1,resultp2);
				785	Dbl_copytoptr(resultp1,resultp2,dstptr);
				786	return(NOEXCEPTION);
				787	}
				788
				789	/*
				790	* return infinity
				791	*/
				792	Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
				793	Dbl_copytoptr(resultp1,resultp2,dstptr);
				794	return(NOEXCEPTION);
				795	}
				796	}
				797	else {
				798	/*
				799	* is NaN; signaling or quiet?
				800	*/
				801	if (Dbl_isone_signaling(opnd1p1)) {
				802	/* trap if INVALIDTRAP enabled */
				803	if (Is_invalidtrap_enabled())
				804	return(OPC_2E_INVALIDEXCEPTION);
				805	/* make NaN quiet */
				806	Set_invalidflag();
				807	Dbl_set_quiet(opnd1p1);
				808	}
				809	/*
				810	* is second operand a signaling NaN?
				811	*/
				812	else if (Dbl_is_signalingnan(opnd2p1)) {
				813	/* trap if INVALIDTRAP enabled */
				814	if (Is_invalidtrap_enabled())
				815	return(OPC_2E_INVALIDEXCEPTION);
				816	/* make NaN quiet */
				817	Set_invalidflag();
				818	Dbl_set_quiet(opnd2p1);
				819	Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
				820	return(NOEXCEPTION);
				821	}
				822	/*
				823	* is third operand a signaling NaN?
				824	*/
				825	else if (Dbl_is_signalingnan(opnd3p1)) {
				826	/* trap if INVALIDTRAP enabled */
				827	if (Is_invalidtrap_enabled())
				828	return(OPC_2E_INVALIDEXCEPTION);
				829	/* make NaN quiet */
				830	Set_invalidflag();
				831	Dbl_set_quiet(opnd3p1);
				832	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				833	return(NOEXCEPTION);
				834	}
				835	/*
				836	* return quiet NaN
				837	*/
				838	Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
				839	return(NOEXCEPTION);
				840	}
				841	}
				842
				843	/*
				844	* check second operand for NaN's or infinity
				845	*/
				846	if (Dbl_isinfinity_exponent(opnd2p1)) {
				847	if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
				848	if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
				849	if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
				850	/*
				851	* invalid since multiply operands are
				852	* zero & infinity
				853	*/
				854	if (Is_invalidtrap_enabled())
				855	return(OPC_2E_INVALIDEXCEPTION);
				856	Set_invalidflag();
				857	Dbl_makequietnan(opnd2p1,opnd2p2);
				858	Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
				859	return(NOEXCEPTION);
				860	}
				861
				862	/*
				863	* Check third operand for infinity with a
				864	* sign opposite of the multiply result
				865	*/
				866	if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
				867	(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
				868	/*
				869	* invalid since attempting a magnitude
				870	* subtraction of infinities
				871	*/
				872	if (Is_invalidtrap_enabled())
				873	return(OPC_2E_INVALIDEXCEPTION);
				874	Set_invalidflag();
				875	Dbl_makequietnan(resultp1,resultp2);
				876	Dbl_copytoptr(resultp1,resultp2,dstptr);
				877	return(NOEXCEPTION);
				878	}
				879
				880	/*
				881	* return infinity
				882	*/
				883	Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
				884	Dbl_copytoptr(resultp1,resultp2,dstptr);
				885	return(NOEXCEPTION);
				886	}
				887	}
				888	else {
				889	/*
				890	* is NaN; signaling or quiet?
				891	*/
				892	if (Dbl_isone_signaling(opnd2p1)) {
				893	/* trap if INVALIDTRAP enabled */
				894	if (Is_invalidtrap_enabled())
				895	return(OPC_2E_INVALIDEXCEPTION);
				896	/* make NaN quiet */
				897	Set_invalidflag();
				898	Dbl_set_quiet(opnd2p1);
				899	}
				900	/*
				901	* is third operand a signaling NaN?
				902	*/
				903	else if (Dbl_is_signalingnan(opnd3p1)) {
				904	/* trap if INVALIDTRAP enabled */
				905	if (Is_invalidtrap_enabled())
				906	return(OPC_2E_INVALIDEXCEPTION);
				907	/* make NaN quiet */
				908	Set_invalidflag();
				909	Dbl_set_quiet(opnd3p1);
				910	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				911	return(NOEXCEPTION);
				912	}
				913	/*
				914	* return quiet NaN
				915	*/
				916	Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
				917	return(NOEXCEPTION);
				918	}
				919	}
				920
				921	/*
				922	* check third operand for NaN's or infinity
				923	*/
				924	if (Dbl_isinfinity_exponent(opnd3p1)) {
				925	if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
				926	/* return infinity */
				927	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				928	return(NOEXCEPTION);
				929	} else {
				930	/*
				931	* is NaN; signaling or quiet?
				932	*/
				933	if (Dbl_isone_signaling(opnd3p1)) {
				934	/* trap if INVALIDTRAP enabled */
				935	if (Is_invalidtrap_enabled())
				936	return(OPC_2E_INVALIDEXCEPTION);
				937	/* make NaN quiet */
				938	Set_invalidflag();
				939	Dbl_set_quiet(opnd3p1);
				940	}
				941	/*
				942	* return quiet NaN
				943	*/
				944	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				945	return(NOEXCEPTION);
				946	}
				947	}
				948
				949	/*
				950	* Generate multiply mantissa
				951	*/
				952	if (Dbl_isnotzero_exponent(opnd1p1)) {
				953	/* set hidden bit */
				954	Dbl_clear_signexponent_set_hidden(opnd1p1);
				955	}
				956	else {
				957	/* check for zero */
				958	if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
				959	/*
				960	* Perform the add opnd3 with zero here.
				961	*/
				962	if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
				963	if (Is_rounding_mode(ROUNDMINUS)) {
				964	Dbl_or_signs(opnd3p1,resultp1);
				965	} else {
				966	Dbl_and_signs(opnd3p1,resultp1);
				967	}
				968	}
				969	/*
				970	* Now let's check for trapped underflow case.
				971	*/
				972	else if (Dbl_iszero_exponent(opnd3p1) &&
				973	Is_underflowtrap_enabled()) {
				974	/* need to normalize results mantissa */
				975	sign_save = Dbl_signextendedsign(opnd3p1);
				976	result_exponent = 0;
				977	Dbl_leftshiftby1(opnd3p1,opnd3p2);
				978	Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
				979	Dbl_set_sign(opnd3p1,/using/sign_save);
				980	Dbl_setwrapped_exponent(opnd3p1,result_exponent,
				981	unfl);
				982	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				983	/* inexact = FALSE */
				984	return(OPC_2E_UNDERFLOWEXCEPTION);
				985	}
				986	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				987	return(NOEXCEPTION);
				988	}
				989	/* is denormalized, adjust exponent */
				990	Dbl_clear_signexponent(opnd1p1);
				991	Dbl_leftshiftby1(opnd1p1,opnd1p2);
				992	Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
				993	}
				994	/* opnd2 needs to have hidden bit set with msb in hidden bit */
				995	if (Dbl_isnotzero_exponent(opnd2p1)) {
				996	Dbl_clear_signexponent_set_hidden(opnd2p1);
				997	}
				998	else {
				999	/* check for zero */
				1000	if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
				1001	/*
				1002	* Perform the add opnd3 with zero here.
				1003	*/
				1004	if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
				1005	if (Is_rounding_mode(ROUNDMINUS)) {
				1006	Dbl_or_signs(opnd3p1,resultp1);
				1007	} else {
				1008	Dbl_and_signs(opnd3p1,resultp1);
				1009	}
				1010	}
				1011	/*
				1012	* Now let's check for trapped underflow case.
				1013	*/
				1014	else if (Dbl_iszero_exponent(opnd3p1) &&
				1015	Is_underflowtrap_enabled()) {
				1016	/* need to normalize results mantissa */
				1017	sign_save = Dbl_signextendedsign(opnd3p1);
				1018	result_exponent = 0;
				1019	Dbl_leftshiftby1(opnd3p1,opnd3p2);
				1020	Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
				1021	Dbl_set_sign(opnd3p1,/using/sign_save);
				1022	Dbl_setwrapped_exponent(opnd3p1,result_exponent,
				1023	unfl);
				1024	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				1025	/* inexact = FALSE */
				1026	return(OPC_2E_UNDERFLOWEXCEPTION);
				1027	}
				1028	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				1029	return(NOEXCEPTION);
				1030	}
				1031	/* is denormalized; want to normalize */
				1032	Dbl_clear_signexponent(opnd2p1);
				1033	Dbl_leftshiftby1(opnd2p1,opnd2p2);
				1034	Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
				1035	}
				1036
				1037	/* Multiply the first two source mantissas together */
				1038
				1039	/*
				1040	* The intermediate result will be kept in tmpres,
				1041	* which needs enough room for 106 bits of mantissa,
				1042	* so lets call it a Double extended.
				1043	*/
				1044	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
				1045
				1046	/*
				1047	* Four bits at a time are inspected in each loop, and a
				1048	* simple shift and add multiply algorithm is used.
				1049	*/
				1050	for (count = DBL_P-1; count >= 0; count -= 4) {
				1051	Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
				1052	if (Dbit28p2(opnd1p2)) {
				1053	/* Fourword_add should be an ADD followed by 3 ADDC's */
				1054	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
				1055	opnd2p1<<3 \| opnd2p2>>29, opnd2p2<<3, 0, 0);
				1056	}
				1057	if (Dbit29p2(opnd1p2)) {
				1058	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
				1059	opnd2p1<<2 \| opnd2p2>>30, opnd2p2<<2, 0, 0);
				1060	}
				1061	if (Dbit30p2(opnd1p2)) {
				1062	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
				1063	opnd2p1<<1 \| opnd2p2>>31, opnd2p2<<1, 0, 0);
				1064	}
				1065	if (Dbit31p2(opnd1p2)) {
				1066	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
				1067	opnd2p1, opnd2p2, 0, 0);
				1068	}
				1069	Dbl_rightshiftby4(opnd1p1,opnd1p2);
				1070	}
				1071	if (Is_dexthiddenoverflow(tmpresp1)) {
				1072	/* result mantissa >= 2 (mantissa overflow) */
				1073	mpy_exponent++;
				1074	Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
				1075	}
				1076
				1077	/*
				1078	* Restore the sign of the mpy result which was saved in resultp1.
				1079	* The exponent will continue to be kept in mpy_exponent.
				1080	*/
				1081	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
				1082
				1083	/*
				1084	* No rounding is required, since the result of the multiply
				1085	* is exact in the extended format.
				1086	*/
				1087
				1088	/*
				1089	* Now we are ready to perform the add portion of the operation.
				1090	*
				1091	* The exponents need to be kept as integers for now, since the
				1092	* multiply result might not fit into the exponent field. We
				1093	* can't overflow or underflow because of this yet, since the
				1094	* add could bring the final result back into range.
				1095	*/
				1096	add_exponent = Dbl_exponent(opnd3p1);
				1097
				1098	/*
				1099	* Check for denormalized or zero add operand.
				1100	*/
				1101	if (add_exponent == 0) {
				1102	/* check for zero */
				1103	if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
				1104	/* right is zero */
				1105	/* Left can't be zero and must be result.
				1106	*
				1107	* The final result is now in tmpres and mpy_exponent,
				1108	* and needs to be rounded and squeezed back into
				1109	* double precision format from double extended.
				1110	*/
				1111	result_exponent = mpy_exponent;
				1112	Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
				1113	resultp1,resultp2,resultp3,resultp4);
				1114	sign_save = Dbl_signextendedsign(resultp1);/save sign/
				1115	goto round;
				1116	}
				1117
				1118	/*
				1119	* Neither are zeroes.
				1120	* Adjust exponent and normalize add operand.
				1121	*/
				1122	sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
				1123	Dbl_clear_signexponent(opnd3p1);
				1124	Dbl_leftshiftby1(opnd3p1,opnd3p2);
				1125	Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
				1126	Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
				1127	} else {
				1128	Dbl_clear_exponent_set_hidden(opnd3p1);
				1129	}
				1130	/*
				1131	* Copy opnd3 to the double extended variable called right.
				1132	*/
				1133	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
				1134
				1135	/*
				1136	* A zero "save" helps discover equal operands (for later),
				1137	* and is used in swapping operands (if needed).
				1138	*/
				1139	Dblext_xortointp1(tmpresp1,rightp1,/to/save);
				1140
				1141	/*
				1142	* Compare magnitude of operands.
				1143	*/
				1144	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
				1145	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
				1146	if (mpy_exponent < add_exponent \|\| mpy_exponent == add_exponent &&
				1147	Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
				1148	/*
				1149	* Set the left operand to the larger one by XOR swap.
				1150	* First finish the first word "save".
				1151	*/
				1152	Dblext_xorfromintp1(save,rightp1,/to/rightp1);
				1153	Dblext_xorfromintp1(save,tmpresp1,/to/tmpresp1);
				1154	Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
				1155	rightp2,rightp3,rightp4);
				1156	/* also setup exponents used in rest of routine */
				1157	diff_exponent = add_exponent - mpy_exponent;
				1158	result_exponent = add_exponent;
				1159	} else {
				1160	/* also setup exponents used in rest of routine */
				1161	diff_exponent = mpy_exponent - add_exponent;
				1162	result_exponent = mpy_exponent;
				1163	}
				1164	/* Invariant: left is not smaller than right. */
				1165
				1166	/*
				1167	* Special case alignment of operands that would force alignment
				1168	* beyond the extent of the extension. A further optimization
				1169	* could special case this but only reduces the path length for
				1170	* this infrequent case.
				1171	*/
				1172	if (diff_exponent > DBLEXT_THRESHOLD) {
				1173	diff_exponent = DBLEXT_THRESHOLD;
				1174	}
				1175
				1176	/* Align right operand by shifting it to the right */
				1177	Dblext_clear_sign(rightp1);
				1178	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
				1179	/shifted by/diff_exponent);
				1180
				1181	/* Treat sum and difference of the operands separately. */
				1182	if ((int)save < 0) {
				1183	/*
				1184	* Difference of the two operands. Overflow can occur if the
				1185	* multiply overflowed. A borrow can occur out of the hidden
				1186	* bit and force a post normalization phase.
				1187	*/
				1188	Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
				1189	rightp1,rightp2,rightp3,rightp4,
				1190	resultp1,resultp2,resultp3,resultp4);
				1191	sign_save = Dbl_signextendedsign(resultp1);
				1192	if (Dbl_iszero_hidden(resultp1)) {
				1193	/* Handle normalization */
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	1194	/* A straightforward algorithm would now shift the
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1195	* result and extension left until the hidden bit
				1196	* becomes one. Not all of the extension bits need
				1197	* participate in the shift. Only the two most
				1198	* significant bits (round and guard) are needed.
				1199	* If only a single shift is needed then the guard
				1200	* bit becomes a significant low order bit and the
				1201	* extension must participate in the rounding.
				1202	* If more than a single shift is needed, then all
				1203	* bits to the right of the guard bit are zeros,
				1204	* and the guard bit may or may not be zero. */
				1205	Dblext_leftshiftby1(resultp1,resultp2,resultp3,
				1206	resultp4);
				1207
				1208	/* Need to check for a zero result. The sign and
				1209	* exponent fields have already been zeroed. The more
				1210	* efficient test of the full object can be used.
				1211	*/
				1212	if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
				1213	/* Must have been "x-x" or "x+(-x)". */
				1214	if (Is_rounding_mode(ROUNDMINUS))
				1215	Dbl_setone_sign(resultp1);
				1216	Dbl_copytoptr(resultp1,resultp2,dstptr);
				1217	return(NOEXCEPTION);
				1218	}
				1219	result_exponent--;
				1220
				1221	/* Look to see if normalization is finished. */
				1222	if (Dbl_isone_hidden(resultp1)) {
				1223	/* No further normalization is needed */
				1224	goto round;
				1225	}
				1226
				1227	/* Discover first one bit to determine shift amount.
				1228	* Use a modified binary search. We have already
				1229	* shifted the result one position right and still
				1230	* not found a one so the remainder of the extension
				1231	* must be zero and simplifies rounding. */
				1232	/* Scan bytes */
				1233	while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
				1234	Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
				1235	result_exponent -= 8;
				1236	}
				1237	/* Now narrow it down to the nibble */
				1238	if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
				1239	/* The lower nibble contains the
				1240	* normalizing one */
				1241	Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
				1242	result_exponent -= 4;
				1243	}
				1244	/* Select case where first bit is set (already
				1245	* normalized) otherwise select the proper shift. */
				1246	jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
				1247	if (jumpsize <= 7) switch(jumpsize) {
				1248	case 1:
				1249	Dblext_leftshiftby3(resultp1,resultp2,resultp3,
				1250	resultp4);
				1251	result_exponent -= 3;
				1252	break;
				1253	case 2:
				1254	case 3:
				1255	Dblext_leftshiftby2(resultp1,resultp2,resultp3,
				1256	resultp4);
				1257	result_exponent -= 2;
				1258	break;
				1259	case 4:
				1260	case 5:
				1261	case 6:
				1262	case 7:
				1263	Dblext_leftshiftby1(resultp1,resultp2,resultp3,
				1264	resultp4);
				1265	result_exponent -= 1;
				1266	break;
				1267	}
				1268	} /* end if (hidden...)... */
				1269	/* Fall through and round */
				1270	} /* end if (save < 0)... */
				1271	else {
				1272	/* Add magnitudes */
				1273	Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
				1274	rightp1,rightp2,rightp3,rightp4,
				1275	/to/resultp1,resultp2,resultp3,resultp4);
				1276	sign_save = Dbl_signextendedsign(resultp1);
				1277	if (Dbl_isone_hiddenoverflow(resultp1)) {
				1278	/* Prenormalization required. */
				1279	Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
				1280	resultp4);
				1281	result_exponent++;
				1282	} /* end if hiddenoverflow... */
				1283	} /* end else ...add magnitudes... */
				1284
				1285	/* Round the result. If the extension and lower two words are
				1286	* all zeros, then the result is exact. Otherwise round in the
				1287	* correct direction. Underflow is possible. If a postnormalization
				1288	* is necessary, then the mantissa is all zeros so no shift is needed.
				1289	*/
				1290	round:
				1291	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
				1292	Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
				1293	result_exponent,is_tiny);
				1294	}
				1295	Dbl_set_sign(resultp1,/using/sign_save);
				1296	if (Dblext_isnotzero_mantissap3(resultp3) \|\|
				1297	Dblext_isnotzero_mantissap4(resultp4)) {
				1298	inexact = TRUE;
				1299	switch(Rounding_mode()) {
				1300	case ROUNDNEAREST: /* The default. */
				1301	if (Dblext_isone_highp3(resultp3)) {
				1302	/* at least 1/2 ulp */
				1303	if (Dblext_isnotzero_low31p3(resultp3) \|\|
				1304	Dblext_isnotzero_mantissap4(resultp4) \|\|
				1305	Dblext_isone_lowp2(resultp2)) {
				1306	/* either exactly half way and odd or
				1307	* more than 1/2ulp */
				1308	Dbl_increment(resultp1,resultp2);
				1309	}
				1310	}
				1311	break;
				1312
				1313	case ROUNDPLUS:
				1314	if (Dbl_iszero_sign(resultp1)) {
				1315	/* Round up positive results */
				1316	Dbl_increment(resultp1,resultp2);
				1317	}
				1318	break;
				1319
				1320	case ROUNDMINUS:
				1321	if (Dbl_isone_sign(resultp1)) {
				1322	/* Round down negative results */
				1323	Dbl_increment(resultp1,resultp2);
				1324	}
				1325
				1326	case ROUNDZERO:;
				1327	/* truncate is simple */
				1328	} /* end switch... */
				1329	if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
				1330	}
				1331	if (result_exponent >= DBL_INFINITY_EXPONENT) {
				1332	/* Overflow */
				1333	if (Is_overflowtrap_enabled()) {
				1334	/*
				1335	* Adjust bias of result
				1336	*/
				1337	Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
				1338	Dbl_copytoptr(resultp1,resultp2,dstptr);
				1339	if (inexact)
				1340	if (Is_inexacttrap_enabled())
				1341	return (OPC_2E_OVERFLOWEXCEPTION \|
				1342	OPC_2E_INEXACTEXCEPTION);
				1343	else Set_inexactflag();
				1344	return (OPC_2E_OVERFLOWEXCEPTION);
				1345	}
				1346	inexact = TRUE;
				1347	Set_overflowflag();
				1348	Dbl_setoverflow(resultp1,resultp2);
				1349	} else if (result_exponent <= 0) { /* underflow case */
				1350	if (Is_underflowtrap_enabled()) {
				1351	/*
				1352	* Adjust bias of result
				1353	*/
				1354	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
				1355	Dbl_copytoptr(resultp1,resultp2,dstptr);
				1356	if (inexact)
				1357	if (Is_inexacttrap_enabled())
				1358	return (OPC_2E_UNDERFLOWEXCEPTION \|
				1359	OPC_2E_INEXACTEXCEPTION);
				1360	else Set_inexactflag();
				1361	return(OPC_2E_UNDERFLOWEXCEPTION);
				1362	}
				1363	else if (inexact && is_tiny) Set_underflowflag();
				1364	}
				1365	else Dbl_set_exponent(resultp1,result_exponent);
				1366	Dbl_copytoptr(resultp1,resultp2,dstptr);
				1367	if (inexact)
				1368	if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
				1369	else Set_inexactflag();
				1370	return(NOEXCEPTION);
				1371	}
				1372
				1373	/*
				1374	* Single Floating-point Multiply Fused Add
				1375	*/
				1376
				1377	sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
				1378
				1379	sgl_floating_point src1ptr, src2ptr, src3ptr, dstptr;
				1380	unsigned int *status;
				1381	{
				1382	unsigned int opnd1, opnd2, opnd3;
				1383	register unsigned int tmpresp1, tmpresp2;
				1384	unsigned int rightp1, rightp2;
				1385	unsigned int resultp1, resultp2 = 0;
				1386	register int mpy_exponent, add_exponent, count;
				1387	boolean inexact = FALSE, is_tiny = FALSE;
				1388
				1389	unsigned int signlessleft1, signlessright1, save;
				1390	register int result_exponent, diff_exponent;
				1391	int sign_save, jumpsize;
				1392
				1393	Sgl_copyfromptr(src1ptr,opnd1);
				1394	Sgl_copyfromptr(src2ptr,opnd2);
				1395	Sgl_copyfromptr(src3ptr,opnd3);
				1396
				1397	/*
				1398	* set sign bit of result of multiply
				1399	*/
				1400	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
				1401	Sgl_setnegativezero(resultp1);
				1402	else Sgl_setzero(resultp1);
				1403
				1404	/*
				1405	* Generate multiply exponent
				1406	*/
				1407	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
				1408
				1409	/*
				1410	* check first operand for NaN's or infinity
				1411	*/
				1412	if (Sgl_isinfinity_exponent(opnd1)) {
				1413	if (Sgl_iszero_mantissa(opnd1)) {
				1414	if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
				1415	if (Sgl_iszero_exponentmantissa(opnd2)) {
				1416	/*
				1417	* invalid since operands are infinity
				1418	* and zero
				1419	*/
				1420	if (Is_invalidtrap_enabled())
				1421	return(OPC_2E_INVALIDEXCEPTION);
				1422	Set_invalidflag();
				1423	Sgl_makequietnan(resultp1);
				1424	Sgl_copytoptr(resultp1,dstptr);
				1425	return(NOEXCEPTION);
				1426	}
				1427	/*
				1428	* Check third operand for infinity with a
				1429	* sign opposite of the multiply result
				1430	*/
				1431	if (Sgl_isinfinity(opnd3) &&
				1432	(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
				1433	/*
				1434	* invalid since attempting a magnitude
				1435	* subtraction of infinities
				1436	*/
				1437	if (Is_invalidtrap_enabled())
				1438	return(OPC_2E_INVALIDEXCEPTION);
				1439	Set_invalidflag();
				1440	Sgl_makequietnan(resultp1);
				1441	Sgl_copytoptr(resultp1,dstptr);
				1442	return(NOEXCEPTION);
				1443	}
				1444
				1445	/*
				1446	* return infinity
				1447	*/
				1448	Sgl_setinfinity_exponentmantissa(resultp1);
				1449	Sgl_copytoptr(resultp1,dstptr);
				1450	return(NOEXCEPTION);
				1451	}
				1452	}
				1453	else {
				1454	/*
				1455	* is NaN; signaling or quiet?
				1456	*/
				1457	if (Sgl_isone_signaling(opnd1)) {
				1458	/* trap if INVALIDTRAP enabled */
				1459	if (Is_invalidtrap_enabled())
				1460	return(OPC_2E_INVALIDEXCEPTION);
				1461	/* make NaN quiet */
				1462	Set_invalidflag();
				1463	Sgl_set_quiet(opnd1);
				1464	}
				1465	/*
				1466	* is second operand a signaling NaN?
				1467	*/
				1468	else if (Sgl_is_signalingnan(opnd2)) {
				1469	/* trap if INVALIDTRAP enabled */
				1470	if (Is_invalidtrap_enabled())
				1471	return(OPC_2E_INVALIDEXCEPTION);
				1472	/* make NaN quiet */
				1473	Set_invalidflag();
				1474	Sgl_set_quiet(opnd2);
				1475	Sgl_copytoptr(opnd2,dstptr);
				1476	return(NOEXCEPTION);
				1477	}
				1478	/*
				1479	* is third operand a signaling NaN?
				1480	*/
				1481	else if (Sgl_is_signalingnan(opnd3)) {
				1482	/* trap if INVALIDTRAP enabled */
				1483	if (Is_invalidtrap_enabled())
				1484	return(OPC_2E_INVALIDEXCEPTION);
				1485	/* make NaN quiet */
				1486	Set_invalidflag();
				1487	Sgl_set_quiet(opnd3);
				1488	Sgl_copytoptr(opnd3,dstptr);
				1489	return(NOEXCEPTION);
				1490	}
				1491	/*
				1492	* return quiet NaN
				1493	*/
				1494	Sgl_copytoptr(opnd1,dstptr);
				1495	return(NOEXCEPTION);
				1496	}
				1497	}
				1498
				1499	/*
				1500	* check second operand for NaN's or infinity
				1501	*/
				1502	if (Sgl_isinfinity_exponent(opnd2)) {
				1503	if (Sgl_iszero_mantissa(opnd2)) {
				1504	if (Sgl_isnotnan(opnd3)) {
				1505	if (Sgl_iszero_exponentmantissa(opnd1)) {
				1506	/*
				1507	* invalid since multiply operands are
				1508	* zero & infinity
				1509	*/
				1510	if (Is_invalidtrap_enabled())
				1511	return(OPC_2E_INVALIDEXCEPTION);
				1512	Set_invalidflag();
				1513	Sgl_makequietnan(opnd2);
				1514	Sgl_copytoptr(opnd2,dstptr);
				1515	return(NOEXCEPTION);
				1516	}
				1517
				1518	/*
				1519	* Check third operand for infinity with a
				1520	* sign opposite of the multiply result
				1521	*/
				1522	if (Sgl_isinfinity(opnd3) &&
				1523	(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
				1524	/*
				1525	* invalid since attempting a magnitude
				1526	* subtraction of infinities
				1527	*/
				1528	if (Is_invalidtrap_enabled())
				1529	return(OPC_2E_INVALIDEXCEPTION);
				1530	Set_invalidflag();
				1531	Sgl_makequietnan(resultp1);
				1532	Sgl_copytoptr(resultp1,dstptr);
				1533	return(NOEXCEPTION);
				1534	}
				1535
				1536	/*
				1537	* return infinity
				1538	*/
				1539	Sgl_setinfinity_exponentmantissa(resultp1);
				1540	Sgl_copytoptr(resultp1,dstptr);
				1541	return(NOEXCEPTION);
				1542	}
				1543	}
				1544	else {
				1545	/*
				1546	* is NaN; signaling or quiet?
				1547	*/
				1548	if (Sgl_isone_signaling(opnd2)) {
				1549	/* trap if INVALIDTRAP enabled */
				1550	if (Is_invalidtrap_enabled())
				1551	return(OPC_2E_INVALIDEXCEPTION);
				1552	/* make NaN quiet */
				1553	Set_invalidflag();
				1554	Sgl_set_quiet(opnd2);
				1555	}
				1556	/*
				1557	* is third operand a signaling NaN?
				1558	*/
				1559	else if (Sgl_is_signalingnan(opnd3)) {
				1560	/* trap if INVALIDTRAP enabled */
				1561	if (Is_invalidtrap_enabled())
				1562	return(OPC_2E_INVALIDEXCEPTION);
				1563	/* make NaN quiet */
				1564	Set_invalidflag();
				1565	Sgl_set_quiet(opnd3);
				1566	Sgl_copytoptr(opnd3,dstptr);
				1567	return(NOEXCEPTION);
				1568	}
				1569	/*
				1570	* return quiet NaN
				1571	*/
				1572	Sgl_copytoptr(opnd2,dstptr);
				1573	return(NOEXCEPTION);
				1574	}
				1575	}
				1576
				1577	/*
				1578	* check third operand for NaN's or infinity
				1579	*/
				1580	if (Sgl_isinfinity_exponent(opnd3)) {
				1581	if (Sgl_iszero_mantissa(opnd3)) {
				1582	/* return infinity */
				1583	Sgl_copytoptr(opnd3,dstptr);
				1584	return(NOEXCEPTION);
				1585	} else {
				1586	/*
				1587	* is NaN; signaling or quiet?
				1588	*/
				1589	if (Sgl_isone_signaling(opnd3)) {
				1590	/* trap if INVALIDTRAP enabled */
				1591	if (Is_invalidtrap_enabled())
				1592	return(OPC_2E_INVALIDEXCEPTION);
				1593	/* make NaN quiet */
				1594	Set_invalidflag();
				1595	Sgl_set_quiet(opnd3);
				1596	}
				1597	/*
				1598	* return quiet NaN
				1599	*/
				1600	Sgl_copytoptr(opnd3,dstptr);
				1601	return(NOEXCEPTION);
				1602	}
				1603	}
				1604
				1605	/*
				1606	* Generate multiply mantissa
				1607	*/
				1608	if (Sgl_isnotzero_exponent(opnd1)) {
				1609	/* set hidden bit */
				1610	Sgl_clear_signexponent_set_hidden(opnd1);
				1611	}
				1612	else {
				1613	/* check for zero */
				1614	if (Sgl_iszero_mantissa(opnd1)) {
				1615	/*
				1616	* Perform the add opnd3 with zero here.
				1617	*/
				1618	if (Sgl_iszero_exponentmantissa(opnd3)) {
				1619	if (Is_rounding_mode(ROUNDMINUS)) {
				1620	Sgl_or_signs(opnd3,resultp1);
				1621	} else {
				1622	Sgl_and_signs(opnd3,resultp1);
				1623	}
				1624	}
				1625	/*
				1626	* Now let's check for trapped underflow case.
				1627	*/
				1628	else if (Sgl_iszero_exponent(opnd3) &&
				1629	Is_underflowtrap_enabled()) {
				1630	/* need to normalize results mantissa */
				1631	sign_save = Sgl_signextendedsign(opnd3);
				1632	result_exponent = 0;
				1633	Sgl_leftshiftby1(opnd3);
				1634	Sgl_normalize(opnd3,result_exponent);
				1635	Sgl_set_sign(opnd3,/using/sign_save);
				1636	Sgl_setwrapped_exponent(opnd3,result_exponent,
				1637	unfl);
				1638	Sgl_copytoptr(opnd3,dstptr);
				1639	/* inexact = FALSE */
				1640	return(OPC_2E_UNDERFLOWEXCEPTION);
				1641	}
				1642	Sgl_copytoptr(opnd3,dstptr);
				1643	return(NOEXCEPTION);
				1644	}
				1645	/* is denormalized, adjust exponent */
				1646	Sgl_clear_signexponent(opnd1);
				1647	Sgl_leftshiftby1(opnd1);
				1648	Sgl_normalize(opnd1,mpy_exponent);
				1649	}
				1650	/* opnd2 needs to have hidden bit set with msb in hidden bit */
				1651	if (Sgl_isnotzero_exponent(opnd2)) {
				1652	Sgl_clear_signexponent_set_hidden(opnd2);
				1653	}
				1654	else {
				1655	/* check for zero */
				1656	if (Sgl_iszero_mantissa(opnd2)) {
				1657	/*
				1658	* Perform the add opnd3 with zero here.
				1659	*/
				1660	if (Sgl_iszero_exponentmantissa(opnd3)) {
				1661	if (Is_rounding_mode(ROUNDMINUS)) {
				1662	Sgl_or_signs(opnd3,resultp1);
				1663	} else {
				1664	Sgl_and_signs(opnd3,resultp1);
				1665	}
				1666	}
				1667	/*
				1668	* Now let's check for trapped underflow case.
				1669	*/
				1670	else if (Sgl_iszero_exponent(opnd3) &&
				1671	Is_underflowtrap_enabled()) {
				1672	/* need to normalize results mantissa */
				1673	sign_save = Sgl_signextendedsign(opnd3);
				1674	result_exponent = 0;
				1675	Sgl_leftshiftby1(opnd3);
				1676	Sgl_normalize(opnd3,result_exponent);
				1677	Sgl_set_sign(opnd3,/using/sign_save);
				1678	Sgl_setwrapped_exponent(opnd3,result_exponent,
				1679	unfl);
				1680	Sgl_copytoptr(opnd3,dstptr);
				1681	/* inexact = FALSE */
				1682	return(OPC_2E_UNDERFLOWEXCEPTION);
				1683	}
				1684	Sgl_copytoptr(opnd3,dstptr);
				1685	return(NOEXCEPTION);
				1686	}
				1687	/* is denormalized; want to normalize */
				1688	Sgl_clear_signexponent(opnd2);
				1689	Sgl_leftshiftby1(opnd2);
				1690	Sgl_normalize(opnd2,mpy_exponent);
				1691	}
				1692
				1693	/* Multiply the first two source mantissas together */
				1694
				1695	/*
				1696	* The intermediate result will be kept in tmpres,
				1697	* which needs enough room for 106 bits of mantissa,
				1698	* so lets call it a Double extended.
				1699	*/
				1700	Sglext_setzero(tmpresp1,tmpresp2);
				1701
				1702	/*
				1703	* Four bits at a time are inspected in each loop, and a
				1704	* simple shift and add multiply algorithm is used.
				1705	*/
				1706	for (count = SGL_P-1; count >= 0; count -= 4) {
				1707	Sglext_rightshiftby4(tmpresp1,tmpresp2);
				1708	if (Sbit28(opnd1)) {
				1709	/* Twoword_add should be an ADD followed by 2 ADDC's */
				1710	Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
				1711	}
				1712	if (Sbit29(opnd1)) {
				1713	Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
				1714	}
				1715	if (Sbit30(opnd1)) {
				1716	Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
				1717	}
				1718	if (Sbit31(opnd1)) {
				1719	Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
				1720	}
				1721	Sgl_rightshiftby4(opnd1);
				1722	}
				1723	if (Is_sexthiddenoverflow(tmpresp1)) {
				1724	/* result mantissa >= 2 (mantissa overflow) */
				1725	mpy_exponent++;
				1726	Sglext_rightshiftby4(tmpresp1,tmpresp2);
				1727	} else {
				1728	Sglext_rightshiftby3(tmpresp1,tmpresp2);
				1729	}
				1730
				1731	/*
				1732	* Restore the sign of the mpy result which was saved in resultp1.
				1733	* The exponent will continue to be kept in mpy_exponent.
				1734	*/
				1735	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
				1736
				1737	/*
				1738	* No rounding is required, since the result of the multiply
				1739	* is exact in the extended format.
				1740	*/
				1741
				1742	/*
				1743	* Now we are ready to perform the add portion of the operation.
				1744	*
				1745	* The exponents need to be kept as integers for now, since the
				1746	* multiply result might not fit into the exponent field. We
				1747	* can't overflow or underflow because of this yet, since the
				1748	* add could bring the final result back into range.
				1749	*/
				1750	add_exponent = Sgl_exponent(opnd3);
				1751
				1752	/*
				1753	* Check for denormalized or zero add operand.
				1754	*/
				1755	if (add_exponent == 0) {
				1756	/* check for zero */
				1757	if (Sgl_iszero_mantissa(opnd3)) {
				1758	/* right is zero */
				1759	/* Left can't be zero and must be result.
				1760	*
				1761	* The final result is now in tmpres and mpy_exponent,
				1762	* and needs to be rounded and squeezed back into
				1763	* double precision format from double extended.
				1764	*/
				1765	result_exponent = mpy_exponent;
				1766	Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
				1767	sign_save = Sgl_signextendedsign(resultp1);/save sign/
				1768	goto round;
				1769	}
				1770
				1771	/*
				1772	* Neither are zeroes.
				1773	* Adjust exponent and normalize add operand.
				1774	*/
				1775	sign_save = Sgl_signextendedsign(opnd3); /* save sign */
				1776	Sgl_clear_signexponent(opnd3);
				1777	Sgl_leftshiftby1(opnd3);
				1778	Sgl_normalize(opnd3,add_exponent);
				1779	Sgl_set_sign(opnd3,sign_save); /* restore sign */
				1780	} else {
				1781	Sgl_clear_exponent_set_hidden(opnd3);
				1782	}
				1783	/*
				1784	* Copy opnd3 to the double extended variable called right.
				1785	*/
				1786	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
				1787
				1788	/*
				1789	* A zero "save" helps discover equal operands (for later),
				1790	* and is used in swapping operands (if needed).
				1791	*/
				1792	Sglext_xortointp1(tmpresp1,rightp1,/to/save);
				1793
				1794	/*
				1795	* Compare magnitude of operands.
				1796	*/
				1797	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
				1798	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
				1799	if (mpy_exponent < add_exponent \|\| mpy_exponent == add_exponent &&
				1800	Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
				1801	/*
				1802	* Set the left operand to the larger one by XOR swap.
				1803	* First finish the first word "save".
				1804	*/
				1805	Sglext_xorfromintp1(save,rightp1,/to/rightp1);
				1806	Sglext_xorfromintp1(save,tmpresp1,/to/tmpresp1);
				1807	Sglext_swap_lower(tmpresp2,rightp2);
				1808	/* also setup exponents used in rest of routine */
				1809	diff_exponent = add_exponent - mpy_exponent;
				1810	result_exponent = add_exponent;
				1811	} else {
				1812	/* also setup exponents used in rest of routine */
				1813	diff_exponent = mpy_exponent - add_exponent;
				1814	result_exponent = mpy_exponent;
				1815	}
				1816	/* Invariant: left is not smaller than right. */
				1817
				1818	/*
				1819	* Special case alignment of operands that would force alignment
				1820	* beyond the extent of the extension. A further optimization
				1821	* could special case this but only reduces the path length for
				1822	* this infrequent case.
				1823	*/
				1824	if (diff_exponent > SGLEXT_THRESHOLD) {
				1825	diff_exponent = SGLEXT_THRESHOLD;
				1826	}
				1827
				1828	/* Align right operand by shifting it to the right */
				1829	Sglext_clear_sign(rightp1);
				1830	Sglext_right_align(rightp1,rightp2,/shifted by/diff_exponent);
				1831
				1832	/* Treat sum and difference of the operands separately. */
				1833	if ((int)save < 0) {
				1834	/*
				1835	* Difference of the two operands. Overflow can occur if the
				1836	* multiply overflowed. A borrow can occur out of the hidden
				1837	* bit and force a post normalization phase.
				1838	*/
				1839	Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
				1840	resultp1,resultp2);
				1841	sign_save = Sgl_signextendedsign(resultp1);
				1842	if (Sgl_iszero_hidden(resultp1)) {
				1843	/* Handle normalization */
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	1844	/* A straightforward algorithm would now shift the
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1845	* result and extension left until the hidden bit
				1846	* becomes one. Not all of the extension bits need
				1847	* participate in the shift. Only the two most
				1848	* significant bits (round and guard) are needed.
				1849	* If only a single shift is needed then the guard
				1850	* bit becomes a significant low order bit and the
				1851	* extension must participate in the rounding.
				1852	* If more than a single shift is needed, then all
				1853	* bits to the right of the guard bit are zeros,
				1854	* and the guard bit may or may not be zero. */
				1855	Sglext_leftshiftby1(resultp1,resultp2);
				1856
				1857	/* Need to check for a zero result. The sign and
				1858	* exponent fields have already been zeroed. The more
				1859	* efficient test of the full object can be used.
				1860	*/
				1861	if (Sglext_iszero(resultp1,resultp2)) {
				1862	/* Must have been "x-x" or "x+(-x)". */
				1863	if (Is_rounding_mode(ROUNDMINUS))
				1864	Sgl_setone_sign(resultp1);
				1865	Sgl_copytoptr(resultp1,dstptr);
				1866	return(NOEXCEPTION);
				1867	}
				1868	result_exponent--;
				1869
				1870	/* Look to see if normalization is finished. */
				1871	if (Sgl_isone_hidden(resultp1)) {
				1872	/* No further normalization is needed */
				1873	goto round;
				1874	}
				1875
				1876	/* Discover first one bit to determine shift amount.
				1877	* Use a modified binary search. We have already
				1878	* shifted the result one position right and still
				1879	* not found a one so the remainder of the extension
				1880	* must be zero and simplifies rounding. */
				1881	/* Scan bytes */
				1882	while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
				1883	Sglext_leftshiftby8(resultp1,resultp2);
				1884	result_exponent -= 8;
				1885	}
				1886	/* Now narrow it down to the nibble */
				1887	if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
				1888	/* The lower nibble contains the
				1889	* normalizing one */
				1890	Sglext_leftshiftby4(resultp1,resultp2);
				1891	result_exponent -= 4;
				1892	}
				1893	/* Select case where first bit is set (already
				1894	* normalized) otherwise select the proper shift. */
				1895	jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
				1896	if (jumpsize <= 7) switch(jumpsize) {
				1897	case 1:
				1898	Sglext_leftshiftby3(resultp1,resultp2);
				1899	result_exponent -= 3;
				1900	break;
				1901	case 2:
				1902	case 3:
				1903	Sglext_leftshiftby2(resultp1,resultp2);
				1904	result_exponent -= 2;
				1905	break;
				1906	case 4:
				1907	case 5:
				1908	case 6:
				1909	case 7:
				1910	Sglext_leftshiftby1(resultp1,resultp2);
				1911	result_exponent -= 1;
				1912	break;
				1913	}
				1914	} /* end if (hidden...)... */
				1915	/* Fall through and round */
				1916	} /* end if (save < 0)... */
				1917	else {
				1918	/* Add magnitudes */
				1919	Sglext_addition(tmpresp1,tmpresp2,
				1920	rightp1,rightp2, /to/resultp1,resultp2);
				1921	sign_save = Sgl_signextendedsign(resultp1);
				1922	if (Sgl_isone_hiddenoverflow(resultp1)) {
				1923	/* Prenormalization required. */
				1924	Sglext_arithrightshiftby1(resultp1,resultp2);
				1925	result_exponent++;
				1926	} /* end if hiddenoverflow... */
				1927	} /* end else ...add magnitudes... */
				1928
				1929	/* Round the result. If the extension and lower two words are
				1930	* all zeros, then the result is exact. Otherwise round in the
				1931	* correct direction. Underflow is possible. If a postnormalization
				1932	* is necessary, then the mantissa is all zeros so no shift is needed.
				1933	*/
				1934	round:
				1935	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
				1936	Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
				1937	}
				1938	Sgl_set_sign(resultp1,/using/sign_save);
				1939	if (Sglext_isnotzero_mantissap2(resultp2)) {
				1940	inexact = TRUE;
				1941	switch(Rounding_mode()) {
				1942	case ROUNDNEAREST: /* The default. */
				1943	if (Sglext_isone_highp2(resultp2)) {
				1944	/* at least 1/2 ulp */
				1945	if (Sglext_isnotzero_low31p2(resultp2) \|\|
				1946	Sglext_isone_lowp1(resultp1)) {
				1947	/* either exactly half way and odd or
				1948	* more than 1/2ulp */
				1949	Sgl_increment(resultp1);
				1950	}
				1951	}
				1952	break;
				1953
				1954	case ROUNDPLUS:
				1955	if (Sgl_iszero_sign(resultp1)) {
				1956	/* Round up positive results */
				1957	Sgl_increment(resultp1);
				1958	}
				1959	break;
				1960
				1961	case ROUNDMINUS:
				1962	if (Sgl_isone_sign(resultp1)) {
				1963	/* Round down negative results */
				1964	Sgl_increment(resultp1);
				1965	}
				1966
				1967	case ROUNDZERO:;
				1968	/* truncate is simple */
				1969	} /* end switch... */
				1970	if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
				1971	}
				1972	if (result_exponent >= SGL_INFINITY_EXPONENT) {
				1973	/* Overflow */
				1974	if (Is_overflowtrap_enabled()) {
				1975	/*
				1976	* Adjust bias of result
				1977	*/
				1978	Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
				1979	Sgl_copytoptr(resultp1,dstptr);
				1980	if (inexact)
				1981	if (Is_inexacttrap_enabled())
				1982	return (OPC_2E_OVERFLOWEXCEPTION \|
				1983	OPC_2E_INEXACTEXCEPTION);
				1984	else Set_inexactflag();
				1985	return (OPC_2E_OVERFLOWEXCEPTION);
				1986	}
				1987	inexact = TRUE;
				1988	Set_overflowflag();
				1989	Sgl_setoverflow(resultp1);
				1990	} else if (result_exponent <= 0) { /* underflow case */
				1991	if (Is_underflowtrap_enabled()) {
				1992	/*
				1993	* Adjust bias of result
				1994	*/
				1995	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
				1996	Sgl_copytoptr(resultp1,dstptr);
				1997	if (inexact)
				1998	if (Is_inexacttrap_enabled())
				1999	return (OPC_2E_UNDERFLOWEXCEPTION \|
				2000	OPC_2E_INEXACTEXCEPTION);
				2001	else Set_inexactflag();
				2002	return(OPC_2E_UNDERFLOWEXCEPTION);
				2003	}
				2004	else if (inexact && is_tiny) Set_underflowflag();
				2005	}
				2006	else Sgl_set_exponent(resultp1,result_exponent);
				2007	Sgl_copytoptr(resultp1,dstptr);
				2008	if (inexact)
				2009	if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
				2010	else Set_inexactflag();
				2011	return(NOEXCEPTION);
				2012	}
				2013
				2014	/*
				2015	* Single Floating-point Multiply Negate Fused Add
				2016	*/
				2017
				2018	sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
				2019
				2020	sgl_floating_point src1ptr, src2ptr, src3ptr, dstptr;
				2021	unsigned int *status;
				2022	{
				2023	unsigned int opnd1, opnd2, opnd3;
				2024	register unsigned int tmpresp1, tmpresp2;
				2025	unsigned int rightp1, rightp2;
				2026	unsigned int resultp1, resultp2 = 0;
				2027	register int mpy_exponent, add_exponent, count;
				2028	boolean inexact = FALSE, is_tiny = FALSE;
				2029
				2030	unsigned int signlessleft1, signlessright1, save;
				2031	register int result_exponent, diff_exponent;
				2032	int sign_save, jumpsize;
				2033
				2034	Sgl_copyfromptr(src1ptr,opnd1);
				2035	Sgl_copyfromptr(src2ptr,opnd2);
				2036	Sgl_copyfromptr(src3ptr,opnd3);
				2037
				2038	/*
				2039	* set sign bit of result of multiply
				2040	*/
				2041	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
				2042	Sgl_setzero(resultp1);
				2043	else
				2044	Sgl_setnegativezero(resultp1);
				2045
				2046	/*
				2047	* Generate multiply exponent
				2048	*/
				2049	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
				2050
				2051	/*
				2052	* check first operand for NaN's or infinity
				2053	*/
				2054	if (Sgl_isinfinity_exponent(opnd1)) {
				2055	if (Sgl_iszero_mantissa(opnd1)) {
				2056	if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
				2057	if (Sgl_iszero_exponentmantissa(opnd2)) {
				2058	/*
				2059	* invalid since operands are infinity
				2060	* and zero
				2061	*/
				2062	if (Is_invalidtrap_enabled())
				2063	return(OPC_2E_INVALIDEXCEPTION);
				2064	Set_invalidflag();
				2065	Sgl_makequietnan(resultp1);
				2066	Sgl_copytoptr(resultp1,dstptr);
				2067	return(NOEXCEPTION);
				2068	}
				2069	/*
				2070	* Check third operand for infinity with a
				2071	* sign opposite of the multiply result
				2072	*/
				2073	if (Sgl_isinfinity(opnd3) &&
				2074	(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
				2075	/*
				2076	* invalid since attempting a magnitude
				2077	* subtraction of infinities
				2078	*/
				2079	if (Is_invalidtrap_enabled())
				2080	return(OPC_2E_INVALIDEXCEPTION);
				2081	Set_invalidflag();
				2082	Sgl_makequietnan(resultp1);
				2083	Sgl_copytoptr(resultp1,dstptr);
				2084	return(NOEXCEPTION);
				2085	}
				2086
				2087	/*
				2088	* return infinity
				2089	*/
				2090	Sgl_setinfinity_exponentmantissa(resultp1);
				2091	Sgl_copytoptr(resultp1,dstptr);
				2092	return(NOEXCEPTION);
				2093	}
				2094	}
				2095	else {
				2096	/*
				2097	* is NaN; signaling or quiet?
				2098	*/
				2099	if (Sgl_isone_signaling(opnd1)) {
				2100	/* trap if INVALIDTRAP enabled */
				2101	if (Is_invalidtrap_enabled())
				2102	return(OPC_2E_INVALIDEXCEPTION);
				2103	/* make NaN quiet */
				2104	Set_invalidflag();
				2105	Sgl_set_quiet(opnd1);
				2106	}
				2107	/*
				2108	* is second operand a signaling NaN?
				2109	*/
				2110	else if (Sgl_is_signalingnan(opnd2)) {
				2111	/* trap if INVALIDTRAP enabled */
				2112	if (Is_invalidtrap_enabled())
				2113	return(OPC_2E_INVALIDEXCEPTION);
				2114	/* make NaN quiet */
				2115	Set_invalidflag();
				2116	Sgl_set_quiet(opnd2);
				2117	Sgl_copytoptr(opnd2,dstptr);
				2118	return(NOEXCEPTION);
				2119	}
				2120	/*
				2121	* is third operand a signaling NaN?
				2122	*/
				2123	else if (Sgl_is_signalingnan(opnd3)) {
				2124	/* trap if INVALIDTRAP enabled */
				2125	if (Is_invalidtrap_enabled())
				2126	return(OPC_2E_INVALIDEXCEPTION);
				2127	/* make NaN quiet */
				2128	Set_invalidflag();
				2129	Sgl_set_quiet(opnd3);
				2130	Sgl_copytoptr(opnd3,dstptr);
				2131	return(NOEXCEPTION);
				2132	}
				2133	/*
				2134	* return quiet NaN
				2135	*/
				2136	Sgl_copytoptr(opnd1,dstptr);
				2137	return(NOEXCEPTION);
				2138	}
				2139	}
				2140
				2141	/*
				2142	* check second operand for NaN's or infinity
				2143	*/
				2144	if (Sgl_isinfinity_exponent(opnd2)) {
				2145	if (Sgl_iszero_mantissa(opnd2)) {
				2146	if (Sgl_isnotnan(opnd3)) {
				2147	if (Sgl_iszero_exponentmantissa(opnd1)) {
				2148	/*
				2149	* invalid since multiply operands are
				2150	* zero & infinity
				2151	*/
				2152	if (Is_invalidtrap_enabled())
				2153	return(OPC_2E_INVALIDEXCEPTION);
				2154	Set_invalidflag();
				2155	Sgl_makequietnan(opnd2);
				2156	Sgl_copytoptr(opnd2,dstptr);
				2157	return(NOEXCEPTION);
				2158	}
				2159
				2160	/*
				2161	* Check third operand for infinity with a
				2162	* sign opposite of the multiply result
				2163	*/
				2164	if (Sgl_isinfinity(opnd3) &&
				2165	(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
				2166	/*
				2167	* invalid since attempting a magnitude
				2168	* subtraction of infinities
				2169	*/
				2170	if (Is_invalidtrap_enabled())
				2171	return(OPC_2E_INVALIDEXCEPTION);
				2172	Set_invalidflag();
				2173	Sgl_makequietnan(resultp1);
				2174	Sgl_copytoptr(resultp1,dstptr);
				2175	return(NOEXCEPTION);
				2176	}
				2177
				2178	/*
				2179	* return infinity
				2180	*/
				2181	Sgl_setinfinity_exponentmantissa(resultp1);
				2182	Sgl_copytoptr(resultp1,dstptr);
				2183	return(NOEXCEPTION);
				2184	}
				2185	}
				2186	else {
				2187	/*
				2188	* is NaN; signaling or quiet?
				2189	*/
				2190	if (Sgl_isone_signaling(opnd2)) {
				2191	/* trap if INVALIDTRAP enabled */
				2192	if (Is_invalidtrap_enabled())
				2193	return(OPC_2E_INVALIDEXCEPTION);
				2194	/* make NaN quiet */
				2195	Set_invalidflag();
				2196	Sgl_set_quiet(opnd2);
				2197	}
				2198	/*
				2199	* is third operand a signaling NaN?
				2200	*/
				2201	else if (Sgl_is_signalingnan(opnd3)) {
				2202	/* trap if INVALIDTRAP enabled */
				2203	if (Is_invalidtrap_enabled())
				2204	return(OPC_2E_INVALIDEXCEPTION);
				2205	/* make NaN quiet */
				2206	Set_invalidflag();
				2207	Sgl_set_quiet(opnd3);
				2208	Sgl_copytoptr(opnd3,dstptr);
				2209	return(NOEXCEPTION);
				2210	}
				2211	/*
				2212	* return quiet NaN
				2213	*/
				2214	Sgl_copytoptr(opnd2,dstptr);
				2215	return(NOEXCEPTION);
				2216	}
				2217	}
				2218
				2219	/*
				2220	* check third operand for NaN's or infinity
				2221	*/
				2222	if (Sgl_isinfinity_exponent(opnd3)) {
				2223	if (Sgl_iszero_mantissa(opnd3)) {
				2224	/* return infinity */
				2225	Sgl_copytoptr(opnd3,dstptr);
				2226	return(NOEXCEPTION);
				2227	} else {
				2228	/*
				2229	* is NaN; signaling or quiet?
				2230	*/
				2231	if (Sgl_isone_signaling(opnd3)) {
				2232	/* trap if INVALIDTRAP enabled */
				2233	if (Is_invalidtrap_enabled())
				2234	return(OPC_2E_INVALIDEXCEPTION);
				2235	/* make NaN quiet */
				2236	Set_invalidflag();
				2237	Sgl_set_quiet(opnd3);
				2238	}
				2239	/*
				2240	* return quiet NaN
				2241	*/
				2242	Sgl_copytoptr(opnd3,dstptr);
				2243	return(NOEXCEPTION);
				2244	}
				2245	}
				2246
				2247	/*
				2248	* Generate multiply mantissa
				2249	*/
				2250	if (Sgl_isnotzero_exponent(opnd1)) {
				2251	/* set hidden bit */
				2252	Sgl_clear_signexponent_set_hidden(opnd1);
				2253	}
				2254	else {
				2255	/* check for zero */
				2256	if (Sgl_iszero_mantissa(opnd1)) {
				2257	/*
				2258	* Perform the add opnd3 with zero here.
				2259	*/
				2260	if (Sgl_iszero_exponentmantissa(opnd3)) {
				2261	if (Is_rounding_mode(ROUNDMINUS)) {
				2262	Sgl_or_signs(opnd3,resultp1);
				2263	} else {
				2264	Sgl_and_signs(opnd3,resultp1);
				2265	}
				2266	}
				2267	/*
				2268	* Now let's check for trapped underflow case.
				2269	*/
				2270	else if (Sgl_iszero_exponent(opnd3) &&
				2271	Is_underflowtrap_enabled()) {
				2272	/* need to normalize results mantissa */
				2273	sign_save = Sgl_signextendedsign(opnd3);
				2274	result_exponent = 0;
				2275	Sgl_leftshiftby1(opnd3);
				2276	Sgl_normalize(opnd3,result_exponent);
				2277	Sgl_set_sign(opnd3,/using/sign_save);
				2278	Sgl_setwrapped_exponent(opnd3,result_exponent,
				2279	unfl);
				2280	Sgl_copytoptr(opnd3,dstptr);
				2281	/* inexact = FALSE */
				2282	return(OPC_2E_UNDERFLOWEXCEPTION);
				2283	}
				2284	Sgl_copytoptr(opnd3,dstptr);
				2285	return(NOEXCEPTION);
				2286	}
				2287	/* is denormalized, adjust exponent */
				2288	Sgl_clear_signexponent(opnd1);
				2289	Sgl_leftshiftby1(opnd1);
				2290	Sgl_normalize(opnd1,mpy_exponent);
				2291	}
				2292	/* opnd2 needs to have hidden bit set with msb in hidden bit */
				2293	if (Sgl_isnotzero_exponent(opnd2)) {
				2294	Sgl_clear_signexponent_set_hidden(opnd2);
				2295	}
				2296	else {
				2297	/* check for zero */
				2298	if (Sgl_iszero_mantissa(opnd2)) {
				2299	/*
				2300	* Perform the add opnd3 with zero here.
				2301	*/
				2302	if (Sgl_iszero_exponentmantissa(opnd3)) {
				2303	if (Is_rounding_mode(ROUNDMINUS)) {
				2304	Sgl_or_signs(opnd3,resultp1);
				2305	} else {
				2306	Sgl_and_signs(opnd3,resultp1);
				2307	}
				2308	}
				2309	/*
				2310	* Now let's check for trapped underflow case.
				2311	*/
				2312	else if (Sgl_iszero_exponent(opnd3) &&
				2313	Is_underflowtrap_enabled()) {
				2314	/* need to normalize results mantissa */
				2315	sign_save = Sgl_signextendedsign(opnd3);
				2316	result_exponent = 0;
				2317	Sgl_leftshiftby1(opnd3);
				2318	Sgl_normalize(opnd3,result_exponent);
				2319	Sgl_set_sign(opnd3,/using/sign_save);
				2320	Sgl_setwrapped_exponent(opnd3,result_exponent,
				2321	unfl);
				2322	Sgl_copytoptr(opnd3,dstptr);
				2323	/* inexact = FALSE */
				2324	return(OPC_2E_UNDERFLOWEXCEPTION);
				2325	}
				2326	Sgl_copytoptr(opnd3,dstptr);
				2327	return(NOEXCEPTION);
				2328	}
				2329	/* is denormalized; want to normalize */
				2330	Sgl_clear_signexponent(opnd2);
				2331	Sgl_leftshiftby1(opnd2);
				2332	Sgl_normalize(opnd2,mpy_exponent);
				2333	}
				2334
				2335	/* Multiply the first two source mantissas together */
				2336
				2337	/*
				2338	* The intermediate result will be kept in tmpres,
				2339	* which needs enough room for 106 bits of mantissa,
				2340	* so lets call it a Double extended.
				2341	*/
				2342	Sglext_setzero(tmpresp1,tmpresp2);
				2343
				2344	/*
				2345	* Four bits at a time are inspected in each loop, and a
				2346	* simple shift and add multiply algorithm is used.
				2347	*/
				2348	for (count = SGL_P-1; count >= 0; count -= 4) {
				2349	Sglext_rightshiftby4(tmpresp1,tmpresp2);
				2350	if (Sbit28(opnd1)) {
				2351	/* Twoword_add should be an ADD followed by 2 ADDC's */
				2352	Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
				2353	}
				2354	if (Sbit29(opnd1)) {
				2355	Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
				2356	}
				2357	if (Sbit30(opnd1)) {
				2358	Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
				2359	}
				2360	if (Sbit31(opnd1)) {
				2361	Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
				2362	}
				2363	Sgl_rightshiftby4(opnd1);
				2364	}
				2365	if (Is_sexthiddenoverflow(tmpresp1)) {
				2366	/* result mantissa >= 2 (mantissa overflow) */
				2367	mpy_exponent++;
				2368	Sglext_rightshiftby4(tmpresp1,tmpresp2);
				2369	} else {
				2370	Sglext_rightshiftby3(tmpresp1,tmpresp2);
				2371	}
				2372
				2373	/*
				2374	* Restore the sign of the mpy result which was saved in resultp1.
				2375	* The exponent will continue to be kept in mpy_exponent.
				2376	*/
				2377	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
				2378
				2379	/*
				2380	* No rounding is required, since the result of the multiply
				2381	* is exact in the extended format.
				2382	*/
				2383
				2384	/*
				2385	* Now we are ready to perform the add portion of the operation.
				2386	*
				2387	* The exponents need to be kept as integers for now, since the
				2388	* multiply result might not fit into the exponent field. We
				2389	* can't overflow or underflow because of this yet, since the
				2390	* add could bring the final result back into range.
				2391	*/
				2392	add_exponent = Sgl_exponent(opnd3);
				2393
				2394	/*
				2395	* Check for denormalized or zero add operand.
				2396	*/
				2397	if (add_exponent == 0) {
				2398	/* check for zero */
				2399	if (Sgl_iszero_mantissa(opnd3)) {
				2400	/* right is zero */
				2401	/* Left can't be zero and must be result.
				2402	*
				2403	* The final result is now in tmpres and mpy_exponent,
				2404	* and needs to be rounded and squeezed back into
				2405	* double precision format from double extended.
				2406	*/
				2407	result_exponent = mpy_exponent;
				2408	Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
				2409	sign_save = Sgl_signextendedsign(resultp1);/save sign/
				2410	goto round;
				2411	}
				2412
				2413	/*
				2414	* Neither are zeroes.
				2415	* Adjust exponent and normalize add operand.
				2416	*/
				2417	sign_save = Sgl_signextendedsign(opnd3); /* save sign */
				2418	Sgl_clear_signexponent(opnd3);
				2419	Sgl_leftshiftby1(opnd3);
				2420	Sgl_normalize(opnd3,add_exponent);
				2421	Sgl_set_sign(opnd3,sign_save); /* restore sign */
				2422	} else {
				2423	Sgl_clear_exponent_set_hidden(opnd3);
				2424	}
				2425	/*
				2426	* Copy opnd3 to the double extended variable called right.
				2427	*/
				2428	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
				2429
				2430	/*
				2431	* A zero "save" helps discover equal operands (for later),
				2432	* and is used in swapping operands (if needed).
				2433	*/
				2434	Sglext_xortointp1(tmpresp1,rightp1,/to/save);
				2435
				2436	/*
				2437	* Compare magnitude of operands.
				2438	*/
				2439	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
				2440	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
				2441	if (mpy_exponent < add_exponent \|\| mpy_exponent == add_exponent &&
				2442	Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
				2443	/*
				2444	* Set the left operand to the larger one by XOR swap.
				2445	* First finish the first word "save".
				2446	*/
				2447	Sglext_xorfromintp1(save,rightp1,/to/rightp1);
				2448	Sglext_xorfromintp1(save,tmpresp1,/to/tmpresp1);
				2449	Sglext_swap_lower(tmpresp2,rightp2);
				2450	/* also setup exponents used in rest of routine */
				2451	diff_exponent = add_exponent - mpy_exponent;
				2452	result_exponent = add_exponent;
				2453	} else {
				2454	/* also setup exponents used in rest of routine */
				2455	diff_exponent = mpy_exponent - add_exponent;
				2456	result_exponent = mpy_exponent;
				2457	}
				2458	/* Invariant: left is not smaller than right. */
				2459
				2460	/*
				2461	* Special case alignment of operands that would force alignment
				2462	* beyond the extent of the extension. A further optimization
				2463	* could special case this but only reduces the path length for
				2464	* this infrequent case.
				2465	*/
				2466	if (diff_exponent > SGLEXT_THRESHOLD) {
				2467	diff_exponent = SGLEXT_THRESHOLD;
				2468	}
				2469
				2470	/* Align right operand by shifting it to the right */
				2471	Sglext_clear_sign(rightp1);
				2472	Sglext_right_align(rightp1,rightp2,/shifted by/diff_exponent);
				2473
				2474	/* Treat sum and difference of the operands separately. */
				2475	if ((int)save < 0) {
				2476	/*
				2477	* Difference of the two operands. Overflow can occur if the
				2478	* multiply overflowed. A borrow can occur out of the hidden
				2479	* bit and force a post normalization phase.
				2480	*/
				2481	Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
				2482	resultp1,resultp2);
				2483	sign_save = Sgl_signextendedsign(resultp1);
				2484	if (Sgl_iszero_hidden(resultp1)) {
				2485	/* Handle normalization */
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	2486	/* A straightforward algorithm would now shift the
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2487	* result and extension left until the hidden bit
				2488	* becomes one. Not all of the extension bits need
				2489	* participate in the shift. Only the two most
				2490	* significant bits (round and guard) are needed.
				2491	* If only a single shift is needed then the guard
				2492	* bit becomes a significant low order bit and the
				2493	* extension must participate in the rounding.
				2494	* If more than a single shift is needed, then all
				2495	* bits to the right of the guard bit are zeros,
				2496	* and the guard bit may or may not be zero. */
				2497	Sglext_leftshiftby1(resultp1,resultp2);
				2498
				2499	/* Need to check for a zero result. The sign and
				2500	* exponent fields have already been zeroed. The more
				2501	* efficient test of the full object can be used.
				2502	*/
				2503	if (Sglext_iszero(resultp1,resultp2)) {
				2504	/* Must have been "x-x" or "x+(-x)". */
				2505	if (Is_rounding_mode(ROUNDMINUS))
				2506	Sgl_setone_sign(resultp1);
				2507	Sgl_copytoptr(resultp1,dstptr);
				2508	return(NOEXCEPTION);
				2509	}
				2510	result_exponent--;
				2511
				2512	/* Look to see if normalization is finished. */
				2513	if (Sgl_isone_hidden(resultp1)) {
				2514	/* No further normalization is needed */
				2515	goto round;
				2516	}
				2517
				2518	/* Discover first one bit to determine shift amount.
				2519	* Use a modified binary search. We have already
				2520	* shifted the result one position right and still
				2521	* not found a one so the remainder of the extension
				2522	* must be zero and simplifies rounding. */
				2523	/* Scan bytes */
				2524	while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
				2525	Sglext_leftshiftby8(resultp1,resultp2);
				2526	result_exponent -= 8;
				2527	}
				2528	/* Now narrow it down to the nibble */
				2529	if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
				2530	/* The lower nibble contains the
				2531	* normalizing one */
				2532	Sglext_leftshiftby4(resultp1,resultp2);
				2533	result_exponent -= 4;
				2534	}
				2535	/* Select case where first bit is set (already
				2536	* normalized) otherwise select the proper shift. */
				2537	jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
				2538	if (jumpsize <= 7) switch(jumpsize) {
				2539	case 1:
				2540	Sglext_leftshiftby3(resultp1,resultp2);
				2541	result_exponent -= 3;
				2542	break;
				2543	case 2:
				2544	case 3:
				2545	Sglext_leftshiftby2(resultp1,resultp2);
				2546	result_exponent -= 2;
				2547	break;
				2548	case 4:
				2549	case 5:
				2550	case 6:
				2551	case 7:
				2552	Sglext_leftshiftby1(resultp1,resultp2);
				2553	result_exponent -= 1;
				2554	break;
				2555	}
				2556	} /* end if (hidden...)... */
				2557	/* Fall through and round */
				2558	} /* end if (save < 0)... */
				2559	else {
				2560	/* Add magnitudes */
				2561	Sglext_addition(tmpresp1,tmpresp2,
				2562	rightp1,rightp2, /to/resultp1,resultp2);
				2563	sign_save = Sgl_signextendedsign(resultp1);
				2564	if (Sgl_isone_hiddenoverflow(resultp1)) {
				2565	/* Prenormalization required. */
				2566	Sglext_arithrightshiftby1(resultp1,resultp2);
				2567	result_exponent++;
				2568	} /* end if hiddenoverflow... */
				2569	} /* end else ...add magnitudes... */
				2570
				2571	/* Round the result. If the extension and lower two words are
				2572	* all zeros, then the result is exact. Otherwise round in the
				2573	* correct direction. Underflow is possible. If a postnormalization
				2574	* is necessary, then the mantissa is all zeros so no shift is needed.
				2575	*/
				2576	round:
				2577	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
				2578	Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
				2579	}
				2580	Sgl_set_sign(resultp1,/using/sign_save);
				2581	if (Sglext_isnotzero_mantissap2(resultp2)) {
				2582	inexact = TRUE;
				2583	switch(Rounding_mode()) {
				2584	case ROUNDNEAREST: /* The default. */
				2585	if (Sglext_isone_highp2(resultp2)) {
				2586	/* at least 1/2 ulp */
				2587	if (Sglext_isnotzero_low31p2(resultp2) \|\|
				2588	Sglext_isone_lowp1(resultp1)) {
				2589	/* either exactly half way and odd or
				2590	* more than 1/2ulp */
				2591	Sgl_increment(resultp1);
				2592	}
				2593	}
				2594	break;
				2595
				2596	case ROUNDPLUS:
				2597	if (Sgl_iszero_sign(resultp1)) {
				2598	/* Round up positive results */
				2599	Sgl_increment(resultp1);
				2600	}
				2601	break;
				2602
				2603	case ROUNDMINUS:
				2604	if (Sgl_isone_sign(resultp1)) {
				2605	/* Round down negative results */
				2606	Sgl_increment(resultp1);
				2607	}
				2608
				2609	case ROUNDZERO:;
				2610	/* truncate is simple */
				2611	} /* end switch... */
				2612	if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
				2613	}
				2614	if (result_exponent >= SGL_INFINITY_EXPONENT) {
				2615	/* Overflow */
				2616	if (Is_overflowtrap_enabled()) {
				2617	/*
				2618	* Adjust bias of result
				2619	*/
				2620	Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
				2621	Sgl_copytoptr(resultp1,dstptr);
				2622	if (inexact)
				2623	if (Is_inexacttrap_enabled())
				2624	return (OPC_2E_OVERFLOWEXCEPTION \|
				2625	OPC_2E_INEXACTEXCEPTION);
				2626	else Set_inexactflag();
				2627	return (OPC_2E_OVERFLOWEXCEPTION);
				2628	}
				2629	inexact = TRUE;
				2630	Set_overflowflag();
				2631	Sgl_setoverflow(resultp1);
				2632	} else if (result_exponent <= 0) { /* underflow case */
				2633	if (Is_underflowtrap_enabled()) {
				2634	/*
				2635	* Adjust bias of result
				2636	*/
				2637	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
				2638	Sgl_copytoptr(resultp1,dstptr);
				2639	if (inexact)
				2640	if (Is_inexacttrap_enabled())
				2641	return (OPC_2E_UNDERFLOWEXCEPTION \|
				2642	OPC_2E_INEXACTEXCEPTION);
				2643	else Set_inexactflag();
				2644	return(OPC_2E_UNDERFLOWEXCEPTION);
				2645	}
				2646	else if (inexact && is_tiny) Set_underflowflag();
				2647	}
				2648	else Sgl_set_exponent(resultp1,result_exponent);
				2649	Sgl_copytoptr(resultp1,dstptr);
				2650	if (inexact)
				2651	if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
				2652	else Set_inexactflag();
				2653	return(NOEXCEPTION);
				2654	}
				2655