| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
 | 2 |  * Linux/PA-RISC Project (http://www.parisc-linux.org/) | 
 | 3 |  * | 
 | 4 |  * Floating-point emulation code | 
 | 5 |  *  Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> | 
 | 6 |  * | 
 | 7 |  *    This program is free software; you can redistribute it and/or modify | 
 | 8 |  *    it under the terms of the GNU General Public License as published by | 
 | 9 |  *    the Free Software Foundation; either version 2, or (at your option) | 
 | 10 |  *    any later version. | 
 | 11 |  * | 
 | 12 |  *    This program is distributed in the hope that it will be useful, | 
 | 13 |  *    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
 | 14 |  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
 | 15 |  *    GNU General Public License for more details. | 
 | 16 |  * | 
 | 17 |  *    You should have received a copy of the GNU General Public License | 
 | 18 |  *    along with this program; if not, write to the Free Software | 
 | 19 |  *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA | 
 | 20 |  */ | 
 | 21 | /* | 
 | 22 |  * BEGIN_DESC | 
 | 23 |  * | 
 | 24 |  *  File: | 
 | 25 |  *	@(#)	pa/spmath/dfmpy.c		$Revision: 1.1 $ | 
 | 26 |  * | 
 | 27 |  *  Purpose: | 
 | 28 |  *	Double Precision Floating-point Multiply | 
 | 29 |  * | 
 | 30 |  *  External Interfaces: | 
 | 31 |  *	dbl_fmpy(srcptr1,srcptr2,dstptr,status) | 
 | 32 |  * | 
 | 33 |  *  Internal Interfaces: | 
 | 34 |  * | 
 | 35 |  *  Theory: | 
 | 36 |  *	<<please update with a overview of the operation of this file>> | 
 | 37 |  * | 
 | 38 |  * END_DESC | 
 | 39 | */ | 
 | 40 |  | 
 | 41 |  | 
 | 42 | #include "float.h" | 
 | 43 | #include "dbl_float.h" | 
 | 44 |  | 
 | 45 | /* | 
 | 46 |  *  Double Precision Floating-point Multiply | 
 | 47 |  */ | 
 | 48 |  | 
 | 49 | int | 
 | 50 | dbl_fmpy( | 
 | 51 | 	    dbl_floating_point *srcptr1, | 
 | 52 | 	    dbl_floating_point *srcptr2, | 
 | 53 | 	    dbl_floating_point *dstptr, | 
 | 54 | 	    unsigned int *status) | 
 | 55 | { | 
 | 56 | 	register unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2; | 
 | 57 | 	register unsigned int opnd3p1, opnd3p2, resultp1, resultp2; | 
 | 58 | 	register int dest_exponent, count; | 
 | 59 | 	register boolean inexact = FALSE, guardbit = FALSE, stickybit = FALSE; | 
 | 60 | 	boolean is_tiny; | 
 | 61 |  | 
 | 62 | 	Dbl_copyfromptr(srcptr1,opnd1p1,opnd1p2); | 
 | 63 | 	Dbl_copyfromptr(srcptr2,opnd2p1,opnd2p2); | 
 | 64 |  | 
 | 65 | 	/*  | 
 | 66 | 	 * set sign bit of result  | 
 | 67 | 	 */ | 
 | 68 | 	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))  | 
 | 69 | 		Dbl_setnegativezerop1(resultp1);  | 
 | 70 | 	else Dbl_setzerop1(resultp1); | 
 | 71 | 	/* | 
 | 72 | 	 * check first operand for NaN's or infinity | 
 | 73 | 	 */ | 
 | 74 | 	if (Dbl_isinfinity_exponent(opnd1p1)) { | 
 | 75 | 		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { | 
 | 76 | 			if (Dbl_isnotnan(opnd2p1,opnd2p2)) { | 
 | 77 | 				if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) { | 
 | 78 | 					/*  | 
 | 79 | 					 * invalid since operands are infinity  | 
 | 80 | 					 * and zero  | 
 | 81 | 					 */ | 
 | 82 | 					if (Is_invalidtrap_enabled()) | 
 | 83 |                                 		return(INVALIDEXCEPTION); | 
 | 84 |                                 	Set_invalidflag(); | 
 | 85 |                                 	Dbl_makequietnan(resultp1,resultp2); | 
 | 86 | 					Dbl_copytoptr(resultp1,resultp2,dstptr); | 
 | 87 | 					return(NOEXCEPTION); | 
 | 88 | 				} | 
 | 89 | 				/* | 
 | 90 | 			 	 * return infinity | 
 | 91 | 			 	 */ | 
 | 92 | 				Dbl_setinfinity_exponentmantissa(resultp1,resultp2); | 
 | 93 | 				Dbl_copytoptr(resultp1,resultp2,dstptr); | 
 | 94 | 				return(NOEXCEPTION); | 
 | 95 | 			} | 
 | 96 | 		} | 
 | 97 | 		else { | 
 | 98 |                 	/* | 
 | 99 |                  	 * is NaN; signaling or quiet? | 
 | 100 |                  	 */ | 
 | 101 |                 	if (Dbl_isone_signaling(opnd1p1)) { | 
 | 102 |                         	/* trap if INVALIDTRAP enabled */ | 
 | 103 |                         	if (Is_invalidtrap_enabled())  | 
 | 104 |                             		return(INVALIDEXCEPTION); | 
 | 105 |                         	/* make NaN quiet */ | 
 | 106 |                         	Set_invalidflag(); | 
 | 107 |                         	Dbl_set_quiet(opnd1p1); | 
 | 108 |                 	} | 
 | 109 | 			/*  | 
 | 110 | 			 * is second operand a signaling NaN?  | 
 | 111 | 			 */ | 
 | 112 | 			else if (Dbl_is_signalingnan(opnd2p1)) { | 
 | 113 |                         	/* trap if INVALIDTRAP enabled */ | 
 | 114 |                         	if (Is_invalidtrap_enabled()) | 
 | 115 |                             		return(INVALIDEXCEPTION); | 
 | 116 |                         	/* make NaN quiet */ | 
 | 117 |                         	Set_invalidflag(); | 
 | 118 |                         	Dbl_set_quiet(opnd2p1); | 
 | 119 | 				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); | 
 | 120 |                 		return(NOEXCEPTION); | 
 | 121 | 			} | 
 | 122 |                 	/* | 
 | 123 |                  	 * return quiet NaN | 
 | 124 |                  	 */ | 
 | 125 | 			Dbl_copytoptr(opnd1p1,opnd1p2,dstptr); | 
 | 126 |                 	return(NOEXCEPTION); | 
 | 127 | 		} | 
 | 128 | 	} | 
 | 129 | 	/* | 
 | 130 | 	 * check second operand for NaN's or infinity | 
 | 131 | 	 */ | 
 | 132 | 	if (Dbl_isinfinity_exponent(opnd2p1)) { | 
 | 133 | 		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { | 
 | 134 | 			if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) { | 
 | 135 | 				/* invalid since operands are zero & infinity */ | 
 | 136 | 				if (Is_invalidtrap_enabled()) | 
 | 137 |                                 	return(INVALIDEXCEPTION); | 
 | 138 |                                 Set_invalidflag(); | 
 | 139 |                                 Dbl_makequietnan(opnd2p1,opnd2p2); | 
 | 140 | 				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); | 
 | 141 | 				return(NOEXCEPTION); | 
 | 142 | 			} | 
 | 143 | 			/* | 
 | 144 | 			 * return infinity | 
 | 145 | 			 */ | 
 | 146 | 			Dbl_setinfinity_exponentmantissa(resultp1,resultp2); | 
 | 147 | 			Dbl_copytoptr(resultp1,resultp2,dstptr); | 
 | 148 | 			return(NOEXCEPTION); | 
 | 149 | 		} | 
 | 150 |                 /* | 
 | 151 |                  * is NaN; signaling or quiet? | 
 | 152 |                  */ | 
 | 153 |                 if (Dbl_isone_signaling(opnd2p1)) { | 
 | 154 |                         /* trap if INVALIDTRAP enabled */ | 
 | 155 |                         if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION); | 
 | 156 |                         /* make NaN quiet */ | 
 | 157 |                         Set_invalidflag(); | 
 | 158 |                         Dbl_set_quiet(opnd2p1); | 
 | 159 |                 } | 
 | 160 |                 /* | 
 | 161 |                  * return quiet NaN | 
 | 162 |                  */ | 
 | 163 | 		Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); | 
 | 164 |                 return(NOEXCEPTION); | 
 | 165 | 	} | 
 | 166 | 	/* | 
 | 167 | 	 * Generate exponent  | 
 | 168 | 	 */ | 
 | 169 | 	dest_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) -DBL_BIAS; | 
 | 170 |  | 
 | 171 | 	/* | 
 | 172 | 	 * Generate mantissa | 
 | 173 | 	 */ | 
 | 174 | 	if (Dbl_isnotzero_exponent(opnd1p1)) { | 
 | 175 | 		/* set hidden bit */ | 
 | 176 | 		Dbl_clear_signexponent_set_hidden(opnd1p1); | 
 | 177 | 	} | 
 | 178 | 	else { | 
 | 179 | 		/* check for zero */ | 
 | 180 | 		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { | 
 | 181 | 			Dbl_setzero_exponentmantissa(resultp1,resultp2); | 
 | 182 | 			Dbl_copytoptr(resultp1,resultp2,dstptr); | 
 | 183 | 			return(NOEXCEPTION); | 
 | 184 | 		} | 
 | 185 |                 /* is denormalized, adjust exponent */ | 
 | 186 |                 Dbl_clear_signexponent(opnd1p1); | 
 | 187 |                 Dbl_leftshiftby1(opnd1p1,opnd1p2); | 
 | 188 | 		Dbl_normalize(opnd1p1,opnd1p2,dest_exponent); | 
 | 189 | 	} | 
 | 190 | 	/* opnd2 needs to have hidden bit set with msb in hidden bit */ | 
 | 191 | 	if (Dbl_isnotzero_exponent(opnd2p1)) { | 
 | 192 | 		Dbl_clear_signexponent_set_hidden(opnd2p1); | 
 | 193 | 	} | 
 | 194 | 	else { | 
 | 195 | 		/* check for zero */ | 
 | 196 | 		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { | 
 | 197 | 			Dbl_setzero_exponentmantissa(resultp1,resultp2); | 
 | 198 | 			Dbl_copytoptr(resultp1,resultp2,dstptr); | 
 | 199 | 			return(NOEXCEPTION); | 
 | 200 | 		} | 
 | 201 |                 /* is denormalized; want to normalize */ | 
 | 202 |                 Dbl_clear_signexponent(opnd2p1); | 
 | 203 |                 Dbl_leftshiftby1(opnd2p1,opnd2p2); | 
 | 204 | 		Dbl_normalize(opnd2p1,opnd2p2,dest_exponent); | 
 | 205 | 	} | 
 | 206 |  | 
 | 207 | 	/* Multiply two source mantissas together */ | 
 | 208 |  | 
 | 209 | 	/* make room for guard bits */ | 
 | 210 | 	Dbl_leftshiftby7(opnd2p1,opnd2p2); | 
 | 211 | 	Dbl_setzero(opnd3p1,opnd3p2); | 
 | 212 |         /*  | 
 | 213 |          * Four bits at a time are inspected in each loop, and a  | 
 | 214 |          * simple shift and add multiply algorithm is used.  | 
 | 215 |          */  | 
 | 216 | 	for (count=1;count<=DBL_P;count+=4) { | 
 | 217 | 		stickybit |= Dlow4p2(opnd3p2); | 
 | 218 | 		Dbl_rightshiftby4(opnd3p1,opnd3p2); | 
 | 219 | 		if (Dbit28p2(opnd1p2)) { | 
 | 220 | 	 		/* Twoword_add should be an ADDC followed by an ADD. */ | 
 | 221 |                         Twoword_add(opnd3p1, opnd3p2, opnd2p1<<3 | opnd2p2>>29,  | 
 | 222 | 				    opnd2p2<<3); | 
 | 223 | 		} | 
 | 224 | 		if (Dbit29p2(opnd1p2)) { | 
 | 225 |                         Twoword_add(opnd3p1, opnd3p2, opnd2p1<<2 | opnd2p2>>30,  | 
 | 226 | 				    opnd2p2<<2); | 
 | 227 | 		} | 
 | 228 | 		if (Dbit30p2(opnd1p2)) { | 
 | 229 |                         Twoword_add(opnd3p1, opnd3p2, opnd2p1<<1 | opnd2p2>>31, | 
 | 230 | 				    opnd2p2<<1); | 
 | 231 | 		} | 
 | 232 | 		if (Dbit31p2(opnd1p2)) { | 
 | 233 |                         Twoword_add(opnd3p1, opnd3p2, opnd2p1, opnd2p2); | 
 | 234 | 		} | 
 | 235 | 		Dbl_rightshiftby4(opnd1p1,opnd1p2); | 
 | 236 | 	} | 
 | 237 | 	if (Dbit3p1(opnd3p1)==0) { | 
 | 238 | 		Dbl_leftshiftby1(opnd3p1,opnd3p2); | 
 | 239 | 	} | 
 | 240 | 	else { | 
 | 241 | 		/* result mantissa >= 2. */ | 
 | 242 | 		dest_exponent++; | 
 | 243 | 	} | 
 | 244 | 	/* check for denormalized result */ | 
 | 245 | 	while (Dbit3p1(opnd3p1)==0) { | 
 | 246 | 		Dbl_leftshiftby1(opnd3p1,opnd3p2); | 
 | 247 | 		dest_exponent--; | 
 | 248 | 	} | 
 | 249 | 	/* | 
 | 250 | 	 * check for guard, sticky and inexact bits  | 
 | 251 | 	 */ | 
 | 252 | 	stickybit |= Dallp2(opnd3p2) << 25; | 
 | 253 | 	guardbit = (Dallp2(opnd3p2) << 24) >> 31; | 
 | 254 | 	inexact = guardbit | stickybit; | 
 | 255 |  | 
 | 256 | 	/* align result mantissa */ | 
 | 257 | 	Dbl_rightshiftby8(opnd3p1,opnd3p2); | 
 | 258 |  | 
 | 259 | 	/*  | 
 | 260 | 	 * round result  | 
 | 261 | 	 */ | 
 | 262 | 	if (inexact && (dest_exponent>0 || Is_underflowtrap_enabled())) { | 
 | 263 | 		Dbl_clear_signexponent(opnd3p1); | 
 | 264 | 		switch (Rounding_mode()) { | 
 | 265 | 			case ROUNDPLUS:  | 
 | 266 | 				if (Dbl_iszero_sign(resultp1))  | 
 | 267 | 					Dbl_increment(opnd3p1,opnd3p2); | 
 | 268 | 				break; | 
 | 269 | 			case ROUNDMINUS:  | 
 | 270 | 				if (Dbl_isone_sign(resultp1))  | 
 | 271 | 					Dbl_increment(opnd3p1,opnd3p2); | 
 | 272 | 				break; | 
 | 273 | 			case ROUNDNEAREST: | 
 | 274 | 				if (guardbit) { | 
 | 275 | 			   	if (stickybit || Dbl_isone_lowmantissap2(opnd3p2)) | 
 | 276 | 			      	Dbl_increment(opnd3p1,opnd3p2); | 
 | 277 | 				} | 
 | 278 | 		} | 
 | 279 | 		if (Dbl_isone_hidden(opnd3p1)) dest_exponent++; | 
 | 280 | 	} | 
 | 281 | 	Dbl_set_mantissa(resultp1,resultp2,opnd3p1,opnd3p2); | 
 | 282 |  | 
 | 283 |         /*  | 
 | 284 |          * Test for overflow | 
 | 285 |          */ | 
 | 286 | 	if (dest_exponent >= DBL_INFINITY_EXPONENT) { | 
 | 287 |                 /* trap if OVERFLOWTRAP enabled */ | 
 | 288 |                 if (Is_overflowtrap_enabled()) { | 
 | 289 |                         /* | 
 | 290 |                          * Adjust bias of result | 
 | 291 |                          */ | 
 | 292 | 			Dbl_setwrapped_exponent(resultp1,dest_exponent,ovfl); | 
 | 293 | 			Dbl_copytoptr(resultp1,resultp2,dstptr); | 
 | 294 | 			if (inexact)  | 
 | 295 | 			    if (Is_inexacttrap_enabled()) | 
 | 296 | 				return (OVERFLOWEXCEPTION | INEXACTEXCEPTION); | 
 | 297 | 			    else Set_inexactflag(); | 
 | 298 | 			return (OVERFLOWEXCEPTION); | 
 | 299 |                 } | 
 | 300 | 		inexact = TRUE; | 
 | 301 | 		Set_overflowflag(); | 
 | 302 |                 /* set result to infinity or largest number */ | 
 | 303 | 		Dbl_setoverflow(resultp1,resultp2); | 
 | 304 | 	} | 
 | 305 |         /*  | 
 | 306 |          * Test for underflow | 
 | 307 |          */ | 
 | 308 | 	else if (dest_exponent <= 0) { | 
 | 309 |                 /* trap if UNDERFLOWTRAP enabled */ | 
 | 310 |                 if (Is_underflowtrap_enabled()) { | 
 | 311 |                         /* | 
 | 312 |                          * Adjust bias of result | 
 | 313 |                          */ | 
 | 314 | 			Dbl_setwrapped_exponent(resultp1,dest_exponent,unfl); | 
 | 315 | 			Dbl_copytoptr(resultp1,resultp2,dstptr); | 
 | 316 | 			if (inexact)  | 
 | 317 | 			    if (Is_inexacttrap_enabled()) | 
 | 318 | 				return (UNDERFLOWEXCEPTION | INEXACTEXCEPTION); | 
 | 319 | 			    else Set_inexactflag(); | 
 | 320 | 			return (UNDERFLOWEXCEPTION); | 
 | 321 |                 } | 
 | 322 |  | 
 | 323 | 		/* Determine if should set underflow flag */ | 
 | 324 | 		is_tiny = TRUE; | 
 | 325 | 		if (dest_exponent == 0 && inexact) { | 
 | 326 | 			switch (Rounding_mode()) { | 
 | 327 | 			case ROUNDPLUS:  | 
 | 328 | 				if (Dbl_iszero_sign(resultp1)) { | 
 | 329 | 					Dbl_increment(opnd3p1,opnd3p2); | 
 | 330 | 					if (Dbl_isone_hiddenoverflow(opnd3p1)) | 
 | 331 |                 			    is_tiny = FALSE; | 
 | 332 | 					Dbl_decrement(opnd3p1,opnd3p2); | 
 | 333 | 				} | 
 | 334 | 				break; | 
 | 335 | 			case ROUNDMINUS:  | 
 | 336 | 				if (Dbl_isone_sign(resultp1)) { | 
 | 337 | 					Dbl_increment(opnd3p1,opnd3p2); | 
 | 338 | 					if (Dbl_isone_hiddenoverflow(opnd3p1)) | 
 | 339 |                 			    is_tiny = FALSE; | 
 | 340 | 					Dbl_decrement(opnd3p1,opnd3p2); | 
 | 341 | 				} | 
 | 342 | 				break; | 
 | 343 | 			case ROUNDNEAREST: | 
 | 344 | 				if (guardbit && (stickybit ||  | 
 | 345 | 				    Dbl_isone_lowmantissap2(opnd3p2))) { | 
 | 346 | 				      	Dbl_increment(opnd3p1,opnd3p2); | 
 | 347 | 					if (Dbl_isone_hiddenoverflow(opnd3p1)) | 
 | 348 |                 			    is_tiny = FALSE; | 
 | 349 | 					Dbl_decrement(opnd3p1,opnd3p2); | 
 | 350 | 				} | 
 | 351 | 				break; | 
 | 352 | 			} | 
 | 353 | 		} | 
 | 354 |  | 
 | 355 | 		/* | 
 | 356 | 		 * denormalize result or set to signed zero | 
 | 357 | 		 */ | 
 | 358 | 		stickybit = inexact; | 
 | 359 | 		Dbl_denormalize(opnd3p1,opnd3p2,dest_exponent,guardbit, | 
 | 360 | 		 stickybit,inexact); | 
 | 361 |  | 
 | 362 | 		/* return zero or smallest number */ | 
 | 363 | 		if (inexact) { | 
 | 364 | 			switch (Rounding_mode()) { | 
 | 365 | 			case ROUNDPLUS:  | 
 | 366 | 				if (Dbl_iszero_sign(resultp1)) { | 
 | 367 | 					Dbl_increment(opnd3p1,opnd3p2); | 
 | 368 | 				} | 
 | 369 | 				break; | 
 | 370 | 			case ROUNDMINUS:  | 
 | 371 | 				if (Dbl_isone_sign(resultp1)) { | 
 | 372 | 					Dbl_increment(opnd3p1,opnd3p2); | 
 | 373 | 				} | 
 | 374 | 				break; | 
 | 375 | 			case ROUNDNEAREST: | 
 | 376 | 				if (guardbit && (stickybit ||  | 
 | 377 | 				    Dbl_isone_lowmantissap2(opnd3p2))) { | 
 | 378 | 			      		Dbl_increment(opnd3p1,opnd3p2); | 
 | 379 | 				} | 
 | 380 | 				break; | 
 | 381 | 			} | 
 | 382 |                 	if (is_tiny) Set_underflowflag(); | 
 | 383 | 		} | 
 | 384 | 		Dbl_set_exponentmantissa(resultp1,resultp2,opnd3p1,opnd3p2); | 
 | 385 | 	} | 
 | 386 | 	else Dbl_set_exponent(resultp1,dest_exponent); | 
 | 387 | 	/* check for inexact */ | 
 | 388 | 	Dbl_copytoptr(resultp1,resultp2,dstptr); | 
 | 389 | 	if (inexact) { | 
 | 390 | 		if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION); | 
 | 391 | 		else Set_inexactflag(); | 
 | 392 | 	} | 
 | 393 | 	return(NOEXCEPTION); | 
 | 394 | } |